Skip to content

Commit 392c5df

Browse files
author
KrishilGandhi
committed
Added RTL support for 2:4 and 1:4 sparsity in tensor core
1 parent bcd539c commit 392c5df

16 files changed

Lines changed: 273 additions & 58 deletions

ci/trace_csv.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -169,7 +169,11 @@ def append_reg(text, reg, sep):
169169

170170
def reg_value(rtype, value):
171171
if rtype == 1:
172-
ivalue = int(value, 16)
172+
s = str(value).strip()
173+
if s.startswith("0b") or s.startswith("0B"):
174+
ivalue = int(s, 2)
175+
else:
176+
ivalue = int(s, 16)
173177
ivalue32 = ivalue & 0xFFFFFFFF
174178
return "0x{:x}".format(ivalue32)
175179
else:
@@ -224,9 +228,11 @@ def parse_rtlsim(log_lines):
224228
rs1_pattern = r"rs1=(\d+)"
225229
rs2_pattern = r"rs2=(\d+)"
226230
rs3_pattern = r"rs3=(\d+)"
231+
rs4_pattern = r"rs4=(\d+)"
227232
rs1_data_pattern = r"rs1_data=\{(.+?)\}"
228233
rs2_data_pattern = r"rs2_data=\{(.+?)\}"
229234
rs3_data_pattern = r"rs3_data=\{(.+?)\}"
235+
rs4_data_pattern = r"rs4_data=\{(.+?)\}"
230236
rd_data_pattern = r"data=\{(.+?)\}"
231237
eop_pattern = r"eop=(\d)"
232238
uuid_pattern = r"#(\d+)"
@@ -268,6 +274,8 @@ def parse_rtlsim(log_lines):
268274
trace["rs1"] = re.search(rs1_pattern, line).group(1)
269275
trace["rs2"] = re.search(rs2_pattern, line).group(1)
270276
trace["rs3"] = re.search(rs3_pattern, line).group(1)
277+
m_rs4 = re.search(rs4_pattern, line)
278+
trace["rs4"] = m_rs4.group(1) if m_rs4 else ""
271279
trace["ibuf_ticks"] = timestamp
272280
instr_data[uuid] = trace
273281
if uuid in schd_ticks:
@@ -287,6 +295,10 @@ def parse_rtlsim(log_lines):
287295
merge_data(trace, 'rs2_data', simd_data(re.search(rs2_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
288296
if used_rs[2]:
289297
merge_data(trace, 'rs3_data', simd_data(re.search(rs3_data_pattern, line).group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
298+
if len(used_rs) > 3 and used_rs[3]:
299+
m_rs4d = re.search(rs4_data_pattern, line)
300+
if m_rs4d:
301+
merge_data(trace, 'rs4_data', simd_data(m_rs4d.group(1).split(', ')[::-1], sid, num_threads, '0x0'), src_tmask_arr)
290302
trace["issued"] = True
291303
trace["issue_ticks"] = timestamp
292304
instr_data[uuid] = trace
@@ -322,6 +334,10 @@ def parse_rtlsim(log_lines):
322334
if used_rs[2]:
323335
operands, sep = append_value(operands, trace["rs3"], trace["rs3_data"], tmask_arr, sep)
324336
del trace["rs3_data"]
337+
if (len(used_rs) > 3 and used_rs[3] and "rs4_data" in trace
338+
and trace.get("rs4", "") != ""):
339+
operands, sep = append_value(operands, trace["rs4"], trace["rs4_data"], tmask_arr, sep)
340+
del trace["rs4_data"]
325341
trace["operands"] = operands
326342
cycles = (timestamp - trace["issue_ticks"] + 1) // 2
327343
perf_exec.update(uuid, cycles)
@@ -332,6 +348,8 @@ def parse_rtlsim(log_lines):
332348
del trace["rs1"]
333349
del trace["rs2"]
334350
del trace["rs3"]
351+
if "rs4" in trace:
352+
del trace["rs4"]
335353
del trace["issued"]
336354
del instr_data[uuid]
337355
entries.append(trace)

hw/rtl/VX_define.vh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,7 @@
448448
logic [__lanes__-1:0][`XLEN-1:0] rs1_data; \
449449
logic [__lanes__-1:0][`XLEN-1:0] rs2_data; \
450450
logic [__lanes__-1:0][`XLEN-1:0] rs3_data; \
451+
logic [__lanes__-1:0][`XLEN-1:0] rs4_data; \
451452
logic [`LOG2UP(`NUM_THREADS / __lanes__)-1:0] pid; \
452453
logic sop; \
453454
logic eop; \

hw/rtl/VX_gpu_pkg.sv

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ package VX_gpu_pkg;
9292

9393
localparam OFFSET_BITS = 12;
9494

95-
localparam NUM_SRC_OPDS = 3;
95+
localparam NUM_SRC_OPDS = 4;
9696
localparam SRC_OPD_BITS = `CLOG2(NUM_SRC_OPDS);
9797
localparam SRC_OPD_WIDTH = `UP(SRC_OPD_BITS);
9898

@@ -532,11 +532,13 @@ package VX_gpu_pkg;
532532

533533
`ifdef EXT_TCU_ENABLE
534534
typedef struct packed {
535-
logic [(INST_ARGS_BITS-16)-1:0] __padding;
535+
logic [(INST_ARGS_BITS-18-4)-1:0] __padding;
536536
logic [3:0] fmt_d;
537537
logic [3:0] fmt_s;
538538
logic [3:0] step_n;
539539
logic [3:0] step_m;
540+
logic [3:0] step_k; // K-step index for this uop (matches VX_tcu_uops metadata_index formula)
541+
logic [1:0] sparsity_degree; // 0=dense, 1=1:4 sparse, 2=2:4 sparse
540542
} tcu_args_t;
541543
`PACKAGE_ASSERT($bits(tcu_args_t) == INST_ARGS_BITS)
542544
`endif
@@ -577,6 +579,7 @@ package VX_gpu_pkg;
577579
logic [NUM_REGS_BITS-1:0] rs1;
578580
logic [NUM_REGS_BITS-1:0] rs2;
579581
logic [NUM_REGS_BITS-1:0] rs3;
582+
logic [NUM_REGS_BITS-1:0] rs4; // TCU sparse: metadata ireg (a0-a7)
580583
} decode_t;
581584

582585
typedef struct packed {
@@ -592,6 +595,7 @@ package VX_gpu_pkg;
592595
logic [NUM_REGS_BITS-1:0] rs1;
593596
logic [NUM_REGS_BITS-1:0] rs2;
594597
logic [NUM_REGS_BITS-1:0] rs3;
598+
logic [NUM_REGS_BITS-1:0] rs4; // TCU sparse: metadata ireg (a0-a7)
595599
} ibuffer_t;
596600

597601
typedef struct packed {
@@ -608,6 +612,7 @@ package VX_gpu_pkg;
608612
logic [NUM_REGS_BITS-1:0] rs1;
609613
logic [NUM_REGS_BITS-1:0] rs2;
610614
logic [NUM_REGS_BITS-1:0] rs3;
615+
logic [NUM_REGS_BITS-1:0] rs4;
611616
} scoreboard_t;
612617

613618
typedef struct packed {
@@ -624,6 +629,7 @@ package VX_gpu_pkg;
624629
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
625630
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
626631
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
632+
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs4_data; // TCU sparse: metadata from a0-a7
627633
logic sop;
628634
logic eop;
629635
} operands_t;
@@ -642,6 +648,7 @@ package VX_gpu_pkg;
642648
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs1_data;
643649
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs2_data;
644650
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs3_data;
651+
logic [`SIMD_WIDTH-1:0][`XLEN-1:0] rs4_data; // TCU sparse: metadata from a0-a7
645652
logic sop;
646653
logic eop;
647654
} dispatch_t;

hw/rtl/core/VX_decode.sv

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,8 @@ module VX_decode import VX_gpu_pkg::*; #(
4646
reg [EX_BITS-1:0] ex_type;
4747
reg [INST_OP_BITS-1:0] op_type;
4848
op_args_t op_args;
49-
reg [NUM_REGS_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v;
50-
reg use_rd, use_rs1, use_rs2, use_rs3;
49+
reg [NUM_REGS_BITS-1:0] rd_v, rs1_v, rs2_v, rs3_v, rs4_v;
50+
reg use_rd, use_rs1, use_rs2, use_rs3, use_rs4;
5151
reg is_wstall;
5252

5353
wire [31:0] instr = fetch_if.data.instr;
@@ -70,6 +70,8 @@ module VX_decode import VX_gpu_pkg::*; #(
7070
`UNUSED_VAR (use_rs1)
7171
`UNUSED_VAR (use_rs2)
7272
`UNUSED_VAR (use_rs3)
73+
`UNUSED_VAR (use_rs4)
74+
`UNUSED_VAR (rs4_v)
7375

7476
wire is_itype_sh = funct3[0] && ~funct3[1];
7577
wire is_fpu_csr = (u_12 <= `VX_CSR_FCSR);
@@ -149,10 +151,12 @@ module VX_decode import VX_gpu_pkg::*; #(
149151
rs1_v = 'x;
150152
rs2_v = 'x;
151153
rs3_v = 'x;
154+
rs4_v = 'x;
152155
use_rd = 0;
153156
use_rs1 = 0;
154157
use_rs2 = 0;
155158
use_rs3 = 0;
159+
use_rs4 = 0;
156160
is_wstall = 0;
157161

158162
case (opcode)
@@ -518,17 +522,25 @@ module VX_decode import VX_gpu_pkg::*; #(
518522
`ifdef EXT_TCU_ENABLE
519523
7'h02: begin
520524
case (funct3)
521-
3'h0: begin // WMMA
525+
3'h0: begin // WMMA (dense/sparse)
522526
ex_type = EX_TCU;
523527
op_type = INST_OP_BITS'(INST_TCU_WMMA);
524-
op_args.tcu.fmt_s = rs1[3:0];
525-
op_args.tcu.fmt_d = rd[3:0];
526-
op_args.tcu.step_m = '0;
527-
op_args.tcu.step_n = '0;
528+
op_args.tcu.fmt_s = rs1[3:0];
529+
op_args.tcu.fmt_d = rd[3:0];
530+
op_args.tcu.step_m = '0;
531+
op_args.tcu.step_n = '0;
532+
op_args.tcu.step_k = '0;
533+
op_args.tcu.sparsity_degree = rs2[1:0];
528534
`USED_IREG (rd);
529535
`USED_IREG (rs1);
530536
`USED_IREG (rs2);
531537
`USED_IREG (rs3);
538+
if (rs2[1:0] != 2'b00) begin
539+
// Sparse WMMA metadata source is an integer reg (a0..),
540+
// while exact per-uop rs4 is resolved later in VX_tcu_uops.
541+
use_rs4 = 1'b1;
542+
rs4_v = make_reg_num(REG_TYPE_I, 5'(10)); // a0 base
543+
end
532544
end
533545
default:;
534546
endcase
@@ -544,7 +556,7 @@ module VX_decode import VX_gpu_pkg::*; #(
544556
// disable write to integer register r0
545557
wire wb = use_rd && (rd_v != 0);
546558

547-
wire [2:0] used_rs = {use_rs3, use_rs2, use_rs1};
559+
wire [NUM_SRC_OPDS-1:0] used_rs = {use_rs4, use_rs3, use_rs2, use_rs1};
548560

549561
VX_elastic_buffer #(
550562
.DATAW (OUT_DATAW),
@@ -554,8 +566,8 @@ module VX_decode import VX_gpu_pkg::*; #(
554566
.reset (reset),
555567
.valid_in (fetch_if.valid),
556568
.ready_in (fetch_if.ready),
557-
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, used_rs, rd_v, rs1_v, rs2_v, rs3_v}),
558-
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3}),
569+
.data_in ({fetch_if.data.uuid, fetch_if.data.wid, fetch_if.data.tmask, fetch_if.data.PC, ex_type, op_type, op_args, wb, used_rs, rd_v, rs1_v, rs2_v, rs3_v, rs4_v}),
570+
.data_out ({decode_if.data.uuid, decode_if.data.wid, decode_if.data.tmask, decode_if.data.PC, decode_if.data.ex_type, decode_if.data.op_type, decode_if.data.op_args, decode_if.data.wb, decode_if.data.used_rs, decode_if.data.rd, decode_if.data.rs1, decode_if.data.rs2, decode_if.data.rs3, decode_if.data.rs4}),
559571
.valid_out (decode_if.valid),
560572
.ready_out (decode_if.ready)
561573
);

hw/rtl/core/VX_dispatch.sv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,14 @@ module VX_dispatch import VX_gpu_pkg::*; #(
5353
operands_if.data.sid,
5454
operands_if.data.tmask,
5555
operands_if.data.PC,
56-
operands_if.data.op_type,
56+
INST_ALU_BITS'(operands_if.data.op_type),
5757
operands_if.data.op_args,
5858
operands_if.data.wb,
5959
operands_if.data.rd,
6060
operands_if.data.rs1_data,
6161
operands_if.data.rs2_data,
6262
operands_if.data.rs3_data,
63+
operands_if.data.rs4_data,
6364
operands_if.data.sop,
6465
operands_if.data.eop
6566
}),

hw/rtl/core/VX_dispatch_unit.sv

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
4747
localparam DATA_REGS_OFF = 1 + 1;
4848

4949
typedef struct packed {
50-
logic [2:0][NUM_LANES-1:0][`XLEN-1:0] rsdata;
50+
logic [NUM_SRC_OPDS-1:0][NUM_LANES-1:0][`XLEN-1:0] rsdata;
5151
logic [NUM_LANES-1:0] tmask;
5252
} packet_t;
5353

@@ -63,7 +63,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
6363

6464
wire [BLOCK_SIZE-1:0] block_ready;
6565
wire [BLOCK_SIZE-1:0][NUM_LANES-1:0] block_tmask;
66-
wire [BLOCK_SIZE-1:0][2:0][NUM_LANES-1:0][`XLEN-1:0] block_rsdata;
66+
wire [BLOCK_SIZE-1:0][NUM_SRC_OPDS-1:0][NUM_LANES-1:0][`XLEN-1:0] block_rsdata;
6767
wire [BLOCK_SIZE-1:0][LPID_WIDTH-1:0] block_pid;
6868
wire [BLOCK_SIZE-1:0] block_sop;
6969
wire [BLOCK_SIZE-1:0] block_eop;
@@ -121,12 +121,13 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
121121
wire dispatch_eop = dispatch_data[issue_idx][0];
122122

123123
wire [`SIMD_WIDTH-1:0] dispatch_tmask;
124-
wire [2:0][`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rsdata;
124+
wire [NUM_SRC_OPDS-1:0][`SIMD_WIDTH-1:0][`XLEN-1:0] dispatch_rsdata;
125125

126126
assign dispatch_tmask = dispatch_data[issue_idx][DATA_TMASK_OFF +: `SIMD_WIDTH];
127-
assign dispatch_rsdata[0] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
128-
assign dispatch_rsdata[1] = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
129-
assign dispatch_rsdata[2] = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
127+
assign dispatch_rsdata[0] = dispatch_data[issue_idx][DATA_REGS_OFF + 3 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
128+
assign dispatch_rsdata[1] = dispatch_data[issue_idx][DATA_REGS_OFF + 2 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
129+
assign dispatch_rsdata[2] = dispatch_data[issue_idx][DATA_REGS_OFF + 1 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
130+
assign dispatch_rsdata[3] = dispatch_data[issue_idx][DATA_REGS_OFF + 0 * `SIMD_WIDTH * `XLEN +: `SIMD_WIDTH * `XLEN];
130131

131132
wire valid_p, ready_p;
132133

@@ -141,6 +142,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
141142
assign packets[i].rsdata[0][j] = dispatch_rsdata[0][k];
142143
assign packets[i].rsdata[1][j] = dispatch_rsdata[1][k];
143144
assign packets[i].rsdata[2][j] = dispatch_rsdata[2][k];
145+
assign packets[i].rsdata[3][j] = dispatch_rsdata[3][k];
144146
end
145147
end
146148

@@ -220,6 +222,7 @@ module VX_dispatch_unit import VX_gpu_pkg::*; #(
220222
block_rsdata[block_idx][0],
221223
block_rsdata[block_idx][1],
222224
block_rsdata[block_idx][2],
225+
block_rsdata[block_idx][3],
223226
warp_pid,
224227
warp_sop,
225228
warp_eop}),

hw/rtl/core/VX_ibuffer.sv

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ module VX_ibuffer import VX_gpu_pkg::*; #(
6262
decode_if.data.rd,
6363
decode_if.data.rs1,
6464
decode_if.data.rs2,
65-
decode_if.data.rs3
65+
decode_if.data.rs3,
66+
decode_if.data.rs4
6667
}),
6768
.ready_in (ibuf_ready_in[w]),
6869
.valid_out(uop_sequencer_if.valid),

hw/rtl/core/VX_issue_slice.sv

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
183183
VX_trace_pkg::trace_reg_idx(1, ibuffer_if[i].data.rs2);
184184
`TRACE(1, (", rs3="))
185185
VX_trace_pkg::trace_reg_idx(1, ibuffer_if[i].data.rs3);
186+
`TRACE(1, (", rs4="))
187+
VX_trace_pkg::trace_reg_idx(1, ibuffer_if[i].data.rs4);
186188
`TRACE(1, (", "))
187189
VX_trace_pkg::trace_op_args(1, ibuffer_if[i].data.ex_type, ibuffer_if[i].data.op_type, ibuffer_if[i].data.op_args);
188190
`TRACE(1, (" (#%0d)\n", ibuffer_if[i].data.uuid))
@@ -202,6 +204,17 @@ module VX_issue_slice import VX_gpu_pkg::*; #(
202204
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs2_data, `SIMD_WIDTH)
203205
`TRACE(1, (", rs3_data="))
204206
`TRACE_ARRAY1D(1, "0x%0h", operands_if.data.rs3_data, `SIMD_WIDTH)
207+
`TRACE(1, (", rs4_data="))
208+
`TRACE(1, ("{"))
209+
begin : g_trace_rs4_lo8
210+
integer k_rs4;
211+
for (k_rs4 = `SIMD_WIDTH - 1; k_rs4 >= 0; k_rs4--) begin
212+
if (k_rs4 != `SIMD_WIDTH - 1)
213+
`TRACE(1, (", "));
214+
`TRACE(1, ("0b%08b", operands_if.data.rs4_data[k_rs4][7:0]));
215+
end
216+
end
217+
`TRACE(1, ("}"))
205218
`TRACE(1, (", "))
206219
VX_trace_pkg::trace_op_args(1, operands_if.data.ex_type, operands_if.data.op_type, operands_if.data.op_args);
207220
`TRACE(1, (", sop=%b, eop=%b (#%0d)\n", operands_if.data.sop, operands_if.data.eop, operands_if.data.uuid))

hw/rtl/core/VX_opc_unit.sv

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,10 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
8484
wire has_collision_st1;
8585

8686
wire [NUM_SRC_OPDS-1:0][NUM_REGS_BITS-1:0] src_regs;
87-
assign src_regs = {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1};
87+
// Map indices so that src_regs[0]=rs1, [1]=rs2, [2]=rs3, [3]=rs4 (when present).
88+
assign src_regs =
89+
{scoreboard_if.data.rs4, scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1};
90+
// {scoreboard_if.data.rs3, scoreboard_if.data.rs2, scoreboard_if.data.rs1};
8891

8992
for (genvar i = 0; i < NUM_SRC_OPDS; ++i) begin : g_gpr_rd_reg
9093
assign req_addr_in[i] = src_regs[i][NUM_REGS_BITS-1 -: REG_REM_BITS];
@@ -322,6 +325,7 @@ module VX_opc_unit import VX_gpu_pkg::*; #(
322325
operands_if.data.op_type,
323326
operands_if.data.op_args,
324327
operands_if.data.rd,
328+
operands_if.data.rs4_data,
325329
operands_if.data.rs3_data,
326330
operands_if.data.rs2_data,
327331
operands_if.data.rs1_data,

hw/rtl/core/VX_pe_switch.sv

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,12 @@ module VX_pe_switch import VX_gpu_pkg::*; #(
3131
);
3232
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
3333
localparam PID_WIDTH = `UP(PID_BITS);
34-
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + INST_ALU_BITS + $bits(op_args_t) + 1 + NUM_REGS_BITS + (3 * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
35-
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS + NUM_REGS_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
34+
localparam REQ_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS
35+
+ INST_ALU_BITS + $bits(op_args_t) + 1 + NUM_REGS_BITS
36+
+ (NUM_SRC_OPDS * NUM_LANES * `XLEN) + PID_WIDTH + 1 + 1;
37+
localparam RSP_DATAW = UUID_WIDTH + NW_WIDTH + NUM_LANES + PC_BITS
38+
+ NUM_REGS_BITS + 1 + NUM_LANES * `XLEN
39+
+ PID_WIDTH + 1 + 1;
3640

3741
wire [PE_COUNT-1:0] pe_req_valid;
3842
wire [PE_COUNT-1:0][REQ_DATAW-1:0] pe_req_data;

0 commit comments

Comments
 (0)