Skip to content

Commit ad4ae15

Browse files
committed
[hardware] 🐛 Fix EMUL handling when reshuffling more regs
1 parent 2325449 commit ad4ae15

1 file changed

Lines changed: 50 additions & 26 deletions

File tree

hardware/src/ara_dispatcher.sv

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,14 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
165165
// If the reg was not written, the content is unknown. No need to reshuffle
166166
// when writing with != EEW
167167
logic [31:0] eew_valid_d, eew_valid_q;
168+
// Help for reshuffling
169+
rvv_pkg::vlmul_e emul_vs1_d, emul_vs1_q, emul_vs2_d, emul_vs2_q;
168170
// Save eew information before reshuffling
169171
rvv_pkg::vew_e eew_old_buffer_d, eew_old_buffer_q, eew_new_buffer_d, eew_new_buffer_q;
170172
// Helpers to handle reshuffling with LMUL > 1
171-
logic [2:0] rs_lmul_cnt_d, rs_lmul_cnt_q;
172-
logic [2:0] rs_lmul_cnt_limit_d, rs_lmul_cnt_limit_q;
173-
logic rs_mask_request_d, rs_mask_request_q;
173+
logic [2:0] reg_lmul_cnt_d, reg_lmul_cnt_q;
174+
logic [2:0] reg_lmul_cnt_limit_d, reg_lmul_cnt_limit_q;
175+
logic reg_mask_request_d, reg_mask_request_q;
174176
// Save vreg to be reshuffled before reshuffling
175177
logic [4:0] vs_buffer_d, vs_buffer_q;
176178
// Keep track of the registers to be reshuffled |vs1|vs2|vd|
@@ -191,13 +193,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
191193
eew_new_buffer_q <= rvv_pkg::EW8;
192194
vs_buffer_q <= '0;
193195
reshuffle_req_q <= '0;
194-
rs_lmul_cnt_q <= '0;
195-
rs_lmul_cnt_limit_q <= '0;
196-
rs_mask_request_q <= 1'b0;
196+
reg_lmul_cnt_q <= '0;
197+
reg_lmul_cnt_limit_q <= '0;
198+
reg_mask_request_q <= 1'b0;
197199
reshuffle_eew_vs1_q <= rvv_pkg::EW8;
198200
reshuffle_eew_vs2_q <= rvv_pkg::EW8;
199201
reshuffle_eew_vd_q <= rvv_pkg::EW8;
200202
pending_seg_mem_op_q <= 1'b0;
203+
emul_vs1_q <= LMUL_1;
204+
emul_vs2_q <= LMUL_1;
201205
end else begin
202206
state_q <= state_d;
203207
state_qq <= state_q;
@@ -207,13 +211,15 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
207211
eew_new_buffer_q <= eew_new_buffer_d;
208212
vs_buffer_q <= vs_buffer_d;
209213
reshuffle_req_q <= reshuffle_req_d;
210-
rs_lmul_cnt_q <= rs_lmul_cnt_d;
211-
rs_lmul_cnt_limit_q <= rs_lmul_cnt_limit_d;
212-
rs_mask_request_q <= rs_mask_request_d;
214+
reg_lmul_cnt_q <= reg_lmul_cnt_d;
215+
reg_lmul_cnt_limit_q <= reg_lmul_cnt_limit_d;
216+
reg_mask_request_q <= reg_mask_request_d;
213217
reshuffle_eew_vs1_q <= reshuffle_eew_vs1_d;
214218
reshuffle_eew_vs2_q <= reshuffle_eew_vs2_d;
215219
reshuffle_eew_vd_q <= reshuffle_eew_vd_d;
216220
pending_seg_mem_op_q <= pending_seg_mem_op_d;
221+
emul_vs1_q <= emul_vs1_d;
222+
emul_vs2_q <= emul_vs2_d;
217223
end
218224
end
219225

@@ -385,9 +391,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
385391

386392
pending_seg_mem_op_d = pending_seg_mem_op_q;
387393

388-
rs_lmul_cnt_d = '0;
389-
rs_lmul_cnt_limit_d = '0;
390-
rs_mask_request_d = 1'b0;
394+
reg_lmul_cnt_d = '0;
395+
reg_lmul_cnt_limit_d = '0;
396+
reg_mask_request_d = 1'b0;
391397

392398
illegal_insn = 1'b0;
393399
illegal_insn_load = 1'b0;
@@ -452,6 +458,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
452458

453459
emul_vs1 = ara_req.emul;
454460
emul_vs2 = ara_req.emul;
461+
emul_vs1_d = emul_vs1_q;
462+
emul_vs2_d = emul_vs2_q;
455463

456464
// Saturation in any lane will raise vxsat flag
457465
csr_vxsat_d |= |vxsat_flag_i;
@@ -489,9 +497,9 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
489497
acc_resp_o.resp_valid = 1'b0;
490498

491499
// Handle LMUL > 1
492-
rs_lmul_cnt_d = rs_lmul_cnt_q;
493-
rs_lmul_cnt_limit_d = rs_lmul_cnt_limit_q;
494-
rs_mask_request_d = 1'b0;
500+
reg_lmul_cnt_d = reg_lmul_cnt_q;
501+
reg_lmul_cnt_limit_d = reg_lmul_cnt_limit_q;
502+
reg_mask_request_d = 1'b0;
495503

496504
// Every single reshuffle request refers to LMUL == 1
497505
ara_req.emul = LMUL_1;
@@ -502,7 +510,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
502510
// These generate a reshuffle request to Ara's backend
503511
// When LMUL > 1, not all the regs that compose a large
504512
// register should always be reshuffled
505-
ara_req_valid = ~rs_mask_request_q;
513+
ara_req_valid = ~reg_mask_request_q;
506514
ara_req.use_scalar_op = 1'b1;
507515
ara_req.vs2 = vs_buffer_q;
508516
ara_req.eew_vs2 = eew_old_buffer_q;
@@ -525,8 +533,8 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
525533
// Backend ready - Decide what to do next
526534
if (ara_req_ready_i) begin
527535
// Register completely reshuffled
528-
if (rs_lmul_cnt_q == rs_lmul_cnt_limit_q) begin
529-
rs_lmul_cnt_d = 0;
536+
if (reg_lmul_cnt_q == reg_lmul_cnt_limit_q) begin
537+
reg_lmul_cnt_d = 0;
530538

531539
// Delete the already processed vector register from the notebook -> |vs1|vs2|vd|
532540
unique casez (reshuffle_req_q)
@@ -544,11 +552,13 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
544552
eew_old_buffer_d = eew_q[insn.vmem_type.rs2];
545553
eew_new_buffer_d = reshuffle_eew_vs2_q;
546554
vs_buffer_d = insn.varith_type.rs2;
555+
reg_lmul_cnt_limit_d = emul_vs2_q;
547556
end
548557
3'b100: begin
549558
eew_old_buffer_d = eew_q[insn.vmem_type.rs1];
550559
eew_new_buffer_d = reshuffle_eew_vs1_q;
551560
vs_buffer_d = insn.varith_type.rs1;
561+
reg_lmul_cnt_limit_d = emul_vs1_q;
552562
end
553563
default:;
554564
endcase
@@ -570,7 +580,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
570580
// The register is not completely reshuffled (LMUL > 1)
571581
end else begin
572582
// Count up
573-
rs_lmul_cnt_d = rs_lmul_cnt_q + 1;
583+
reg_lmul_cnt_d = reg_lmul_cnt_q + 1;
574584

575585
// Prepare the information to reshuffle the vector registers during the next cycles
576586
// Since LMUL > 1, we should go on and check if the next register needs a reshuffle
@@ -595,7 +605,7 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
595605
endcase
596606

597607
// Mask the next request if we don't need to reshuffle the next reg
598-
if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1;
608+
if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1;
599609
end
600610
end
601611
end
@@ -1534,16 +1544,19 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
15341544
case (insn.varith_type.rs1)
15351545
5'b00000: begin
15361546
ara_req.op = ara_pkg::VMVXS;
1547+
ara_req.use_vs1 = 1'b0;
15371548
ara_req.vl = 1;
15381549
// Scalar source
15391550
emul_vs2 = LMUL_1;
15401551
end
15411552
5'b10000: begin
15421553
ara_req.op = ara_pkg::VCPOP;
1554+
ara_req.use_vs1 = 1'b0;
15431555
ara_req.eew_vs2 = eew_q[ara_req.vs2];
15441556
end
15451557
5'b10001: begin
15461558
ara_req.op = ara_pkg::VFIRST;
1559+
ara_req.use_vs1 = 1'b0;
15471560
ara_req.eew_vs2 = eew_q[ara_req.vs2];
15481561
end
15491562
default :;
@@ -3753,27 +3766,38 @@ module ara_dispatcher import ara_pkg::*; import rvv_pkg::*; #(
37533766
endcase
37543767

37553768
// Mask the next request if we don't need to reshuffle the next reg
3756-
if (eew_new_buffer_d == eew_old_buffer_d) rs_mask_request_d = 1'b1;
3769+
if (eew_new_buffer_d == eew_old_buffer_d) reg_mask_request_d = 1'b1;
37573770
end
37583771

37593772
// Reshuffle if at least one of the three registers needs a reshuffle
37603773
if (|reshuffle_req_d) begin
37613774
// Instruction is of one of the RVV types
37623775
automatic rvv_instruction_t insn = rvv_instruction_t'(instr.instr);
3776+
automatic rvv_pkg::vlmul_e reshuffle_emul;
37633777

37643778
// Stall the interface, and inject a reshuffling instruction
37653779
acc_resp_o.req_ready = 1'b0;
37663780
acc_resp_o.resp_valid = 1'b0;
37673781
ara_req_valid = 1'b0;
37683782

37693783
// Initialize the reshuffle counter limit to handle LMUL > 1
3770-
unique case (ara_req.emul)
3771-
LMUL_2: rs_lmul_cnt_limit_d = 1;
3772-
LMUL_4: rs_lmul_cnt_limit_d = 3;
3773-
LMUL_8: rs_lmul_cnt_limit_d = 7;
3774-
default: rs_lmul_cnt_limit_d = 0;
3784+
unique casez (reshuffle_req_d)
3785+
3'b??1: reshuffle_emul = ara_req.emul;
3786+
3'b?10: reshuffle_emul = emul_vs2;
3787+
3'b100: reshuffle_emul = emul_vs1;
3788+
endcase
3789+
3790+
unique case (reshuffle_emul)
3791+
LMUL_2: reg_lmul_cnt_limit_d = 1;
3792+
LMUL_4: reg_lmul_cnt_limit_d = 3;
3793+
LMUL_8: reg_lmul_cnt_limit_d = 7;
3794+
default: reg_lmul_cnt_limit_d = 0;
37753795
endcase
37763796

3797+
// Save vs1 and vs2 emul for reshuffling. vd, if needed, has been saved already.
3798+
emul_vs1_d = emul_vs1;
3799+
emul_vs2_d = emul_vs2;
3800+
37773801
// Save info for next reshuffles
37783802
reshuffle_eew_vs1_d = ara_req.eew_vs1;
37793803
reshuffle_eew_vs2_d = ara_req.eew_vs2;

0 commit comments

Comments
 (0)