Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions hw/ip/snitch/src/snitch.sv
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,18 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
VFMIN_R_S,
VFMAX_S,
VFMAX_R_S,
VMFEQ_S,
VMFEQ_R_S,
VMFNE_S,
VMFNE_R_S,
VMFLT_S,
VMFLT_R_S,
VMFLE_S,
VMFLE_R_S,
VMFGT_S,
VMFGT_R_S,
VMFGE_S,
VMFGE_R_S,
VFSQRT_S,
VFMAC_S,
VFMAC_R_S,
Expand Down Expand Up @@ -1348,6 +1360,18 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
VFDIV_R_H,
VFMIN_H,
VFMIN_R_H,
VMFEQ_H,
VMFEQ_R_H,
VMFNE_H,
VMFNE_R_H,
VMFLT_H,
VMFLT_R_H,
VMFLE_H,
VMFLE_R_H,
VMFGT_H,
VMFGT_R_H,
VMFGE_H,
VMFGE_R_H,
VFMAX_H,
VFMAX_R_H,
VFSQRT_H,
Expand Down Expand Up @@ -1601,6 +1625,18 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
VFDIV_R_B,
VFMIN_B,
VFMIN_R_B,
VMFEQ_B,
VMFEQ_R_B,
VMFNE_B,
VMFNE_R_B,
VMFLT_B,
VMFLT_R_B,
VMFLE_B,
VMFLE_R_B,
VMFGT_B,
VMFGT_R_B,
VMFGE_B,
VMFGE_R_B,
VFMAX_B,
VFMAX_R_B,
VFSQRT_B,
Expand Down Expand Up @@ -2387,6 +2423,10 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
riscv_instr::VFSUB_VV,
riscv_instr::VFMIN_VV,
riscv_instr::VFMAX_VV,
riscv_instr::VMFEQ_VV,
riscv_instr::VMFNE_VV,
riscv_instr::VMFLT_VV,
riscv_instr::VMFLE_VV,
riscv_instr::VFSGNJ_VV,
riscv_instr::VFSGNJN_VV,
riscv_instr::VFSGNJX_VV,
Expand Down Expand Up @@ -2506,6 +2546,12 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #(
riscv_instr::VFSUB_VF,
riscv_instr::VFMIN_VF,
riscv_instr::VFMAX_VF,
riscv_instr::VMFEQ_VF,
riscv_instr::VMFNE_VF,
riscv_instr::VMFLT_VF,
riscv_instr::VMFLE_VF,
riscv_instr::VMFGT_VF,
riscv_instr::VMFGE_VF,
riscv_instr::VFSGNJ_VF,
riscv_instr::VFSGNJN_VF,
riscv_instr::VFSGNJX_VF,
Expand Down
55 changes: 55 additions & 0 deletions hw/ip/spatz/src/spatz_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,16 @@ module spatz_decoder
riscv_instr::VFMIN_VF,
riscv_instr::VFMAX_VV,
riscv_instr::VFMAX_VF,
riscv_instr::VMFEQ_VV,
riscv_instr::VMFEQ_VF,
riscv_instr::VMFNE_VV,
riscv_instr::VMFNE_VF,
riscv_instr::VMFLT_VV,
riscv_instr::VMFLT_VF,
riscv_instr::VMFLE_VV,
riscv_instr::VMFLE_VF,
riscv_instr::VMFGT_VF,
riscv_instr::VMFGE_VF,
riscv_instr::VFSGNJ_VV,
riscv_instr::VFSGNJ_VF,
riscv_instr::VFSGNJN_VV,
Expand Down Expand Up @@ -978,6 +988,51 @@ module spatz_decoder
spatz_req.rm = fpnew_pkg::RTZ;
end

// Vector floating-point compares that produce a mask (vmf*).
// Every variant is decoded to the single VFCMP operation; the variants differ
// only in the value placed in spatz_req.rm, which is reused here to carry the
// requested comparison rather than a rounding mode.
// NOTE(review): presumably this follows the fpnew CMP convention
// (RDN = EQ, RTZ = LT, RNE = LE) -- confirm against the fpnew documentation.

// vmfeq.vv / vmfeq.vf: equality compare
riscv_instr::VMFEQ_VV,
riscv_instr::VMFEQ_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RDN;
end

// vmfne.vv / vmfne.vf: RUP is used as a marker for "not equal".
// spatz_vfu replaces it with RDN and sets fpu_op_mode so that the boolean
// result is inverted.
riscv_instr::VMFNE_VV,
riscv_instr::VMFNE_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RUP;
end

// vmflt.vv / vmflt.vf: less-than compare
riscv_instr::VMFLT_VV,
riscv_instr::VMFLT_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RTZ;
end

// vmfgt.vf: only a vector-scalar form is decoded. It reuses the same rm value
// as the less-than compare above and relies on the operands being switched.
riscv_instr::VMFGT_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RTZ;
// Switch the operands.
// NOTE(review): the assignments below only select vs2 and the scalar rs1;
// the swap itself is not visible in this hunk -- confirm where it takes effect.
spatz_req.vs2 = arith_s2;
spatz_req.use_vs2 = 1'b1;
spatz_req.rs1 = decoder_req_i.rs1;
spatz_req.use_vs1 = 1'b0;

end

// vmfle.vv / vmfle.vf: less-than-or-equal compare
riscv_instr::VMFLE_VV,
riscv_instr::VMFLE_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RNE;
end

// vmfge.vf: only a vector-scalar form is decoded. It reuses the same rm value
// as the less-than-or-equal compare above and relies on the operands being
// switched.
riscv_instr::VMFGE_VF: begin
spatz_req.op = VFCMP;
spatz_req.rm = fpnew_pkg::RNE;
// Switch the operands.
// NOTE(review): same operand selection as VMFGT_VF -- confirm where the swap
// actually takes effect.
spatz_req.vs2 = arith_s2;
spatz_req.use_vs2 = 1'b1;
spatz_req.rs1 = decoder_req_i.rs1;
spatz_req.use_vs1 = 1'b0;
end

riscv_instr::VFMUL_VV,
riscv_instr::VFMUL_VF: spatz_req.op = VFMUL;
riscv_instr::VFMACC_VV,
Expand Down
6 changes: 6 additions & 0 deletions hw/ip/spatz/src/spatz_fpu_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,12 @@ module spatz_fpu_sequencer
riscv_instr::VFSUB_VF,
riscv_instr::VFMIN_VF,
riscv_instr::VFMAX_VF,
riscv_instr::VMFEQ_VF,
riscv_instr::VMFNE_VF,
riscv_instr::VMFLT_VF,
riscv_instr::VMFLE_VF,
riscv_instr::VMFGT_VF,
riscv_instr::VMFGE_VF,
riscv_instr::VFSGNJ_VF,
riscv_instr::VFSGNJN_VF,
riscv_instr::VFSGNJX_VF,
Expand Down
76 changes: 70 additions & 6 deletions hw/ip/spatz/src/spatz_vfu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ module spatz_vfu
logic [N_FU*ELENB-1:0] result_valid;
logic result_ready;

// Index of the VFCMP result word currently being packed into the mask; held at zero for all other operations
logic [$clog2(NrWordsPerVector):0] word_idx_d, word_idx_q;
`FF(word_idx_q, word_idx_d, '0)

always_comb begin: control_proc
// Maintain state
vl_d = vl_q;
Expand Down Expand Up @@ -445,7 +449,7 @@ module spatz_vfu
word_issued = spatz_req_valid && &(in_ready | ~valid_operations) && operands_ready && !stall;

// Are we ready to accept a result?
result_ready = &(result_valid | ~pending_results) && ((result_tag.wb && vfu_rsp_ready_i) || vrf_wvalid_i);
result_ready = &(result_valid | ~pending_results) && ((result_tag.wb && vfu_rsp_ready_i) || vrf_wvalid_i || (spatz_req.op == VFCMP && !result_tag.last));

// Initialize the pointers
reduction_pointer_d = '0;
Expand Down Expand Up @@ -758,12 +762,12 @@ module spatz_vfu
if (word_issued) begin
vreg_addr_d[0] = vreg_addr_d[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q);
vreg_addr_d[1] = vreg_addr_d[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q);
vreg_addr_d[2] = vreg_addr_d[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q));
vreg_addr_d[2] = vreg_addr_d[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q) && (spatz_req.op != VFCMP));
end
end else if (spatz_req_valid && vl_q < spatz_req.vl && word_issued) begin
vreg_addr_d[0] = vreg_addr_q[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q);
vreg_addr_d[1] = vreg_addr_q[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q);
vreg_addr_d[2] = vreg_addr_q[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q));
vreg_addr_d[2] = vreg_addr_q[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q) && (spatz_req.op != VFCMP));
end
end: vreg_addr_proc

Expand All @@ -784,6 +788,12 @@ module spatz_vfu
if (result_tag.narrowing) begin
// Only write half of the elements
vreg_wbe = result_tag.narrowing_upper ? {{(N_FU*ELENB/2){1'b1}}, {(N_FU*ELENB/2){1'b0}}} : {{(N_FU*ELENB/2){1'b0}}, {(N_FU*ELENB/2){1'b1}}};
end else if (spatz_req.op == VFCMP) begin
// every vector element requires 1 bit of wbe --> ceil(vl/8)
automatic logic [$clog2((MAXVL+7)/8+1)-1:0] mask_bytes;
vreg_we = result_tag.last;
mask_bytes = (spatz_req.vl + 7) >> 3;
vreg_wbe = (mask_bytes >= N_FU*ELENB) ? '1 : vrf_be_t'((vrf_be_t'(1) << mask_bytes) - 1);
end
end

Expand All @@ -799,7 +809,22 @@ module spatz_vfu
end
end : operand_req_proc

logic [N_FU*ELEN-1:0] vreg_wdata;
// High when the current operation is a VFCMP, every pending result lane is
// valid, and the result side is ready to take the word.
logic vfcmp_result_accepted;
assign vfcmp_result_accepted = (spatz_req.op == VFCMP)
                            && &(result_valid | ~pending_results)
                            && result_ready;

// Next-state logic for the VFCMP result-word counter (word_idx_q).
always_comb begin : VRF_cnt_proc
  // Hold the current index unless one of the rules below overrides it
  word_idx_d = word_idx_q;

  if (spatz_req.op != VFCMP) begin
    // The counter is only used by compares; keep it at zero otherwise
    word_idx_d = '0;
  end else if (vfcmp_result_accepted) begin
    // One more result word has been consumed ...
    word_idx_d = word_idx_q + 1;
    // ... unless it was the last one, in which case start over
    if (result_tag.last) begin
      word_idx_d = '0;
    end
  end
end

logic [N_FU*ELEN-1:0] vreg_wdata, wdata_d, wdata_q;
always_comb begin: align_result
// Data from the FU to be written to the VRF
// For reductions, if the result is present in the buffer used for intra-lane reductions
Expand All @@ -819,14 +844,50 @@ module spatz_vfu
end
default:;
endcase
end else if (spatz_req.op == VFCMP) begin
// default
vreg_wdata = '0;
unique case (spatz_req.vtype.vsew)
EW_8: begin
for (int i = 0; i < VRFWordWidth/8; i++)
vreg_wdata[i+(VRFWordWidth/8*word_idx_q)] = result[i*8];
end
EW_16: begin
for (int i = 0; i < VRFWordWidth/16; i++)
vreg_wdata[i+(VRFWordWidth/16*word_idx_q)] = result[i*16];
end
EW_32: begin
for (int i = 0; i < VRFWordWidth/32; i++)
vreg_wdata[i+(VRFWordWidth/32*word_idx_q)] = result[i*32];
end
EW_64: begin
for (int i = 0; i < VRFWordWidth/64; i++)
vreg_wdata[i+(VRFWordWidth/64*word_idx_q)] = result[i*64];
end
default:;
endcase
end
end

// Next-state logic for the VFCMP mask accumulator (wdata_q).
always_comb begin : wdata_proc
  // Keep the bits collected so far unless a rule below overrides them
  wdata_d = wdata_q;

  if (spatz_req.op != VFCMP) begin
    // Nothing to accumulate outside of a compare
    wdata_d = '0;
  end else if (vfcmp_result_accepted) begin
    // Merge the bits of the word that was just accepted ...
    wdata_d = wdata_q | vreg_wdata;
    // ... and clear the accumulator once the last word has been accepted
    if (result_tag.last) begin
      wdata_d = '0;
    end
  end
end

`FF(wdata_q, wdata_d, '0)

// Register file signals
assign vrf_re_o = vreg_r_req;
assign vrf_we_o = vreg_we;
assign vrf_wbe_o = vreg_wbe;
assign vrf_wdata_o = vreg_wdata;
assign vrf_wdata_o = (spatz_req.op == VFCMP) ? (wdata_q | vreg_wdata) : vreg_wdata;
assign vrf_id_o = {result_tag.id, {3{spatz_req.id}}};

//////////
Expand Down Expand Up @@ -1089,6 +1150,9 @@ module spatz_vfu
VFCMP : begin
fpu_op = fpnew_pkg::CMP;
fpu_dst_fmt = fpu_src_fmt;
if (spatz_req.rm == fpnew_pkg::RUP)
// Boolean result inverted
fpu_op_mode = 1'b1;
end

VF2F: fpu_op = fpnew_pkg::F2F;
Expand Down Expand Up @@ -1188,7 +1252,7 @@ module spatz_vfu
`FFL(fpu_int_fmt_q, fpu_int_fmt, int_fpu_in_valid && int_fpu_in_ready, fpnew_pkg::INT8)
`FFL(fpu_op_mode_q, fpu_op_mode, int_fpu_in_valid && int_fpu_in_ready, 1'b0)
`FFL(fpu_vectorial_op_q, fpu_vectorial_op, int_fpu_in_valid && int_fpu_in_ready, 1'b0)
`FFL(rm_q, spatz_req.rm, int_fpu_in_valid && int_fpu_in_ready, fpnew_pkg::RNE)
`FFL(rm_q, (spatz_req.op == VFCMP && spatz_req.rm == fpnew_pkg::RUP) ? fpnew_pkg::RDN : spatz_req.rm, int_fpu_in_valid && int_fpu_in_ready, fpnew_pkg::RNE)
`FFL(input_tag_q, input_tag, int_fpu_in_valid && int_fpu_in_ready, '{vsew: EW_8, default: '0})
`FFL(fpu_in_valid_q, int_fpu_in_valid, int_fpu_in_ready, 1'b0)
assign int_fpu_in_ready = !fpu_in_valid_q || fpu_in_valid_q && fpu_in_ready_d;
Expand Down
7 changes: 7 additions & 0 deletions sw/riscvTests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ add_snitch_test(vfrsub isa/rv64uv/vfrsub.c)
add_snitch_test(vfmin isa/rv64uv/vfmin.c)
add_snitch_test(vfmax isa/rv64uv/vfmax.c)

add_snitch_test(vmfeq isa/rv64uv/vmfeq.c)
add_snitch_test(vmfne isa/rv64uv/vmfne.c)
add_snitch_test(vmflt isa/rv64uv/vmflt.c)
add_snitch_test(vmfle isa/rv64uv/vmfle.c)
add_snitch_test(vmfgt isa/rv64uv/vmfgt.c)
add_snitch_test(vmfge isa/rv64uv/vmfge.c)

add_snitch_test(vfmul isa/rv64uv/vfmul.c)
add_snitch_test(vfmacc isa/rv64uv/vfmacc.c)
add_snitch_test(vfnmacc isa/rv64uv/vfnmacc.c)
Expand Down
Loading
Loading