Skip to content

Commit fc4e557

Browse files
committed
build fixes
1 parent e0dc77a commit fc4e557

8 files changed

Lines changed: 51 additions & 42 deletions

File tree

hw/rtl/tcu/VX_tcu_tbuf.sv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ module VX_tcu_tbuf import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
6161
input wire [1:0] req_cd_nregs,
6262
input wire [`XLEN-1:0] req_desc_a,
6363
input wire [`XLEN-1:0] req_desc_b,
64+
input wire req_a_is_smem,
6465

6566
// LMEM read port
6667
VX_mem_bus_if.master tcu_lmem_if,
@@ -134,6 +135,7 @@ module VX_tcu_tbuf import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
134135
.req_fmt_s (req_fmt_s),
135136
.req_desc_a (req_desc_a),
136137
.req_desc_b (req_desc_b),
138+
.req_a_is_smem(req_a_is_smem),
137139
.tcu_lmem_if (tcu_lmem_if),
138140
.tbuf_hit (tbuf_hit),
139141
.tbuf_ready (tbuf_ready),

hw/rtl/tcu/VX_tcu_tbuf_fetch.sv

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ module VX_tcu_tbuf_fetch import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
5454
input wire [3:0] req_fmt_s,
5555
input wire [`XLEN-1:0] req_desc_a,
5656
input wire [`XLEN-1:0] req_desc_b,
57+
input wire req_a_is_smem,
5758

5859
// LMEM bank-parallel read port (1-cycle latency, pipelined)
5960
VX_mem_bus_if.master tcu_lmem_if,
@@ -131,6 +132,7 @@ module VX_tcu_tbuf_fetch import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
131132
logic [`XLEN-1:0] slot_desc_b;
132133
logic slot_fetch_done;
133134
logic alloc_pending;
135+
logic slot_a_from_smem;
134136
logic [BANK_ADDR_WIDTH-1:0] slot_a_row_base;
135137
logic [BANK_ADDR_WIDTH-1:0] slot_b_row_base;
136138

@@ -147,8 +149,10 @@ module VX_tcu_tbuf_fetch import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
147149

148150
wire is_first_uop = (req_step_m == '0) && (req_step_n == '0) && (req_step_k == '0);
149151

150-
// Descriptor match: always validate against current slot contents.
151-
wire desc_match = (slot_desc_a == req_desc_a) && (slot_desc_b == req_desc_b);
152+
// Descriptor match: validate against current slot contents.
153+
// RS mode (req_a_is_smem=0): A comes from registers, so only the B descriptor is compared.
154+
wire desc_match = (slot_desc_b == req_desc_b)
155+
&& (req_a_is_smem ? (slot_desc_a == req_desc_a) : 1'b1);
152156

153157
// Hit: slot is valid, data ready, and descriptors match.
154158
assign tbuf_hit = slot_valid && slot_fetch_done && desc_match;
@@ -334,7 +338,8 @@ module VX_tcu_tbuf_fetch import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
334338
req_ctr_r <= '0;
335339
rsp_ctr_r <= '0;
336340
req_inflight_r <= 1'b0;
337-
send_state_r <= SEND_FETCH_A;
341+
// RS mode: A comes from registers, skip FETCH_A
342+
send_state_r <= slot_a_from_smem ? SEND_FETCH_A : SEND_FETCH_B;
338343
end
339344
end
340345
// -----------------------------------------------------------------
@@ -404,6 +409,7 @@ module VX_tcu_tbuf_fetch import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
404409
if (alloc_en) begin
405410
slot_valid <= 1'b1;
406411
slot_fetch_done <= 1'b0;
412+
slot_a_from_smem <= req_a_is_smem;
407413
slot_desc_a <= req_desc_a;
408414
slot_desc_b <= req_desc_b;
409415
slot_a_row_base <= desc_a_row_base;

hw/rtl/tcu/VX_tcu_unit.sv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ module VX_tcu_unit import VX_gpu_pkg::*, VX_tcu_pkg::*; #(
126126
.req_cd_nregs (per_block_execute_if[block_idx].data.op_args.tcu.cd_nregs),
127127
.req_desc_a (per_block_execute_if[block_idx].data.rs1_data[0]),
128128
.req_desc_b (per_block_execute_if[block_idx].data.rs2_data[0]),
129+
.req_a_is_smem (per_block_execute_if[block_idx].data.op_args.tcu.a_from_smem),
129130
.tcu_lmem_if (per_block_lmem_if[block_idx]),
130131
// Tile data outputs
131132
.tbuf_rs1_data (tbuf_rs1_data[block_idx]),

hw/rtl/tcu/VX_tcu_uops.sv

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@ module VX_tcu_uops import VX_tcu_pkg::*, VX_gpu_pkg::*; (
241241
: (k_index == `UP(LG_K)'(TCU_K_STEPS - 1));
242242
`else
243243
wire wmma_is_first_k = (k_index == '0);
244-
`else
245244
wire wmma_is_last_k = (k_index == `UP(LG_K)'(TCU_K_STEPS - 1));
246245
`endif
247246

runtime/opae/vortex.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,11 +550,11 @@ class vx_device {
550550
uint64_t dev_caps_;
551551
uint64_t isa_caps_;
552552
uint64_t global_mem_size_;
553-
uint64_t clock_rate_;
554553
uint64_t staging_wsid_;
555554
uint64_t staging_ioaddr_;
556555
uint8_t* staging_ptr_;
557556
uint64_t staging_size_;
557+
uint64_t clock_rate_;
558558
};
559559

560560
#include <callbacks.inc>

sim/simx/decode.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
623623
instr->set_args(IntrBrArgs{funct3, addr});
624624
instr->set_src_reg(0, rs1, RegType::Integer);
625625
instr->set_src_reg(1, rs2, RegType::Integer);
626+
instr->set_wstall(true);
626627
} break;
627628
case Opcode::JAL: {
628629
auto unordered = code >> shift_funct3;
@@ -635,6 +636,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
635636
instr->set_op_type(BrType::JAL);
636637
instr->set_args(IntrBrArgs{0, addr});
637638
instr->set_dest_reg(rd, RegType::Integer);
639+
instr->set_wstall(true);
638640
} break;
639641
case Opcode::JALR: {
640642
auto imm12 = code >> shift_rs2;
@@ -643,6 +645,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
643645
instr->set_args(IntrBrArgs{0, addr});
644646
instr->set_dest_reg(rd, RegType::Integer);
645647
instr->set_src_reg(0, rs1, RegType::Integer);
648+
instr->set_wstall(true);
646649
} break;
647650
case Opcode::L:
648651
case Opcode::FL:
@@ -756,6 +759,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
756759
auto imm12 = code >> shift_rs2;
757760
instr->set_op_type(BrType::SYS);
758761
instr->set_args(IntrBrArgs{0, imm12});
762+
instr->set_wstall(true);
759763
}
760764
} break;
761765
case Opcode::FCI: {
@@ -953,34 +957,40 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
953957
case 0: // TMC
954958
instr->set_op_type(WctlType::TMC);
955959
instr->set_src_reg(0, rs1, RegType::Integer);
960+
instr->set_wstall(true);
956961
break;
957962
case 1: // WSPAWN
958963
instr->set_op_type(WctlType::WSPAWN);
959964
instr->set_src_reg(0, rs1, RegType::Integer);
960965
instr->set_src_reg(1, rs2, RegType::Integer);
966+
instr->set_wstall(true);
961967
break;
962968
case 2: // SPLIT
963969
instr->set_op_type(WctlType::SPLIT);
964970
instr->set_dest_reg(rd, RegType::Integer);
965971
instr->set_src_reg(0, rs1, RegType::Integer);
966972
wctlArgs.is_cond_neg = (rs2 != 0);
973+
instr->set_wstall(true);
967974
break;
968975
case 3: // JOIN
969976
instr->set_op_type(WctlType::JOIN);
970977
instr->set_src_reg(0, rs1, RegType::Integer);
978+
instr->set_wstall(true);
971979
break;
972-
case 4: // BAR
980+
case 4: // BAR (sync)
973981
instr->set_op_type(WctlType::BAR);
974982
instr->set_src_reg(0, rs1, RegType::Integer);
975983
instr->set_src_reg(1, rs2, RegType::Integer);
976984
wctlArgs.is_sync_bar = 1;
977985
wctlArgs.is_bar_arrive = 0;
986+
instr->set_wstall(true);
978987
break;
979988
case 5: // PRED
980989
instr->set_op_type(WctlType::PRED);
981990
instr->set_src_reg(0, rs1, RegType::Integer);
982991
instr->set_src_reg(1, rs2, RegType::Integer);
983992
wctlArgs.is_cond_neg = (rd != 0);
993+
instr->set_wstall(true);
984994
break;
985995
case 6: // BAR ARRIVE / WAIT
986996
instr->set_op_type(WctlType::BAR);
@@ -989,9 +999,11 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
989999
instr->set_src_reg(1, rs2, RegType::Integer);
9901000
wctlArgs.is_sync_bar = 0;
9911001
wctlArgs.is_bar_arrive = (rd != 0);
1002+
instr->set_wstall(rd == 0); // stall on wait, not on arrive
9921003
break;
9931004
case 7: // WSYNC
9941005
instr->set_op_type(WctlType::WSYNC);
1006+
instr->set_wstall(true);
9951007
break;
9961008
default:
9971009
std::abort();
@@ -1046,7 +1058,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
10461058
instr->set_op_type(TcuType::WMMA);
10471059
instr->set_args(IntrTcuArgs{is_sparse, 0, 0, fmt_s, fmt_d, 0, 0, 0});
10481060
instr->set_macro_op();
1049-
1061+
instr->set_wstall(true);
10501062
} break;
10511063
#ifdef TCU_WGMMA_ENABLE
10521064
case 1: { // WGMMA_SYNC — single macro Instr, sequencer expands to micro-ops
@@ -1057,7 +1069,7 @@ Instr::Ptr Emulator::decode(uint32_t code, uint32_t /*wid*/, uint64_t uuid) {
10571069
instr->set_op_type(TcuType::WGMMA);
10581070
instr->set_args(IntrTcuArgs{is_sparse, is_a_smem ? 1u : 0u, cd_nregs, fmt_s, fmt_d, 0, 0, 0});
10591071
instr->set_macro_op();
1060-
1072+
instr->set_wstall(true);
10611073
} break;
10621074
#endif // TCU_WGMMA_ENABLE
10631075
default:

sim/simx/emulator.cpp

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -269,24 +269,8 @@ void Emulator::decode(instr_trace_t* trace) {
269269
// Conservative writeback: true if destination register exists
270270
trace->wb = (instr->get_dest_reg().type != RegType::None);
271271

272-
// Determine is_wstall: mirrors RTL VX_decode.sv is_wstall.
273-
// Branches, system calls, and SFU warp-control (except async barrier arrive)
274-
// stall the warp until commit. All other instructions (ALU, FPU, LSU) do not.
275-
{
276-
bool wstall = false;
277-
if (trace->fu_type == FUType::ALU && std::holds_alternative<BrType>(trace->op_type)) {
278-
wstall = true;
279-
} else if (trace->fu_type == FUType::SFU) {
280-
wstall = true;
281-
if (auto* wctl = std::get_if<WctlType>(&trace->op_type)) {
282-
if (*wctl == WctlType::BAR) {
283-
auto& args = std::get<IntrWctlArgs>(instr->get_args());
284-
wstall = !args.is_bar_arrive;
285-
}
286-
}
287-
}
288-
instr->set_wstall(wstall);
289-
}
272+
// is_wstall is set during instruction decode (decode.cpp) to mirror
273+
// RTL VX_decode.sv is_wstall. Transfer to trace for pipeline use.
290274
trace->fetch_stall = instr->is_wstall();
291275
}
292276

tests/regression/common.mk

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -71,14 +71,6 @@ CXXFLAGS += $(CONFIGS)
7171

7272
LDFLAGS += -L$(VORTEX_RT_PATH) -lvortex
7373

74-
# Auto-rebuild when CONFIGS changes (avoids manual 'make clean')
75-
CONFIGS_STAMP := .configs.stamp
76-
CURRENT_CONFIGS := $(CONFIGS)
77-
PREV_CONFIGS := $(shell cat $(CONFIGS_STAMP) 2>/dev/null)
78-
ifneq ($(CURRENT_CONFIGS),$(PREV_CONFIGS))
79-
$(shell echo '$(CURRENT_CONFIGS)' > $(CONFIGS_STAMP))
80-
endif
81-
8274
# Debugging
8375
ifdef DEBUG
8476
CXXFLAGS += -g -O0
@@ -98,8 +90,20 @@ endif
9890
endif
9991
endif
10092

93+
CONFIG_STAMP = config.stamp
94+
10195
all: $(PROJECT) kernel.vxbin kernel.dump
10296

97+
# Force a rebuild when the compile flags (VX_CFLAGS or CXXFLAGS, which carries CONFIGS) change between runs.
98+
$(CONFIG_STAMP): FORCE
99+
@printf '%s\n' '$(VX_CFLAGS)' '$(CXXFLAGS)' > $@.tmp
100+
@if ! cmp -s $@.tmp $@; then \
101+
mv $@.tmp $@; \
102+
else \
103+
rm $@.tmp; \
104+
fi
105+
FORCE:
106+
103107
kernel.dump: kernel.elf
104108
$(VX_DP) -D $< > $@
105109

@@ -108,26 +112,26 @@ kernel.vxbin: kernel.elf
108112

109113
$(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a:
110114
$(MAKE) -C $(VORTEX_KN_PATH)
111-
115+
112116
$(VORTEX_RT_PATH)/libvortex.so:
113117
$(MAKE) -C $(VORTEX_RT_PATH)
114118

115119
ifneq ($(filter %.S,$(VX_SRCS)),)
116-
kernel.elf: $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIGS_STAMP)
117-
$(VX_CXX) $(VX_CFLAGS) $(filter-out $(CONFIGS_STAMP),$^) $(VX_LDFLAGS) -o $@
120+
kernel.elf: $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIG_STAMP)
121+
$(VX_CXX) $(VX_CFLAGS) $(filter-out $(CONFIG_STAMP),$^) $(VX_LDFLAGS) -o $@
118122
else
119-
vx_start.o: $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIGS_STAMP)
123+
vx_start.o: $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIG_STAMP)
120124
$(VX_CXX) $(VX_CFLAGS) -c $(VX_SRCS)
121125
$(VX_CXX) $(VX_CFLAGS) -DNEED_GP -DNEED_TLS -DNEED_INITFINI $(VX_KMU_FLAG) -c $(VX_STARTUP_SRC) -o $@
122126
$(VX_CXX) $(VX_CFLAGS) $@ $(VX_APP_OBJS) $(VX_LDFLAGS) -o $@.elf
123127
$(VX_CXX) $(VX_CFLAGS) $$($(KERNEL_STARTUP) $(VX_DP) $@.elf) $(VX_KMU_FLAG) -c $(VX_STARTUP_SRC) -o $@ && rm -f $@.elf
124128

125-
kernel.elf: vx_start.o $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIGS_STAMP)
129+
kernel.elf: vx_start.o $(VX_SRCS) $(VORTEX_KN_PATH)/lib$(KERNEL_LIB).a $(CONFIG_STAMP)
126130
$(VX_CXX) $(VX_CFLAGS) vx_start.o $(VX_APP_OBJS) $(VX_LDFLAGS) -o $@
127131
endif
128132

129-
$(PROJECT): $(SRCS) $(VORTEX_RT_PATH)/libvortex.so $(CONFIGS_STAMP)
130-
$(CXX) $(CXXFLAGS) $(filter-out $(CONFIGS_STAMP),$^) $(LDFLAGS) -o $@
133+
$(PROJECT): $(SRCS) $(VORTEX_RT_PATH)/libvortex.so $(CONFIG_STAMP)
134+
$(CXX) $(CXXFLAGS) $(filter-out $(CONFIG_STAMP),$^) $(LDFLAGS) -o $@
131135

132136
run-simx: $(PROJECT) kernel.vxbin
133137
LD_LIBRARY_PATH=$(VORTEX_RT_PATH):$(LD_LIBRARY_PATH) VORTEX_DRIVER=simx ./$(PROJECT) $(OPTS)
@@ -154,9 +158,10 @@ clean-kernel:
154158
rm -rf *.elf *.vxbin *.dump
155159

156160
clean-host:
157-
rm -rf $(PROJECT) *.o *.log .depend $(CONFIGS_STAMP)
161+
rm -rf $(PROJECT) *.o *.log .depend
158162

159163
clean: clean-kernel clean-host
164+
rm -f $(CONFIG_STAMP) $(CONFIG_STAMP).tmp
160165

161166
ifneq ($(MAKECMDGOALS),clean)
162167
-include .depend

0 commit comments

Comments
 (0)