@@ -9,48 +9,118 @@ class VecUnit::Impl {
99public:
1010 Impl (VecUnit* simobject, const Arch& /* arch*/ )
1111 : simobject_(simobject)
12+ , num_lanes_(1 ) // Should the vec_unit have more than 1 lane?
13+ , pending_reqs_(1 )
1214 {
1315 this ->clear ();
1416 }
1517
1618 ~Impl () {}
1719
1820 void clear () {
21+ pending_reqs_.clear ();
1922 perf_stats_ = PerfStats ();
2023 }
2124
2225 void tick () {
26+ // Handle memory response
27+ for (uint32_t t = 0 ; t < num_lanes_; ++t) {
28+ auto & mem_rsp_port = simobject_->MemRsps .at (t);
29+ if (mem_rsp_port.empty ())
30+ continue ;
31+
32+ auto & mem_rsp = mem_rsp_port.front ();
33+ auto & entry = pending_reqs_.at (mem_rsp.tag );
34+ auto trace = entry.trace ;
35+
36+ assert (entry.count );
37+ --entry.count ;
38+ if (0 == entry.count ) {
39+ simobject_->Output .push (trace, (vl_ / num_lanes_) * 3 );
40+ pending_reqs_.release (mem_rsp.tag );
41+ }
42+ mem_rsp_port.pop ();
43+ }
44+
45+ for (int i = 0 , n = pending_reqs_.size (); i < n; ++i) {
46+ if (pending_reqs_.contains (i))
47+ perf_stats_.latency += pending_reqs_.at (i).count ;
48+ }
49+
50+ if (simobject_->Input .empty ())
51+ return ;
52+
53+ auto trace = simobject_->Input .front ();
54+
55+ if (pending_reqs_.full ()) {
56+ if (!trace->log_once (true )) {
57+ DT (3 , " *** VecUnit queue stall: " << *trace);
58+ }
59+ ++perf_stats_.stalls ;
60+ return ;
61+ } else {
62+ trace->log_once (false );
63+ }
64+
65+ auto trace_data = std::dynamic_pointer_cast<TraceData>(trace->data );
66+ uint32_t addr_count = 0 ;
67+ for (auto & mem_addr : trace_data->mem_addrs ) {
68+ addr_count += mem_addr.size ();
69+ }
70+
71+ if (addr_count != 0 ) {
72+ auto tag = pending_reqs_.allocate ({trace, addr_count});
73+ for (uint32_t t = 0 ; t < num_lanes_; ++t) {
74+ if (!trace->tmask .test (t))
75+ continue ;
76+
77+ auto & mem_req_port = simobject_->MemReqs .at (t);
78+ for (auto & mem_addr : trace_data->mem_addrs .at (t)) {
79+ MemReq mem_req;
80+ mem_req.addr = mem_addr.addr ;
81+ mem_req.write = (trace->lsu_type == LsuType::STORE);
82+ mem_req.tag = tag;
83+ mem_req.cid = trace->cid ;
84+ mem_req.uuid = trace->uuid ;
85+ mem_req_port.push (mem_req, (vl_ / num_lanes_));
86+ DT (3 , " VecUnit mem-req: addr=0x" << std::hex << mem_addr.addr << " , tag=" << tag << " , tid=" << t << " , " << trace);
87+ ++perf_stats_.reads ;
88+ }
89+ }
90+ } else {
91+ simobject_->Output .push (trace, 1 );
92+ }
93+
94+ simobject_->Input .pop ();
2395 }
2496
25- /*
26- void load(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
27- }
28-
29- void store(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
30- }
31-
32- void execute(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata) {
33- }
34- */
35-
3697 const PerfStats& perf_stats () const {
3798 return perf_stats_;
3899 }
39100
40101private:
41102
103+ struct pending_req_t {
104+ instr_trace_t * trace;
105+ uint32_t count;
106+ };
107+
42108 VecUnit* simobject_;
43109 std::vector<std::vector<Byte>> vreg_file_;
44110 vtype_t vtype_;
45111 uint32_t vl_;
46112 Word vlmax_;
47- PerfStats perf_stats_;
113+ uint32_t num_lanes_;
114+ HashTable<pending_req_t > pending_reqs_;
115+ PerfStats perf_stats_;
48116};
49117
50118VecUnit::VecUnit (const SimContext& ctx,
51119 const char * name,
52120 const Arch &arch)
53121 : SimObject<VecUnit>(ctx, name)
122+ , MemReqs(1 , this )
123+ , MemRsps(1 , this )
54124 , Input(this )
55125 , Output(this )
56126 , impl_(new Impl(this , arch))
@@ -68,20 +138,6 @@ void VecUnit::tick() {
68138 impl_->tick ();
69139}
70140
71- /*
72- void VecUnit::load(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
73- return impl_->load(instr, wid, rsdata);
74- }
75-
76- void VecUnit::store(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata) {
77- return impl_->store(instr, wid, rsdata);
78- }
79-
80- void VecUnit::execute(const Instr &instr, uint32_t wid, std::vector<reg_data_t[3]> &rsdata, std::vector<reg_data_t> &rddata) {
81- return impl_->execute(instr, wid, rsdata, rddata);
82- }
83- */
84-
85141const VecUnit::PerfStats& VecUnit::perf_stats () const {
86142 return impl_->perf_stats ();
87143}
0 commit comments