@@ -433,22 +433,31 @@ impl<T> CacheVec<T> {
433433/// `KIND` (a `CacheKind` discriminant cast to `u8`) controls whether the L2
434434/// decoded-instruction array is allocated and which methods are meaningful.
435435///
436- /// - `tags`: one u32 per cache line (use `get_tag`/`set_tag` for typed access)
437- /// - `data`: entire cache as `SIZE/8` u64 chunks
438- /// - `instrs`: L2 only — one DecodedInstr per 4-byte word (`SIZE/4` entries); empty otherwise
439- struct Cache < const SIZE : usize , const LINE : usize , const KIND : u8 > {
440- tags : CacheVec < u32 > ,
441- data : CacheVec < u64 > ,
442- /// L2 decoded-instruction slots (SIZE/4 entries). Empty for L1-I and L1-D.
436+ /// - `tags`: `TAGS` u32 tags in a heap-allocated fixed-size array (`Box<[u32; TAGS]>`; TAGS = SIZE/LINE)
437+ /// - `data`: `DATA` u64 chunks in a heap-allocated fixed-size array (`Box<[u64; DATA]>`; DATA = SIZE/8)
438+ /// - `instrs`: L2 only — heap Vec of SIZE/4 DecodedInstr slots (6MB, contains fn ptrs)
439+ ///
440+ /// `TAGS` and `DATA` are redundant with `SIZE`/`LINE` but required as explicit const generics
441+ /// because stable Rust cannot use arithmetic on generic params in array length positions.
442+ struct Cache < const SIZE : usize , const LINE : usize , const KIND : u8 ,
443+ const TAGS : usize , const DATA : usize > {
444+ /// Heap-allocated tag array — one u32 per cache line. Use `get_tag`/`set_tag` for typed access.
445+ tags : UnsafeCell < Box < [ u32 ; TAGS ] > > ,
446+ /// Heap-allocated data array — entire cache contents as u64 chunks.
447+ data : UnsafeCell < Box < [ u64 ; DATA ] > > ,
448+ /// L2 decoded-instruction slots (SIZE/4 entries). Empty Vec for L1-I and L1-D.
443449 instrs : CacheVec < DecodedInstr > ,
444450 /// Signals the decode thread to stop (kept for Drop compatibility).
445451 stop : Arc < AtomicBool > ,
446452}
447453
448- unsafe impl < const SIZE : usize , const LINE : usize , const KIND : u8 > Send for Cache < SIZE , LINE , KIND > { }
449- unsafe impl < const SIZE : usize , const LINE : usize , const KIND : u8 > Sync for Cache < SIZE , LINE , KIND > { }
454+ unsafe impl < const SIZE : usize , const LINE : usize , const KIND : u8 ,
455+ const TAGS : usize , const DATA : usize > Send for Cache < SIZE , LINE , KIND , TAGS , DATA > { }
456+ unsafe impl < const SIZE : usize , const LINE : usize , const KIND : u8 ,
457+ const TAGS : usize , const DATA : usize > Sync for Cache < SIZE , LINE , KIND , TAGS , DATA > { }
450458
451- impl < const SIZE : usize , const LINE : usize , const KIND : u8 > Cache < SIZE , LINE , KIND > {
459+ impl < const SIZE : usize , const LINE : usize , const KIND : u8 ,
460+ const TAGS : usize , const DATA : usize > Cache < SIZE , LINE , KIND , TAGS , DATA > {
452461 // ---- Compile-time geometry constants ----
453462 const NUM_LINES : usize = SIZE / LINE ;
454463 const LINE_SHIFT : u32 = ctz ( LINE ) ;
@@ -471,8 +480,10 @@ impl<const SIZE: usize, const LINE: usize, const KIND: u8> Cache<SIZE, LINE, KIN
471480 Vec :: new ( )
472481 } ;
473482 Self {
474- tags : CacheVec :: new ( vec ! [ 0u32 ; Self :: NUM_LINES ] ) ,
475- data : CacheVec :: new ( vec ! [ 0u64 ; SIZE / 8 ] ) ,
483+ // SAFETY: u32/u64 are valid at all-zero bit patterns. Box::new_zeroed avoids
484+ // constructing the array on the stack before moving to the heap.
485+ tags : UnsafeCell :: new ( unsafe { Box :: new_zeroed ( ) . assume_init ( ) } ) ,
486+ data : UnsafeCell :: new ( unsafe { Box :: new_zeroed ( ) . assume_init ( ) } ) ,
476487 instrs : CacheVec :: new ( instrs) ,
477488 stop : Arc :: new ( AtomicBool :: new ( false ) ) ,
478489 }
@@ -498,41 +509,50 @@ impl<const SIZE: usize, const LINE: usize, const KIND: u8> Cache<SIZE, LINE, KIN
498509 ( line_idx << Self :: CHUNKS_PER_LINE_SHIFT ) + chunk_offset
499510 }
500511
512+ #[ inline( always) ]
513+ fn tags ( & self ) -> & [ u32 ; TAGS ] { unsafe { & * * self . tags . get ( ) } }
514+ #[ inline( always) ]
515+ fn tags_mut ( & self ) -> & mut [ u32 ; TAGS ] { unsafe { & mut * * self . tags . get ( ) } }
516+ #[ inline( always) ]
517+ fn data ( & self ) -> & [ u64 ; DATA ] { unsafe { & * * self . data . get ( ) } }
518+ #[ inline( always) ]
519+ fn data_mut ( & self ) -> & mut [ u64 ; DATA ] { unsafe { & mut * * self . data . get ( ) } }
520+
501521 /// Read the tag at `idx` as a typed bitfield struct.
502522 #[ inline( always) ]
503523 fn get_tag < T : From < u32 > > ( & self , idx : usize ) -> T {
504- T :: from ( self . tags . get ( ) [ idx] )
524+ T :: from ( unsafe { * self . tags ( ) . get_unchecked ( idx) } )
505525 }
506526
507527 /// Write a typed bitfield tag to `idx`.
508528 #[ inline( always) ]
509529 fn set_tag < T : Into < u32 > > ( & self , idx : usize , tag : T ) {
510- self . tags . get_mut ( ) [ idx] = tag. into ( ) ;
530+ unsafe { * self . tags_mut ( ) . get_unchecked_mut ( idx) = tag. into ( ) ; }
511531 }
512532
513533 /// View cache data as a flat &[u32] (two per u64, big-endian word order).
514534 /// XOR word index with 1 to address naturally on a little-endian host.
515535 /// Used by the I-cache to store l2.instrs slot indices.
516536 #[ inline( always) ]
517537 fn data_as_words ( & self ) -> & [ u32 ] {
518- let slice = self . data . get ( ) ;
519- unsafe { std:: slice:: from_raw_parts ( slice . as_ptr ( ) as * const u32 , slice . len ( ) * 2 ) }
538+ let arr = self . data ( ) ;
539+ unsafe { std:: slice:: from_raw_parts ( arr . as_ptr ( ) as * const u32 , SIZE / 4 ) }
520540 }
521541
522542 /// View cache data as a flat &[u16] (big-endian halfword order within each u64).
523543 /// XOR halfword index with 3 to convert MIPS big-endian address to host offset.
524544 #[ inline( always) ]
525545 fn data_as_halves ( & self ) -> & [ u16 ] {
526- let slice = self . data . get ( ) ;
527- unsafe { std:: slice:: from_raw_parts ( slice . as_ptr ( ) as * const u16 , slice . len ( ) * 4 ) }
546+ let arr = self . data ( ) ;
547+ unsafe { std:: slice:: from_raw_parts ( arr . as_ptr ( ) as * const u16 , SIZE / 2 ) }
528548 }
529549
530550 /// View cache data as a flat &[u8] (big-endian byte order within each u64).
531551 /// XOR byte index with 7 to convert MIPS big-endian address to host offset.
532552 #[ inline( always) ]
533553 fn data_as_bytes ( & self ) -> & [ u8 ] {
534- let slice = self . data . get ( ) ;
535- unsafe { std:: slice:: from_raw_parts ( slice . as_ptr ( ) as * const u8 , slice . len ( ) * 8 ) }
554+ let arr = self . data ( ) ;
555+ unsafe { std:: slice:: from_raw_parts ( arr . as_ptr ( ) as * const u8 , SIZE ) }
536556 }
537557}
538558
@@ -555,13 +575,13 @@ pub struct R4000Cache {
555575 downstream : Arc < dyn BusDevice > ,
556576
557577 // L1 Instruction Cache (16 KB, 16-byte lines)
558- ic : Cache < IC_SIZE , IC_LINE , { CacheKind :: Insn as u8 } > ,
578+ ic : ICache ,
559579
560580 // L1 Data Cache (16 KB, 16-byte lines)
561- dc : Cache < DC_SIZE , DC_LINE , { CacheKind :: Data as u8 } > ,
581+ dc : DCache ,
562582
563583 // L2 Unified Cache (1 MB, 128-byte lines)
564- l2 : Cache < L2_SIZE , L2_LINE , { CacheKind :: L2 as u8 } > ,
584+ l2 : L2Cache ,
565585
566586 // Load-Linked / Store-Conditional support
567587 llbit : UnsafeCell < bool > ,
@@ -593,9 +613,10 @@ unsafe impl Send for R4000Cache {}
593613unsafe impl Sync for R4000Cache { }
594614
595615// Type aliases for the concrete cache instances, for brevity in R4000Cache impls.
596- type ICache = Cache < IC_SIZE , IC_LINE , { CacheKind :: Insn as u8 } > ;
597- type DCache = Cache < DC_SIZE , DC_LINE , { CacheKind :: Data as u8 } > ;
598- type L2Cache = Cache < L2_SIZE , L2_LINE , { CacheKind :: L2 as u8 } > ;
616+ // TAGS = SIZE/LINE (one tag per cache line), DATA = SIZE/8 (one u64 per 8 bytes).
617+ type ICache = Cache < IC_SIZE , IC_LINE , { CacheKind :: Insn as u8 } , { IC_SIZE / IC_LINE } , { IC_SIZE / 8 } > ;
618+ type DCache = Cache < DC_SIZE , DC_LINE , { CacheKind :: Data as u8 } , { DC_SIZE / DC_LINE } , { DC_SIZE / 8 } > ;
619+ type L2Cache = Cache < L2_SIZE , L2_LINE , { CacheKind :: L2 as u8 } , { L2_SIZE / L2_LINE } , { L2_SIZE / 8 } > ;
599620
600621impl R4000Cache {
601622 pub fn new ( downstream : Arc < dyn BusDevice > ) -> Self {
@@ -778,16 +799,16 @@ impl R4000Cache {
778799 /// L2 line. The caller iterates over `l1_lines_per_l2` indices starting here,
779800 /// stepping by 1 (indices wrap naturally via the cache mask).
780801 #[ inline]
781- fn l2_idx_to_l1_base_idx < const L1_SIZE : usize , const L1_LINE : usize , const L1_KIND : u8 > (
782- & self , l2_idx : usize , pidx : u32 , _l1 : & Cache < L1_SIZE , L1_LINE , L1_KIND >
802+ fn l2_idx_to_l1_base_idx < const L1_SIZE : usize , const L1_LINE : usize , const L1_KIND : u8 , const L1_TAGS : usize , const L1_DATA : usize > (
803+ & self , l2_idx : usize , pidx : u32 , _l1 : & Cache < L1_SIZE , L1_LINE , L1_KIND , L1_TAGS , L1_DATA >
783804 ) -> usize {
784805 // Physical bits of the L2 line start address that are below bit 12 (page boundary)
785806 // These bits are the same in VA and PA, so we can derive them from the L2 index.
786807 let phys_sub_bits = ( l2_idx << L2Cache :: LINE_SHIFT as usize ) & 0xFFF ;
787808 // Reconstruct the virtual address bits used for L1 indexing
788809 let virt_index_bits = ( ( pidx as usize ) << L2_PIDX_VADDR_SHIFT as usize ) | phys_sub_bits;
789- ( virt_index_bits >> Cache :: < L1_SIZE , L1_LINE , L1_KIND > :: LINE_SHIFT as usize )
790- & Cache :: < L1_SIZE , L1_LINE , L1_KIND > :: NUM_LINES_MASK
810+ ( virt_index_bits >> Cache :: < L1_SIZE , L1_LINE , L1_KIND , L1_TAGS , L1_DATA > :: LINE_SHIFT as usize )
811+ & Cache :: < L1_SIZE , L1_LINE , L1_KIND , L1_TAGS , L1_DATA > :: NUM_LINES_MASK
791812 }
792813
793814 /// Check if the given physical address overlaps with the Load Linked address.
@@ -945,8 +966,8 @@ impl R4000Cache {
945966 }
946967
947968 // Write data from L1-D to L2
948- let dc_data = self . dc . data . get ( ) ;
949- let l2_data = self . l2 . data . get_mut ( ) ;
969+ let dc_data = self . dc . data ( ) ;
970+ let l2_data = self . l2 . data_mut ( ) ;
950971
951972 let l1_start_chunk = l1_idx << DCache :: CHUNKS_PER_LINE_SHIFT ;
952973
@@ -1027,7 +1048,7 @@ impl R4000Cache {
10271048 println ! ( "[CACHE DEBUG] writeback_l2_line: {} idx={}, phys_addr=0x{:08x}, ptag=0x{:05x}, cs={}, WRITING TO MEMORY" ,
10281049 self . tracking_label_l2_idx( idx) , idx, phys_addr, tag. ptag( ) , cs) ;
10291050 // Dump the L2 line data being written
1030- let l2_data = self . l2 . data . get ( ) ;
1051+ let l2_data = self . l2 . data ( ) ;
10311052 let start_chunk = idx << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
10321053 println ! ( " L2 line data being written (16 x u64):" ) ;
10331054 for i in 0 ..L2Cache :: CHUNKS_PER_LINE {
@@ -1040,7 +1061,7 @@ impl R4000Cache {
10401061 // An L2 writeback/eviction is not a coherency action and must not break LL/SC.
10411062
10421063 // Now write L2 data to memory
1043- let l2_data = self . l2 . data . get ( ) ;
1064+ let l2_data = self . l2 . data ( ) ;
10441065 let start_chunk = idx << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
10451066
10461067 for i in 0 ..L2Cache :: CHUNKS_PER_LINE {
@@ -1074,7 +1095,7 @@ impl R4000Cache {
10741095 let line_base = phys_addr & !( L2Cache :: LINE_MASK as u64 ) ;
10751096
10761097 // Fill line from memory
1077- let l2_data = self . l2 . data . get_mut ( ) ;
1098+ let l2_data = self . l2 . data_mut ( ) ;
10781099 let start_chunk = l2_idx << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
10791100
10801101 let instrs_start = l2_idx << L2Cache :: INSTR_SHIFT ;
@@ -1163,7 +1184,7 @@ impl R4000Cache {
11631184 let ic_line_base = phys_addr & !( ICache :: LINE_MASK as u64 ) ;
11641185 let l2_word_offset = ( ( ic_line_base as usize ) & L2Cache :: LINE_MASK ) >> 2 ;
11651186 let l2_instrs_base = ( l2_idx << L2Cache :: INSTR_SHIFT ) + l2_word_offset;
1166- let ic_data = self . ic . data . get_mut ( ) ;
1187+ let ic_data = self . ic . data_mut ( ) ;
11671188 let ic_data_base = ic_idx * ICache :: CHUNKS_PER_LINE ;
11681189 for i in 0 ..ICache :: CHUNKS_PER_LINE {
11691190 let idx0 = ( l2_instrs_base + i * 2 ) as u32 ;
@@ -1219,8 +1240,8 @@ impl R4000Cache {
12191240 let l2_line_base = l2_idx << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
12201241 let offset_in_l2_line = ( ( dc_line_base & ( L2Cache :: LINE_MASK as u64 ) ) >> 3 ) as usize ;
12211242
1222- let l2_data = self . l2 . data . get ( ) ;
1223- let dc_data = self . dc . data . get_mut ( ) ;
1243+ let l2_data = self . l2 . data ( ) ;
1244+ let dc_data = self . dc . data_mut ( ) ;
12241245 let dc_start_chunk = dc_idx << DCache :: CHUNKS_PER_LINE_SHIFT ;
12251246
12261247 for i in 0 ..DCache :: CHUNKS_PER_LINE {
@@ -1333,7 +1354,7 @@ impl MipsCache for R4000Cache {
13331354 1 => self . dc . data_as_bytes ( ) [ data_idx * 8 + ( ( phys_addr as usize & 7 ) ^ 7 ) ] as u64 ,
13341355 2 => self . dc . data_as_halves ( ) [ data_idx * 4 + ( ( phys_addr as usize & 7 ) >> 1 ^ 3 ) ] as u64 ,
13351356 4 => self . dc . data_as_words ( ) [ data_idx * 2 + ( ( phys_addr as usize & 7 ) >> 2 ^ 1 ) ] as u64 ,
1336- 8 => self . dc . data . get ( ) [ data_idx] ,
1357+ 8 => self . dc . data ( ) [ data_idx] ,
13371358 _ => return BusRead64 :: err ( ) ,
13381359 } ;
13391360 BusRead64 :: ok ( data)
@@ -1369,7 +1390,7 @@ impl MipsCache for R4000Cache {
13691390
13701391 // Write to L1-D cache
13711392 let data_idx = self . dc . get_data_index ( virt_addr) ;
1372- let dc_data = self . dc . data . get_mut ( ) ;
1393+ let dc_data = self . dc . data_mut ( ) ;
13731394 let current = dc_data[ data_idx] ;
13741395 dc_data[ data_idx] = ( current & !mask) | ( val & mask) ;
13751396
@@ -1763,7 +1784,7 @@ impl MipsCache for R4000Cache {
17631784 _ => "Unknown" ,
17641785 } ;
17651786
1766- let dc_data = self . dc . data . get ( ) ;
1787+ let dc_data = self . dc . data ( ) ;
17671788 let start = idx << DCache :: CHUNKS_PER_LINE_SHIFT ;
17681789
17691790 let mut s = format ! ( "L1-D Line 0x{:x}: Tag=0x{:06x} CS={} ({}) D={}\n Data:" ,
@@ -1790,7 +1811,7 @@ impl MipsCache for R4000Cache {
17901811 _ => "Reserved" ,
17911812 } ;
17921813
1793- let l2_data = self . l2 . data . get ( ) ;
1814+ let l2_data = self . l2 . data ( ) ;
17941815 let start = idx << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
17951816
17961817 let mut s = format ! ( "L2 Line 0x{:x}: Tag=0x{:05x} CS={} ({})\n Data:" ,
@@ -1808,12 +1829,12 @@ impl MipsCache for R4000Cache {
18081829 }
18091830
18101831 fn power_on ( & self ) {
1811- self . ic . tags . get_mut ( ) . fill ( 0 ) ;
1812- self . ic . data . get_mut ( ) . fill ( 0 ) ;
1813- self . dc . tags . get_mut ( ) . fill ( 0 ) ;
1814- self . dc . data . get_mut ( ) . fill ( 0 ) ;
1815- self . l2 . tags . get_mut ( ) . fill ( 0 ) ;
1816- self . l2 . data . get_mut ( ) . fill ( 0 ) ;
1832+ self . ic . tags_mut ( ) . fill ( 0 ) ;
1833+ self . ic . data_mut ( ) . fill ( 0 ) ;
1834+ self . dc . tags_mut ( ) . fill ( 0 ) ;
1835+ self . dc . data_mut ( ) . fill ( 0 ) ;
1836+ self . l2 . tags_mut ( ) . fill ( 0 ) ;
1837+ self . l2 . data_mut ( ) . fill ( 0 ) ;
18171838 for s in self . l2 . instrs . get_mut ( ) . iter_mut ( ) {
18181839 s. decoded = false ;
18191840 s. raw = 0 ;
@@ -1845,12 +1866,12 @@ impl Drop for R4000Cache {
18451866
18461867impl Resettable for R4000Cache {
18471868 fn power_on ( & self ) {
1848- self . ic . tags . get_mut ( ) . fill ( 0 ) ;
1849- self . ic . data . get_mut ( ) . fill ( 0 ) ;
1850- self . dc . tags . get_mut ( ) . fill ( 0 ) ;
1851- self . dc . data . get_mut ( ) . fill ( 0 ) ;
1852- self . l2 . tags . get_mut ( ) . fill ( 0 ) ;
1853- self . l2 . data . get_mut ( ) . fill ( 0 ) ;
1869+ self . ic . tags_mut ( ) . fill ( 0 ) ;
1870+ self . ic . data_mut ( ) . fill ( 0 ) ;
1871+ self . dc . tags_mut ( ) . fill ( 0 ) ;
1872+ self . dc . data_mut ( ) . fill ( 0 ) ;
1873+ self . l2 . tags_mut ( ) . fill ( 0 ) ;
1874+ self . l2 . data_mut ( ) . fill ( 0 ) ;
18541875 for s in self . l2 . instrs . get_mut ( ) . iter_mut ( ) {
18551876 s. decoded = false ;
18561877 s. raw = 0 ;
@@ -1865,17 +1886,15 @@ impl Resettable for R4000Cache {
18651886// ---- snapshot helpers + MipsCache save/load override ----
18661887
18671888impl R4000Cache {
1868- fn save_cache_inner < const S : usize , const L : usize , const K : u8 > ( c : & Cache < S , L , K > ) -> ( Vec < u32 > , Vec < u64 > ) {
1869- ( c. tags . get ( ) . clone ( ) , c. data . get ( ) . clone ( ) )
1889+ fn save_cache_inner < const S : usize , const L : usize , const K : u8 , const TG : usize , const DA : usize > ( c : & Cache < S , L , K , TG , DA > ) -> ( Vec < u32 > , Vec < u64 > ) {
1890+ ( c. tags ( ) . to_vec ( ) , c. data ( ) . to_vec ( ) )
18701891 }
18711892
1872- fn load_cache_inner < const S : usize , const L : usize , const K : u8 > ( c : & Cache < S , L , K > , tags : & [ u32 ] , data : & [ u64 ] ) {
1873- let t = c. tags . get_mut ( ) ;
1874- let tl = tags. len ( ) . min ( t. len ( ) ) ;
1875- t[ ..tl] . copy_from_slice ( & tags[ ..tl] ) ;
1876- let d = c. data . get_mut ( ) ;
1877- let dl = data. len ( ) . min ( d. len ( ) ) ;
1878- d[ ..dl] . copy_from_slice ( & data[ ..dl] ) ;
1893+ fn load_cache_inner < const S : usize , const L : usize , const K : u8 , const TG : usize , const DA : usize > ( c : & Cache < S , L , K , TG , DA > , tags : & [ u32 ] , data : & [ u64 ] ) {
1894+ let tl = tags. len ( ) . min ( TG ) ;
1895+ c. tags_mut ( ) [ ..tl] . copy_from_slice ( & tags[ ..tl] ) ;
1896+ let dl = data. len ( ) . min ( DA ) ;
1897+ c. data_mut ( ) [ ..dl] . copy_from_slice ( & data[ ..dl] ) ;
18791898 }
18801899
18811900 pub fn save_cache_state ( & self ) -> toml:: Value {
@@ -1919,7 +1938,7 @@ impl R4000Cache {
19191938
19201939 // Rebuild l2.instrs from restored l2.data
19211940 {
1922- let l2_data_slice: Vec < u64 > = self . l2 . data . get ( ) . clone ( ) ;
1941+ let l2_data_slice = self . l2 . data ( ) ;
19231942 let l2_instrs = self . l2 . instrs . get_mut ( ) ;
19241943 for line in 0 ..L2Cache :: NUM_LINES {
19251944 let chunks_start = line << L2Cache :: CHUNKS_PER_LINE_SHIFT ;
0 commit comments