44//!
55//! ## Format Changes (v0.7.0+)
66//!
7- //! **BREAKING CHANGE**: The VBQ index is now embedded at the end of VBQ files instead of
8- //! being stored in separate `.vqi` files. This improves portability and eliminates the
9- //! need to manage auxiliary files.
7+ //! **BREAKING CHANGE**: The VBQ index is now embedded at the end of VBQ files,
8+ //! improving portability and eliminating the need to manage auxiliary files.
109//!
1110//! ## Embedded Index Structure
1211//!
2928//!
3029//! ## Key Changes from v0.6.x
3130//!
32- //! - Index moved from separate `.vqi` files into VBQ files
31+ //! - Index is now embedded in VBQ files
3332//! - Cumulative record counts changed from `u32` to `u64`
3433//! - Support for files with more than 4 billion records
3534
3635use std:: {
3736 fs:: File ,
38- io:: { BufReader , BufWriter , Cursor , Read , Write } ,
37+ io:: { Cursor , Read , Write } ,
3938 path:: Path ,
4039} ;
4140
@@ -374,9 +373,10 @@ impl IndexHeader {
374373/// `IndexHeader` and a collection of `BlockRange` entries, one for each block in
375374/// the file.
376375///
377- /// The index can be created by scanning a VBQ file or loaded from a previously
378- /// created index file. Once loaded, it provides information about block locations,
379- /// sizes, and record counts.
376+ /// The index is embedded at the end of VBQ files and can be loaded using
377+ /// `MmapReader::load_index()` or created by scanning a VBQ file using
378+ /// `BlockIndex::from_vbq()`. Once loaded, it provides information about block
379+ /// locations, sizes, and record counts.
380380///
381381/// # Examples
382382///
@@ -388,10 +388,6 @@ impl IndexHeader {
388388/// let vbq_path = Path::new("example.vbq");
389389/// let index = BlockIndex::from_vbq(vbq_path).unwrap();
390390///
391- /// // Save the index for future use
392- /// let index_path = Path::new("example.vbq.vqi");
393- /// index.save_to_path(index_path).unwrap();
394- ///
395391/// // Use the index with a reader for parallel processing
396392/// let reader = MmapReader::new(vbq_path).unwrap();
397393/// println!("File contains {} blocks", index.n_blocks());
@@ -430,54 +426,18 @@ impl BlockIndex {
430426 /// # Examples
431427 ///
432428 /// ```rust,no_run
433- /// use binseq::vbq::BlockIndex;
429+ /// use binseq::vbq::MmapReader;
434430 /// use std::path::Path;
435431 ///
436- /// let index = BlockIndex::from_path(Path::new("example.vbq.vqi")).unwrap();
432+ /// let reader = MmapReader::new(Path::new("example.vbq")).unwrap();
433+ /// let index = reader.load_index().unwrap();
437434 /// println!("The file contains {} blocks", index.n_blocks());
438435 /// ```
439436 #[ must_use]
440437 pub fn n_blocks ( & self ) -> usize {
441438 self . ranges . len ( )
442439 }
443440
444- /// Writes the collection of `BlockRange` to a file
445- /// Saves the index to a file
446- ///
447- /// This writes the index header and all block ranges to a file, which can be loaded
448- /// later to avoid rescanning the VBQ file. The index is compressed to reduce
449- /// storage space.
450- ///
451- /// # Parameters
452- ///
453- /// * `path` - The path where the index file should be saved
454- ///
455- /// # Returns
456- ///
457- /// * `Ok(())` - If the index was successfully saved
458- /// * `Err(_)` - If an error occurred during saving
459- ///
460- /// # Examples
461- ///
462- /// ```rust,no_run
463- /// use binseq::vbq::BlockIndex;
464- /// use std::path::Path;
465- ///
466- /// // Create an index from a VBQ file
467- /// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap();
468- ///
469- /// // Save it for future use
470- /// index.save_to_path(Path::new("example.vbq.vqi")).unwrap();
471- /// ```
472- pub fn save_to_path < P : AsRef < Path > > ( & self , path : P ) -> Result < ( ) > {
473- let mut writer = File :: create ( path) . map ( BufWriter :: new) ?;
474- self . header . write_bytes ( & mut writer) ?;
475- let mut writer = Encoder :: new ( writer, 3 ) ?. auto_finish ( ) ;
476- self . write_range ( & mut writer) ?;
477- writer. flush ( ) ?;
478- Ok ( ( ) )
479- }
480-
481441 /// Write the index to an output buffer
482442 pub fn write_bytes < W : Write > ( & self , writer : & mut W ) -> Result < ( ) > {
483443 self . header . write_bytes ( writer) ?;
@@ -490,9 +450,8 @@ impl BlockIndex {
490450 /// Writes all block ranges (the collection of `BlockRange` entries) to the
491451 /// provided output writer.
492452 ///
493- /// This method is used internally by `save_to_path` to write the block ranges
494- /// to an index file. It can also be used to serialize an index to any destination
495- /// that implements `Write`.
453+ /// This method is used internally to write the block ranges to the embedded index.
454+ /// It can also be used to serialize an index to any destination that implements `Write`.
496455 ///
497456 /// # Parameters
498457 ///
@@ -524,8 +483,8 @@ impl BlockIndex {
524483 /// Creates a new index by scanning a VBQ file
525484 ///
526485 /// This method memory-maps the specified VBQ file and scans it block by block
527- /// to create an index. The index can then be saved to a file for future use, enabling
528- /// efficient random access without rescanning the file .
486+ /// to create an index. This is primarily used internally when embedding the index
487+ /// into VBQ files during the write process.
529488 ///
530489 /// # Parameters
531490 ///
@@ -545,9 +504,6 @@ impl BlockIndex {
545504 /// // Create an index from a VBQ file
546505 /// let index = BlockIndex::from_vbq(Path::new("example.vbq")).unwrap();
547506 ///
548- /// // Save the index for future use
549- /// index.save_to_path(Path::new("example.vbq.vqi")).unwrap();
550- ///
551507 /// // Get statistics about the file
552508 /// println!("File contains {} blocks", index.n_blocks());
553509 ///
@@ -603,45 +559,6 @@ impl BlockIndex {
603559 Ok ( index)
604560 }
605561
606- /// Reads an index from a path
607- ///
608- /// # Panics
609- /// Panics if the path is not a valid UTF-8 string.
610- pub fn from_path < P : AsRef < Path > > ( path : P ) -> Result < Self > {
611- let Some ( upstream_file) = path. as_ref ( ) . to_str ( ) . unwrap ( ) . strip_suffix ( ".vqi" ) else {
612- return Err ( IndexError :: MissingUpstreamFile (
613- path. as_ref ( ) . to_string_lossy ( ) . to_string ( ) ,
614- )
615- . into ( ) ) ;
616- } ;
617- let upstream_handle = File :: open ( upstream_file) ?;
618- let mmap = unsafe { memmap2:: Mmap :: map ( & upstream_handle) ? } ;
619- let file_size = mmap. len ( ) as u64 ;
620-
621- let mut file_handle = File :: open ( path) . map ( BufReader :: new) ?;
622- let index_header = IndexHeader :: from_reader ( & mut file_handle) ?;
623- if index_header. bytes != file_size {
624- return Err ( IndexError :: ByteSizeMismatch ( file_size, index_header. bytes ) . into ( ) ) ;
625- }
626- let buffer = {
627- let mut buffer = Vec :: new ( ) ;
628- let mut decoder = Decoder :: new ( file_handle) ?;
629- decoder. read_to_end ( & mut buffer) ?;
630- buffer
631- } ;
632-
633- let mut ranges = Self :: new ( index_header) ;
634- let mut pos = 0 ;
635- while pos < buffer. len ( ) {
636- let bound = pos + SIZE_BLOCK_RANGE ;
637- let range = BlockRange :: from_bytes ( & buffer[ pos..bound] ) ;
638- ranges. add_range ( range) ;
639- pos += SIZE_BLOCK_RANGE ;
640- }
641-
642- Ok ( ranges)
643- }
644-
645562 pub fn from_bytes ( bytes : & [ u8 ] ) -> Result < Self > {
646563 let index_header = IndexHeader :: from_bytes ( bytes) ?;
647564 let buffer = {
@@ -676,10 +593,11 @@ impl BlockIndex {
676593 /// # Examples
677594 ///
678595 /// ```rust,no_run
679- /// use binseq::vbq::BlockIndex ;
596+ /// use binseq::vbq::MmapReader;
680597 /// use std::path::Path;
681598 ///
682- /// let index = BlockIndex::from_path(Path::new("example.vbq.vqi")).unwrap();
599+ /// let reader = MmapReader::new(Path::new("example.vbq")).unwrap();
600+ /// let index = reader.load_index().unwrap();
683601 ///
684602 /// // Examine the ranges to determine which blocks to process
685603 /// for (i, range) in index.ranges().iter().enumerate() {
0 commit comments