Skip to content

Commit 2fc8ea5

Browse files
committed
feat: add btree cursor iterator
1 parent 1b5364d commit 2fc8ea5

14 files changed

Lines changed: 583 additions & 900 deletions

File tree

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ bloomfilter = "1.0"
2626
fs2 = "0.4.3"
2727
bincode = "1.3.3"
2828
crc32fast = "1.4.2"
29+
glob = "0.3.1"
2930

3031
[build-dependencies]
3132
hooky-rs = "1.0.0"

src/btree/mod.rs

Lines changed: 242 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,13 @@
11
use crate::{
22
error::{Error, Result},
3-
page::{io::BlockIO, spec::PageNumber, Page},
3+
page::{
4+
io::BlockIO,
5+
spec::{LocationOffset, PageNumber},
6+
Page,
7+
},
48
Either,
59
};
10+
use glob::Pattern;
611
use node::BTreeNode;
712
use serde::{de::DeserializeOwned, Serialize};
813
use std::{
@@ -15,8 +20,7 @@ use std::{
1520
pub mod node;
1621
pub mod spec;
1722
use spec::{
18-
BTreeBlockHeader, BTreeCell, BTreePageHeader, PageType, BTREE_BLOCK_ALLOC_SIZE,
19-
BTREE_PAGE_HEADER_SIZE,
23+
BTreeBlockHeader, BTreeCell, BTreePageHeader, BTreePair, PageType, BTREE_PAGE_HEADER_SIZE,
2024
};
2125

2226
pub const MAX_BRANCHING_FACTOR: u16 = 100;
@@ -48,48 +52,47 @@ impl<
4852
where
4953
P: AsRef<Path>,
5054
{
51-
let io: BlockIO =
52-
BlockIO::new(block_path, BTREE_BLOCK_ALLOC_SIZE as u16).map_err(Error::IoError)?;
55+
let mut io: BlockIO = BlockIO::new(block_path).map_err(Error::IoError)?;
5356

54-
if io.exists().map_err(Error::IoError)? {
55-
Self::open_block(io)
56-
} else {
57-
Self::create_block(io)
58-
}
59-
}
57+
let mut metadata_page: Page<BTreeBlockHeader> =
58+
io.read_metadata_page().map_err(Error::IoError)?;
6059

61-
fn open_block(mut io: BlockIO) -> Result<Self> {
62-
let bytes = io.read_alloc_data().map_err(Error::IoError)?;
63-
let block_header: BTreeBlockHeader =
64-
bincode::deserialize(&bytes).map_err(Error::SerializeError)?;
60+
match metadata_page.is_empty() {
61+
false => {
62+
let block_header = metadata_page.read(0)?.unwrap();
6563

66-
Ok(Self {
67-
io,
68-
root: block_header.root,
69-
b: block_header.b,
70-
_k: PhantomData,
71-
_v: PhantomData,
72-
})
64+
Ok(Self {
65+
io,
66+
root: block_header.root,
67+
b: MAX_BRANCHING_FACTOR,
68+
_k: PhantomData,
69+
_v: PhantomData,
70+
})
71+
}
72+
73+
true => Self::create_block(io),
74+
}
7375
}
7476

7577
fn create_block(mut io: BlockIO) -> Result<Self> {
7678
let mut root: Page<BTreeCell<K, V>> = Page::create(BTREE_PAGE_HEADER_SIZE).unwrap();
7779
let metadata = BTreePageHeader::new(PageType::Leaf, None);
7880
root.write_special(&metadata.to_bytes()).unwrap();
7981

80-
io.write_page(0, &root).unwrap();
82+
let root_page = io.write_new_page(&root).unwrap();
8183

82-
let block_header = BTreeBlockHeader {
83-
b: MAX_BRANCHING_FACTOR,
84-
root: 0,
85-
};
84+
let block_header = BTreeBlockHeader { root: root_page };
8685

87-
io.alloc_data(&bincode::serialize(&block_header).map_err(Error::SerializeError)?)
88-
.map_err(Error::IoError)?;
86+
let mut metadata_page: Page<BTreeBlockHeader> =
87+
io.read_metadata_page().map_err(Error::IoError)?;
88+
89+
metadata_page.insert(0, block_header)?;
90+
91+
io.write_metadata_page(&metadata_page).unwrap();
8992

9093
Ok(Self {
9194
io,
92-
root: 0,
95+
root: root_page,
9396
b: MAX_BRANCHING_FACTOR,
9497
_k: PhantomData,
9598
_v: PhantomData,
@@ -106,6 +109,22 @@ impl<
106109
page_number: PageNumber,
107110
parents: &mut Vec<PageNumber>,
108111
) -> Result<Search> {
112+
self.search_from_subtree_by(
113+
|page| page.binary_search_by_key(key, |e| e.key.clone()),
114+
page_number,
115+
parents,
116+
)
117+
}
118+
119+
pub fn search_from_subtree_by<F>(
120+
&mut self,
121+
mut f: F,
122+
page_number: PageNumber,
123+
parents: &mut Vec<PageNumber>,
124+
) -> Result<Search>
125+
where
126+
F: FnMut(&mut Page<BTreeCell<K, V>>) -> Either<u16, u16>,
127+
{
109128
let mut node: BTreeNode<K, V> = self
110129
.io
111130
.read_page(page_number.into())
@@ -115,7 +134,7 @@ impl<
115134
match node.header.kind {
116135
PageType::Internal | PageType::Root => {
117136
parents.push(page_number);
118-
let index = node.page.binary_search_by_key(key, |e| e.key.clone());
137+
let index = f(&mut node.page);
119138

120139
if index.is_left() {
121140
return Ok(Search {
@@ -128,11 +147,11 @@ impl<
128147

129148
let next_page = node.child(next_cell)?.unwrap();
130149

131-
self.search_from_subtree(key, next_page, parents)
150+
self.search_from_subtree_by(f, next_page, parents)
132151
}
133152

134153
PageType::Leaf => {
135-
let index = node.page.binary_search_by_key(key, |e| e.key.clone());
154+
let index = f(&mut node.page);
136155

137156
Ok(Search {
138157
index,
@@ -287,6 +306,14 @@ impl<
287306
Ok(keys)
288307
}
289308

309+
/// Counts the cells stored in the tree.
///
/// Every cell reachable from the root is loaded through
/// `cells_from_subtree` and then counted.
///
/// # Errors
/// Propagates any error returned by `cells_from_subtree`.
pub fn len(&mut self) -> Result<usize> {
    let cells = self.cells_from_subtree(self.root)?;

    Ok(cells.len())
}
312+
313+
/// Reports whether the tree holds no cells at all.
///
/// # Errors
/// Propagates any error returned by `len`.
pub fn is_empty(&mut self) -> Result<bool> {
    self.len().map(|count| count == 0)
}
316+
290317
pub fn cells_from_subtree(&mut self, page_number: PageNumber) -> Result<Vec<BTreeCell<K, V>>> {
291318
let mut node: BTreeNode<K, V> = self
292319
.io
@@ -437,16 +464,172 @@ impl<
437464
fn set_root(&mut self, page_number: PageNumber) -> Result<()> {
438465
self.root = page_number;
439466

440-
let block_header = BTreeBlockHeader {
441-
b: MAX_BRANCHING_FACTOR,
442-
root: self.root,
467+
let block_header = BTreeBlockHeader { root: self.root };
468+
469+
let mut metadata_page: Page<BTreeBlockHeader> =
470+
self.io.read_metadata_page().map_err(Error::IoError)?;
471+
472+
metadata_page.insert(0, block_header)?;
473+
474+
self.io.write_metadata_page(&metadata_page).unwrap();
475+
476+
Ok(())
477+
}
478+
}
479+
480+
impl<
481+
K: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
482+
V: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
483+
> IntoIterator for BTree<K, V>
484+
{
485+
type Item = BTreePair<K, V>;
486+
type IntoIter = BTreeIterator<K, V>;
487+
488+
fn into_iter(self) -> Self::IntoIter {
489+
BTreeIterator::new(self)
490+
}
491+
}
492+
493+
pub struct BTreeIterator<K, V> {
494+
btree: BTree<K, V>,
495+
page: PageNumber,
496+
cell_index: LocationOffset,
497+
parents: Vec<PageNumber>,
498+
}
499+
500+
impl<
501+
K: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
502+
V: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
503+
> BTreeIterator<K, V>
504+
{
505+
pub fn new(btree: BTree<K, V>) -> Self {
506+
let mut iter = Self {
507+
page: btree.root,
508+
btree,
509+
cell_index: 0,
510+
parents: Vec::new(),
443511
};
444512

445-
self.io
446-
.alloc_data(&bincode::serialize(&block_header).map_err(Error::SerializeError)?)
447-
.map_err(Error::IoError)?;
513+
iter.move_to_leftmost().unwrap();
514+
515+
iter
516+
}
517+
518+
fn move_to_leftmost(&mut self) -> Result<()> {
519+
let mut node: BTreeNode<K, V> = self
520+
.btree
521+
.io
522+
.read_page(self.page.into())
523+
.map_err(Error::IoError)?
524+
.try_into()?;
525+
526+
while !node.is_leaf() {
527+
self.parents.push(self.page);
528+
529+
self.page = node.child(0)?.unwrap();
530+
531+
let next_node: BTreeNode<K, V> = self
532+
.btree
533+
.io
534+
.read_page(self.page.into())
535+
.map_err(Error::IoError)?
536+
.try_into()?;
537+
538+
node = next_node
539+
}
540+
541+
self.cell_index = 0;
542+
448543
Ok(())
449544
}
545+
546+
pub fn into_btree(self) -> BTree<K, V> {
547+
self.btree
548+
}
549+
}
550+
551+
impl<
552+
K: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
553+
V: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug,
554+
> Iterator for BTreeIterator<K, V>
555+
{
556+
type Item = BTreePair<K, V>;
557+
558+
fn next(&mut self) -> Option<Self::Item> {
559+
let mut node: BTreeNode<K, V> = self
560+
.btree
561+
.io
562+
.read_page(self.page.into())
563+
.unwrap()
564+
.try_into()
565+
.unwrap();
566+
567+
if node.is_empty() && node.is_leaf() {
568+
return None;
569+
}
570+
571+
let cell = node.page.read(self.cell_index).unwrap();
572+
573+
if node.is_leaf() && self.cell_index + 1 < node.len() {
574+
self.cell_index += 1;
575+
return Some(cell.unwrap().to_pair());
576+
}
577+
578+
if !node.is_leaf() && self.cell_index < node.len() {
579+
self.parents.push(self.page);
580+
self.page = node.child(self.cell_index + 1).unwrap().unwrap();
581+
self.move_to_leftmost().unwrap();
582+
583+
return Some(cell.unwrap().to_pair());
584+
}
585+
586+
let mut found_branch = false;
587+
588+
while !self.parents.is_empty() && !found_branch {
589+
let parent_page = self.parents.pop().unwrap();
590+
let mut parent: BTreeNode<K, V> = self
591+
.btree
592+
.io
593+
.read_page(parent_page.into())
594+
.unwrap()
595+
.try_into()
596+
.unwrap();
597+
598+
let index = parent.iter_children().position(|c| c == self.page).unwrap() as u16;
599+
600+
self.page = parent_page;
601+
602+
if index < parent.len() {
603+
self.cell_index = index;
604+
found_branch = true;
605+
}
606+
}
607+
608+
if self.parents.is_empty() && !found_branch {
609+
return None;
610+
}
611+
612+
Some(cell.unwrap().to_pair())
613+
}
614+
}
615+
616+
impl<V: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug> BTree<String, V> {
617+
/// O(n) worst-case complexity
618+
pub fn find_pattern(&mut self, key_pattern: &str) -> Result<Vec<BTreePair<String, V>>> {
619+
let results = self
620+
.cells_from_subtree(self.root)?
621+
.into_iter()
622+
.filter_map(|c| {
623+
if Pattern::new(key_pattern).unwrap().matches(&c.key) {
624+
Some(c.to_pair())
625+
} else {
626+
None
627+
}
628+
})
629+
.collect::<Vec<BTreePair<String, V>>>();
630+
631+
Ok(results)
632+
}
450633
}
451634

452635
#[cfg(test)]
@@ -455,16 +638,28 @@ mod btree_tests {
455638

456639
#[test]
fn create_btree() {
    let mut btree = BTree::<u32, u32>::new("test_data/btree.db").unwrap();

    // Store each key in 0..100 with twice its value.
    for key in 0u32..100 {
        btree.insert(key, key * 2).unwrap();
    }

    // Spot-check lookups across the key range, in the same order as before.
    for &(key, expected) in &[(2u32, 4u32), (10, 20), (50, 100), (75, 150), (99, 198)] {
        let value = btree.get(&key).unwrap().unwrap();
        assert_eq!(value, expected);
    }

    assert_eq!(btree.len().unwrap(), 100);
}
470665
}

0 commit comments

Comments
 (0)