Skip to content

Commit 8001553

Browse files
committed
Initial attempt at efficient range queries across multiple Set objects.
1 parent 9ef03c0 commit 8001553

3 files changed

Lines changed: 52 additions & 4 deletions

File tree

fstwrapper/src/set.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,28 @@ pub extern "C" fn fst_set_make_opbuilder(ptr: *mut Set) -> *mut set::OpBuilder<'
146146
}
147147
// Declares `fst_set_opbuilder_free` for `*mut set::OpBuilder` pointers.
// NOTE(review): `make_free_fn!` is defined elsewhere; presumably it expands to
// an exported function that releases the pointed-to builder — confirm.
make_free_fn!(fst_set_opbuilder_free, *mut set::OpBuilder);
148148

149+
#[no_mangle]
150+
pub extern "C" fn fst_set_make_opstreambuilder(ptr: *mut set::Stream) -> *mut set::OpBuilder<'static> {
151+
let stream = ref_from_ptr!(ptr);
152+
let ob = set::OpBuilder::new().add(stream);
153+
to_raw_ptr(ob)
154+
}
155+
// Declares `fst_set_opstreambuilder_free` for `*mut set::OpBuilder` pointers,
// i.e. the same pointee type as the builder returned by
// `fst_set_make_opstreambuilder`.
// NOTE(review): `make_free_fn!` is defined elsewhere; presumably it expands to
// an exported function that releases the pointed-to builder — confirm.
make_free_fn!(fst_set_opstreambuilder_free, *mut set::OpBuilder);
156+
149157
#[no_mangle]
150158
pub extern "C" fn fst_set_opbuilder_push(ptr: *mut set::OpBuilder, set_ptr: *mut Set) {
151159
let set = ref_from_ptr!(set_ptr);
152160
let ob = mutref_from_ptr!(ptr);
153161
ob.push(set);
154162
}
155163

164+
#[no_mangle]
165+
pub extern "C" fn fst_set_opbuilder_push_stream(ptr: *mut set::OpBuilder, stream_ptr: *mut set::Stream) {
166+
let stream = ref_from_ptr!(stream_ptr);
167+
let ob = mutref_from_ptr!(ptr);
168+
ob.push(stream);
169+
}
170+
156171
#[no_mangle]
157172
pub extern "C" fn fst_set_opbuilder_union(ptr: *mut set::OpBuilder)
158173
-> *mut set::Union {

rust_fst/_build_ffi.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
typedef struct SetLevStream SetLevStream;
4646
typedef struct SetRegexStream SetRegexStream;
4747
typedef struct SetOpBuilder SetOpBuilder;
48+
typedef struct SetOpStreamBuilder SetOpStreamBuilder;
4849
typedef struct SetUnion SetUnion;
4950
typedef struct SetIntersection SetIntersection;
5051
typedef struct SetDifference SetDifference;
@@ -80,6 +81,9 @@
8081
char* fst_set_regexstream_next(SetRegexStream*);
8182
void fst_set_regexstream_free(SetRegexStream*);
8283
84+
void fst_set_opbuilder_push_stream(SetOpStreamBuilder*, SetStream*);
85+
void fst_set_opstreambuilder_free(SetOpStreamBuilder*);
86+
8387
void fst_set_opbuilder_push(SetOpBuilder*, Set*);
8488
void fst_set_opbuilder_free(SetOpBuilder*);
8589
SetUnion* fst_set_opbuilder_union(SetOpBuilder*);

rust_fst/set.py

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,20 @@ def get_set(self):
5656

5757

5858
class OpBuilder(object):
59-
def __init__(self, set_ptr):
59+
def __init__(self, ptr, as_stream=False):
    """ Create the native op builder, seeded with a first operand.

    :param ptr:         Pointer passed to the native constructor
    :param as_stream:   If true, construct the builder with
                        ``fst_set_make_opstreambuilder``; otherwise with
                        ``fst_set_make_opbuilder``
    """
    # NOTE: No need for `ffi.gc`, since the struct will be free'd
    #       once we call union/intersection/difference
    self.as_stream = as_stream
    make_builder = (lib.fst_set_make_opstreambuilder if self.as_stream
                    else lib.fst_set_make_opbuilder)
    self._ptr = make_builder(ptr)
6367

64-
def push(self, set_ptr):
65-
lib.fst_set_opbuilder_push(self._ptr, set_ptr)
68+
def push(self, ptr):
    """ Add another operand to the native op builder.

    :param ptr: Pointer to push; it is passed to
                ``fst_set_opbuilder_push_stream`` when this builder was
                created with ``as_stream``, else to
                ``fst_set_opbuilder_push``
    """
    push_fn = (lib.fst_set_opbuilder_push_stream if self.as_stream
               else lib.fst_set_opbuilder_push)
    push_fn(self._ptr, ptr)
6673

6774
def union(self):
6875
stream_ptr = lib.fst_set_opbuilder_union(self._ptr)
@@ -220,6 +227,28 @@ def _make_opbuilder(self, *others):
220227
opbuilder.push(oth._ptr)
221228
return opbuilder
222229

230+
def _make_opstreambuilder(self, *streams):
    """ Build a stream-mode :py:class:`OpBuilder` from the given streams.

    The first stream seeds the builder; every further stream is pushed
    onto it in order.

    :param streams: Objects exposing a ``_ptr`` attribute
    :returns:       The populated :py:class:`OpBuilder`
    :raises ValueError: If no stream was passed
    """
    if not streams:
        raise ValueError("Must pass at least one stream")
    remaining = iter(streams)
    builder = OpBuilder(next(remaining)._ptr, as_stream=True)
    for stream in remaining:
        builder.push(stream._ptr)
    return builder
237+
238+
def range_union(self, str_slice, *others):
    """ Get an iterator over a range of keys in the union of this set and
    others.

    :param str_slice:   Key range; used to index this set and each of
                        `others` (``item[str_slice]``)
    :param others:      List of :py:class:`Set` objects
    :returns:           Iterator over all keys in all sets in
                        lexicographical order
    """
    sliced = [member[str_slice] for member in (self,) + others]
    return self._make_opstreambuilder(*sliced).union()
251+
223252
def union(self, *others):
224253
""" Get an iterator over the keys in the union of this set and others.
225254

0 commit comments

Comments
 (0)