Skip to content

Commit e10511a

Browse files
authored
Merge pull request #27 from jmalkin/use_properties
API cleanup
2 parents 86fbdee + 9b76c29 commit e10511a

26 files changed

Lines changed: 319 additions & 205 deletions

include/quantile_conditional.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ template<typename T, typename SK, typename std::enable_if<std::is_trivial<T>::va
8484
void add_vector_update(nb::class_<SK>& clazz) {
8585
clazz.def(
8686
"update",
87-
[](SK& sk, nb::ndarray<T, nb::c_contig> items) {
87+
[](SK& sk, nb::ndarray<T> items) {
8888
if (items.ndim() != 1) {
8989
throw std::invalid_argument("input data must have only one dimension. Found: "
9090
+ std::to_string(items.ndim()));

src/count_wrapper.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,14 @@ void bind_count_min_sketch(nb::module_ &m, const char* name) {
3030
using namespace datasketches;
3131

3232
nb::class_<count_min_sketch<W>>(m, name)
33-
.def(nb::init<uint8_t, uint32_t, uint64_t>(), nb::arg("num_hashes"), nb::arg("num_buckets"), nb::arg("seed")=DEFAULT_SEED)
34-
.def(nb::init<const count_min_sketch<W>&>())
33+
.def(nb::init<uint8_t, uint32_t, uint64_t>(), nb::arg("num_hashes"), nb::arg("num_buckets"), nb::arg("seed")=DEFAULT_SEED,
34+
"Creates an instance of a CountMin sketch\n\n"
35+
":param num_hashes: Number of rows in the sketch\n:type num_hashes: int\n"
36+
":param num_buckets: Number of columns in the sketch\n:type num_buckets: int\n"
37+
":param seed: Hash seed to use\n:type seed: int, optional"
38+
)
39+
// using num_hashes (rows), num_buckets (columns), and hash seed `seed`.
40+
.def("__copy__", [](const count_min_sketch<W>& sk){ return count_min_sketch<W>(sk); })
3541
.def_static("suggest_num_buckets", &count_min_sketch<W>::suggest_num_buckets, nb::arg("relative_error"),
3642
"Suggests the number of buckets needed to achieve an accuracy within the provided "
3743
"relative_error. For example, when relative_error = 0.05, the returned frequency estimates "
@@ -50,16 +56,16 @@ void bind_count_min_sketch(nb::module_ &m, const char* name) {
5056
"Produces a string summary of the sketch")
5157
.def("is_empty", &count_min_sketch<W>::is_empty,
5258
"Returns True if the sketch has seen no items, otherwise False")
53-
.def("get_num_hashes", &count_min_sketch<W>::get_num_hashes,
54-
"Returns the configured number of hashes for the sketch")
55-
.def("get_num_buckets", &count_min_sketch<W>::get_num_buckets,
56-
"Returns the configured number of buckets for the sketch")
57-
.def("get_seed", &count_min_sketch<W>::get_seed,
58-
"Returns the base hash seed for the sketch")
59+
.def_prop_ro("num_hashes", &count_min_sketch<W>::get_num_hashes,
60+
"The configured number of hashes for the sketch")
61+
.def_prop_ro("num_buckets", &count_min_sketch<W>::get_num_buckets,
62+
"The configured number of buckets for the sketch")
63+
.def_prop_ro("seed", &count_min_sketch<W>::get_seed,
64+
"The base hash seed for the sketch")
5965
.def("get_relative_error", &count_min_sketch<W>::get_relative_error,
6066
"Returns the maximum permissible error for any frequency estimate query")
61-
.def("get_total_weight", &count_min_sketch<W>::get_total_weight,
62-
"Returns the total weight currently inserted into the stream")
67+
.def_prop_ro("total_weight", &count_min_sketch<W>::get_total_weight,
68+
"The total weight currently inserted into the stream")
6369
.def("update", static_cast<void (count_min_sketch<W>::*)(int64_t, W)>(&count_min_sketch<W>::update), nb::arg("item"), nb::arg("weight")=1.0,
6470
"Updates the sketch with the given 64-bit integer value")
6571
.def("update", static_cast<void (count_min_sketch<W>::*)(const std::string&, W)>(&count_min_sketch<W>::update), nb::arg("item"), nb::arg("weight")=1.0,
@@ -73,9 +79,9 @@ void bind_count_min_sketch(nb::module_ &m, const char* name) {
7379
.def("get_upper_bound", static_cast<W (count_min_sketch<W>::*)(const std::string&) const>(&count_min_sketch<W>::get_upper_bound), nb::arg("item"),
7480
"Returns an upper bound on the estimate for the provided string")
7581
.def("get_lower_bound", static_cast<W (count_min_sketch<W>::*)(int64_t) const>(&count_min_sketch<W>::get_lower_bound), nb::arg("item"),
76-
"Returns an lower bound on the estimate for the given 64-bit integer value")
82+
"Returns a lower bound on the estimate for the given 64-bit integer value")
7783
.def("get_lower_bound", static_cast<W (count_min_sketch<W>::*)(const std::string&) const>(&count_min_sketch<W>::get_lower_bound), nb::arg("item"),
78-
"Returns an lower bound on the estimate for the provided string")
84+
"Returns a lower bound on the estimate for the provided string")
7985
.def("merge", &count_min_sketch<W>::merge, nb::arg("other"),
8086
"Merges the provided other sketch into this one")
8187
.def("get_serialized_size_bytes", &count_min_sketch<W>::get_serialized_size_bytes,

src/cpc_wrapper.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,14 @@ void init_cpc(nb::module_ &m) {
3131
using namespace datasketches;
3232

3333
nb::class_<cpc_sketch>(m, "cpc_sketch")
34-
.def(nb::init<uint8_t, uint64_t>(), nb::arg("lg_k")=cpc_constants::DEFAULT_LG_K, nb::arg("seed")=DEFAULT_SEED)
35-
.def(nb::init<const cpc_sketch&>())
34+
.def(nb::init<uint8_t, uint64_t>(), nb::arg("lg_k")=cpc_constants::DEFAULT_LG_K, nb::arg("seed")=DEFAULT_SEED,
35+
"Creates a new CPC sketch\n\n"
36+
":param lg_k: base 2 logarithm of the number of bins in the sketch\n"
37+
":type lg_k: int, optional\n"
38+
":param seed: seed value for the hash function\n"
39+
":type seed: int, optional"
40+
)
41+
.def("__copy__", [](const cpc_sketch& sk){ return cpc_sketch(sk); })
3642
.def("__str__", &cpc_sketch::to_string,
3743
"Produces a string summary of the sketch")
3844
.def("to_string", &cpc_sketch::to_string,
@@ -43,8 +49,8 @@ void init_cpc(nb::module_ &m) {
4349
"Updates the sketch with the given 64-bit floating point")
4450
.def<void (cpc_sketch::*)(const std::string&)>("update", &cpc_sketch::update, nb::arg("datum"),
4551
"Updates the sketch with the given string")
46-
.def("get_lg_k", &cpc_sketch::get_lg_k,
47-
"Returns configured lg_k of this sketch")
52+
.def_prop_ro("lg_k", &cpc_sketch::get_lg_k,
53+
"Configured lg_k of this sketch")
4854
.def("is_empty", &cpc_sketch::is_empty,
4955
"Returns True if the sketch is empty, otherwise False")
5056
.def("get_estimate", &cpc_sketch::get_estimate,
@@ -70,7 +76,6 @@ void init_cpc(nb::module_ &m) {
7076

7177
nb::class_<cpc_union>(m, "cpc_union")
7278
.def(nb::init<uint8_t, uint64_t>(), nb::arg("lg_k"), nb::arg("seed")=DEFAULT_SEED)
73-
.def(nb::init<const cpc_union&>())
7479
.def("update", (void (cpc_union::*)(const cpc_sketch&)) &cpc_union::update, nb::arg("sketch"),
7580
"Updates the union with the provided CPC sketch")
7681
.def("get_result", &cpc_union::get_result,

src/density_wrapper.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -42,21 +42,27 @@ void bind_density_sketch(nb::module_ &m, const char* name) {
4242
{ K holder(kernel);
4343
new (sk) density_sketch<T, K>(k, dim, holder);
4444
},
45-
nb::arg("k"), nb::arg("dim"), nb::arg("kernel"))
45+
nb::arg("k"), nb::arg("dim"), nb::arg("kernel"),
46+
"Creates a new density sketch\n\n"
47+
":param k: controls the size and error of the sketch\n:type k: int\n"
48+
":param dim: dimension of the input data\n:type dim: int\n"
49+
":param kernel: instance of a kernel\n:type kernel: KernelFunction\n"
50+
)
51+
.def("__copy__", [](const density_sketch<T,K>& sk){ return density_sketch<T,K>(sk); })
4652
.def("update", static_cast<void (density_sketch<T, K>::*)(const std::vector<T>&)>(&density_sketch<T, K>::update), nb::arg("vector"),
4753
"Updates the sketch with the given vector")
4854
.def("merge", static_cast<void (density_sketch<T, K>::*)(const density_sketch<T, K>&)>(&density_sketch<T, K>::merge), nb::arg("sketch"),
4955
"Merges the provided sketch into this one")
5056
.def("is_empty", &density_sketch<T, K>::is_empty,
5157
"Returns True if the sketch is empty, otherwise False")
52-
.def("get_k", &density_sketch<T, K>::get_k,
53-
"Returns the configured parameter k")
54-
.def("get_dim", &density_sketch<T, K>::get_dim,
55-
"Returns the configured parameter dim")
56-
.def("get_n", &density_sketch<T, K>::get_n,
57-
"Returns the length of the input stream")
58-
.def("get_num_retained", &density_sketch<T, K>::get_num_retained,
59-
"Returns the number of retained items (samples) in the sketch")
58+
.def_prop_ro("k", &density_sketch<T, K>::get_k,
59+
"The configured parameter k")
60+
.def_prop_ro("dim", &density_sketch<T, K>::get_dim,
61+
"The configured parameter dim")
62+
.def_prop_ro("n", &density_sketch<T, K>::get_n,
63+
"The length of the input stream")
64+
.def_prop_ro("num_retained", &density_sketch<T, K>::get_num_retained,
65+
"The number of retained items (samples) in the sketch")
6066
.def("is_estimation_mode", &density_sketch<T, K>::is_estimation_mode,
6167
"Returns True if the sketch is in estimation mode, otherwise False")
6268
.def("get_estimate", &density_sketch<T, K>::get_estimate, nb::arg("point"),
@@ -101,9 +107,12 @@ void init_density(nb::module_ &m) {
101107
prepare_numpy();
102108

103109
// generic kernel function
104-
nb::class_<kernel_function, KernelFunction>(m, "KernelFunction")
110+
nb::class_<kernel_function, KernelFunction>(m, "KernelFunction",
111+
"KernelFunction provides a generic base class from which user-defined kernels must inherit. \
112+
The class contains only a __call__ method that must be overridden.")
105113
.def(nb::init())
106-
.def("__call__", &kernel_function::operator(), nb::arg("a"), nb::arg("b"))
114+
.def("__call__", &kernel_function::operator(), nb::arg("a"), nb::arg("b"),
115+
"A method to evaluate a kernel with given inputs a and b.")
107116
;
108117

109118
// the old sketch names can almost be defined, but the kernel_function_holder won't work in init()

src/ebpps_wrapper.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,11 @@ void bind_ebpps_sketch(nb::module_ &m, const char* name) {
3434
using namespace datasketches;
3535

3636
nb::class_<ebpps_sketch<T>>(m, name)
37-
.def(nb::init<uint32_t>(), nb::arg("k"))
37+
.def(nb::init<uint32_t>(), nb::arg("k"),
38+
"Creates a new EBPPS sketch instance\n\n"
39+
":param k: Maximum number of samples in the sketch\n:type k: int\n"
40+
)
41+
.def("__copy__", [](const ebpps_sketch<T>& sk){ return ebpps_sketch<T>(sk); })
3842
.def("__str__", &ebpps_sketch<T>::to_string,
3943
"Produces a string summary of the sketch")
4044
.def("to_string",
@@ -51,11 +55,11 @@ void bind_ebpps_sketch(nb::module_ &m, const char* name) {
5155
.def("merge", (void (ebpps_sketch<T>::*)(const ebpps_sketch<T>&)) &ebpps_sketch<T>::merge,
5256
nb::arg("sketch"), "Merges the sketch with the given sketch")
5357
.def_prop_ro("k", &ebpps_sketch<T>::get_k,
54-
"Returns the sketch's maximum configured sample size")
58+
"The sketch's maximum configured sample size")
5559
.def_prop_ro("n", &ebpps_sketch<T>::get_n,
56-
"Returns the total stream length")
60+
"The total stream length")
5761
.def_prop_ro("c", &ebpps_sketch<T>::get_c,
58-
"Returns the expected number of samples returned upon a call to get_result() or the creation of an iterator. "
62+
"The expected number of samples returned upon a call to get_result() or the creation of an iterator. "
5963
"The number is a floating point value, where the fractional portion represents the probability of including "
6064
"a \"partial item\" from the sample. The value C should be no larger than the sketch's configured value of k, "
6165
"although numerical precision limitations mean it may exceed k by double precision floating point error margins in certain cases.")

src/fi_wrapper.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,13 @@ void bind_fi_sketch(nb::module_ &m, const char* name) {
4545
using namespace datasketches;
4646

4747
auto fi_class = nb::class_<frequent_items_sketch<T, W, H, E>>(m, name)
48-
.def(nb::init<uint8_t>(), nb::arg("lg_max_k"))
48+
.def(nb::init<uint8_t>(), nb::arg("lg_max_k"),
49+
"Creates an instance of the sketch\n\n"
50+
":param lg_max_k: base 2 logarithm of the maximum size of the internal hash map of the sketch. Maximum "
51+
"capacity is 0.75 of this value, which is the maximum number of distinct items the sketch can contain.\n"
52+
":type lg_max_k: int\n"
53+
)
54+
.def("__copy__", [](const frequent_items_sketch<T, W, H, E>& sk){ return frequent_items_sketch<T,W,H,E>(sk); })
4955
.def("__str__", &frequent_items_sketch<T, W, H, E>::to_string, nb::arg("print_items")=false,
5056
"Produces a string summary of the sketch")
5157
.def("to_string", &frequent_items_sketch<T, W, H, E>::to_string, nb::arg("print_items")=false,
@@ -56,10 +62,10 @@ void bind_fi_sketch(nb::module_ &m, const char* name) {
5662
"Merges the given sketch into this one")
5763
.def("is_empty", &frequent_items_sketch<T, W, H, E>::is_empty,
5864
"Returns True if the sketch is empty, otherwise False")
59-
.def("get_num_active_items", &frequent_items_sketch<T, W, H, E>::get_num_active_items,
60-
"Returns the number of active items in the sketch")
61-
.def("get_total_weight", &frequent_items_sketch<T, W, H, E>::get_total_weight,
62-
"Returns the sum of the weights (frequencies) in the stream seen so far by the sketch")
65+
.def_prop_ro("num_active_items", &frequent_items_sketch<T, W, H, E>::get_num_active_items,
66+
"The number of active items in the sketch")
67+
.def_prop_ro("total_weight", &frequent_items_sketch<T, W, H, E>::get_total_weight,
68+
"The sum of the weights (frequencies) in the stream seen so far by the sketch")
6369
.def("get_estimate", &frequent_items_sketch<T, W, H, E>::get_estimate, nb::arg("item"),
6470
"Returns the estimate of the weight (frequency) of the given item.\n"
6571
"Note: The true frequency of a item would be the sum of the counts as a result of the "
@@ -68,8 +74,8 @@ void bind_fi_sketch(nb::module_ &m, const char* name) {
6874
"Returns the guaranteed lower bound weight (frequency) of the given item.")
6975
.def("get_upper_bound", &frequent_items_sketch<T, W, H, E>::get_upper_bound, nb::arg("item"),
7076
"Returns the guaranteed upper bound weight (frequency) of the given item.")
71-
.def("get_sketch_epsilon", (double (frequent_items_sketch<T, W, H, E>::*)(void) const) &frequent_items_sketch<T, W, H, E>::get_epsilon,
72-
"Returns the epsilon value used by the sketch to compute error")
77+
.def_prop_ro("epsilon", (double (frequent_items_sketch<T, W, H, E>::*)(void) const) &frequent_items_sketch<T, W, H, E>::get_epsilon,
78+
"The epsilon value used by the sketch to compute error")
7379
.def(
7480
"get_frequent_items",
7581
[](const frequent_items_sketch<T, W, H, E>& sk, frequent_items_error_type err_type, uint64_t threshold) {

src/hll_wrapper.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,23 @@ void init_hll(nb::module_ &m) {
3434
.export_values();
3535

3636
nb::class_<hll_sketch>(m, "hll_sketch")
37-
.def(nb::init<uint8_t>(), nb::arg("lg_k"))
38-
.def(nb::init<uint8_t, target_hll_type>(), nb::arg("lg_k"), nb::arg("tgt_type"))
39-
.def(nb::init<uint8_t, target_hll_type, bool>(), nb::arg("lg_k"), nb::arg("tgt_type"), nb::arg("start_max_size")=false)
37+
.def(nb::init<uint8_t, target_hll_type, bool>(), nb::arg("lg_k"), nb::arg("tgt_type")=HLL_8, nb::arg("start_max_size")=false,
38+
"Constructs a new HLL sketch\n\n"
39+
":param lg_k: A full sketch can hold 2^lg_k rows. Must be between 7 and 21, inclusive.\n"
40+
":type lg_k: int\n"
41+
":param tgt_type: The HLL mode to use, if/when the sketch reaches estimation mode\n"
42+
":type tgt_type: tgt_hll_type\n"
43+
":param start_max_size: Indicates whether to start in HLL mode, keeping memory use constant (if HLL_6 or "
44+
"HLL_8) at the cost of much higher initial memory use. Default (and recommended) is False.\n"
45+
":type start_max_size: bool"
46+
)
4047
.def("__str__", (std::string (hll_sketch::*)(bool,bool,bool,bool) const) &hll_sketch::to_string,
41-
nb::arg("summary")=true, nb::arg("detail")=false, nb::arg("aux_detail")=false, nb::arg("all")=false,
4248
"Produces a string summary of the sketch")
4349
.def("to_string", (std::string (hll_sketch::*)(bool,bool,bool,bool) const) &hll_sketch::to_string,
4450
nb::arg("summary")=true, nb::arg("detail")=false, nb::arg("aux_detail")=false, nb::arg("all")=false,
4551
"Produces a string summary of the sketch")
4652
.def_prop_ro("lg_config_k", &hll_sketch::get_lg_config_k, "Configured lg_k value for the sketch")
47-
.def_prop_ro("tgt_type", &hll_sketch::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
53+
.def_prop_ro("tgt_type", &hll_sketch::get_target_type, "The HLL type (4, 6, or 8) when in estimation mode")
4854
.def("get_estimate", &hll_sketch::get_estimate,
4955
"Estimate of the distinct count of the input stream")
5056
.def("get_lower_bound", &hll_sketch::get_lower_bound, nb::arg("num_std_devs"),
@@ -97,9 +103,12 @@ void init_hll(nb::module_ &m) {
97103
);
98104

99105
nb::class_<hll_union>(m, "hll_union")
100-
.def(nb::init<uint8_t>(), nb::arg("lg_max_k"))
106+
.def(nb::init<uint8_t>(), nb::arg("lg_max_k"),
107+
"Construct an hll_union object of the given size.\n\n"
108+
":param lg_max_k: The maximum size, in log2, of k. Must be between 7 and 21, inclusive.\n"
109+
":type lg_max_k: int"
110+
)
101111
.def_prop_ro("lg_config_k", &hll_union::get_lg_config_k, "Configured lg_k value for the union")
102-
.def_prop_ro("tgt_type", &hll_union::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
103112
.def("get_estimate", &hll_union::get_estimate,
104113
"Estimate of the distinct count of the input stream")
105114
.def("get_lower_bound", &hll_union::get_lower_bound, nb::arg("num_std_devs"),

src/kll_wrapper.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,24 +37,28 @@ void bind_kll_sketch(nb::module_ &m, const char* name) {
3737
using namespace datasketches;
3838

3939
auto kll_class = nb::class_<kll_sketch<T, C>>(m, name)
40-
.def(nb::init<uint16_t>(), nb::arg("k")=kll_constants::DEFAULT_K)
41-
.def(nb::init<const kll_sketch<T, C>&>())
40+
.def(nb::init<uint16_t>(), nb::arg("k")=kll_constants::DEFAULT_K,
41+
"Creates a KLL sketch instance with the given value of k.\n\n"
42+
":param k: Controls the size/accuracy trade-off of the sketch. Default is 200.\n"
43+
":type k: int, optional"
44+
)
45+
.def("__copy__", [](const kll_sketch<T, C>& sk){ return kll_sketch<T, C>(sk); })
4246
.def("update", static_cast<void (kll_sketch<T, C>::*)(const T&)>(&kll_sketch<T, C>::update), nb::arg("item"),
4347
"Updates the sketch with the given value")
4448
.def("merge", (void (kll_sketch<T, C>::*)(const kll_sketch<T, C>&)) &kll_sketch<T, C>::merge, nb::arg("sketch"),
4549
"Merges the provided sketch into this one")
46-
.def("__str__", &kll_sketch<T, C>::to_string, nb::arg("print_levels")=false, nb::arg("print_items")=false,
50+
.def("__str__", &kll_sketch<T, C>::to_string,
4751
"Produces a string summary of the sketch")
4852
.def("to_string", &kll_sketch<T, C>::to_string, nb::arg("print_levels")=false, nb::arg("print_items")=false,
4953
"Produces a string summary of the sketch")
5054
.def("is_empty", &kll_sketch<T, C>::is_empty,
5155
"Returns True if the sketch is empty, otherwise False")
52-
.def("get_k", &kll_sketch<T, C>::get_k,
53-
"Returns the configured parameter k")
54-
.def("get_n", &kll_sketch<T, C>::get_n,
55-
"Returns the length of the input stream")
56-
.def("get_num_retained", &kll_sketch<T, C>::get_num_retained,
57-
"Returns the number of retained items (samples) in the sketch")
56+
.def_prop_ro("k", &kll_sketch<T, C>::get_k,
57+
"The configured parameter k")
58+
.def_prop_ro("n", &kll_sketch<T, C>::get_n,
59+
"The length of the input stream")
60+
.def_prop_ro("num_retained", &kll_sketch<T, C>::get_num_retained,
61+
"The number of retained items (samples) in the sketch")
5862
.def("is_estimation_mode", &kll_sketch<T, C>::is_estimation_mode,
5963
"Returns True if the sketch is in estimation mode, otherwise False")
6064
.def("get_min_value", &kll_sketch<T, C>::get_min_item,

0 commit comments

Comments
 (0)