Skip to content

Commit eb103f4

Browse files
Merge pull request #404 from apache/cleanup-before-5.0.0
Cleanup before 5.0.0
2 parents: 4a23934 + e602aec; commit eb103f4

48 files changed

Lines changed: 286 additions & 466 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

common/include/common_defs.hpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,20 +35,19 @@ static const uint64_t DEFAULT_SEED = 9001;
3535

3636
enum resize_factor { X1 = 0, X2, X4, X8 };
3737

38-
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
39-
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
40-
41-
// thread-safe random bit
42-
static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
43-
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
44-
+ std::hash<std::thread::id>{}(std::this_thread::get_id())));
38+
template<typename A> using string = std::basic_string<char, std::char_traits<char>, typename std::allocator_traits<A>::template rebind_alloc<char>>;
4539

4640
// common random declarations
4741
namespace random_utils {
4842
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
4943
static thread_local std::mt19937_64 rand(rd());
5044
static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
5145

46+
// thread-safe random bit
47+
static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
48+
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
49+
+ std::hash<std::thread::id>{}(std::this_thread::get_id())));
50+
5251
inline void override_seed(uint64_t s) {
5352
rand.seed(s);
5453
}

common/include/count_zeros.hpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,6 @@
2222

2323
#include <cstdint>
2424

25-
#include <stdio.h>
26-
2725
namespace datasketches {
2826

2927
static const uint8_t byte_leading_zeros_table[256] = {

common/include/quantiles_sorted_view.hpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -214,8 +214,6 @@ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_vi
214214
using Base = typename quantiles_sorted_view<T, C, A>::Container::const_iterator;
215215
using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
216216

217-
const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
218-
219217
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
220218
const value_type operator*() const { return Base::operator*(); }
221219

@@ -239,6 +237,9 @@ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_vi
239237

240238
private:
241239
Base begin;
240+
241+
friend class quantiles_sorted_view<T, C, A>;
242+
const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
242243
};
243244

244245
} /* namespace datasketches */

cpc/include/cpc_common.hpp

Lines changed: 14 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -26,45 +26,39 @@
2626

2727
namespace datasketches {
2828

29+
/// CPC constants
2930
namespace cpc_constants {
30-
const uint8_t MIN_LG_K = 4;
31-
const uint8_t MAX_LG_K = 26;
32-
const uint8_t DEFAULT_LG_K = 11;
31+
/// min log2 of K
32+
const uint8_t MIN_LG_K = 4;
33+
/// max log2 of K
34+
const uint8_t MAX_LG_K = 26;
35+
/// default log2 of K
36+
const uint8_t DEFAULT_LG_K = 11;
3337
}
3438

35-
// TODO: Redundant and deprecated. Will be removed in next major version release.
36-
static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
37-
static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
38-
static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
39-
40-
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
41-
template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
42-
template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
43-
template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
44-
45-
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
46-
template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
47-
template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
48-
4939
// forward declaration
5040
template<typename A> class u32_table;
5141

5242
template<typename A>
5343
struct compressed_state {
44+
using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
45+
5446
explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
5547
window_data(allocator), window_data_words(0) {}
56-
vector_u32<A> table_data;
48+
vector_u32 table_data;
5749
uint32_t table_data_words;
5850
uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
59-
vector_u32<A> window_data;
51+
vector_u32 window_data;
6052
uint32_t window_data_words;
6153
};
6254

6355
template<typename A>
6456
struct uncompressed_state {
57+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
58+
6559
explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
6660
u32_table<A> table;
67-
vector_u8<A> window;
61+
vector_bytes window;
6862
};
6963

7064
} /* namespace datasketches */

cpc/include/cpc_compressor.hpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,9 @@ inline cpc_compressor<A>& get_compressor();
4747
template<typename A>
4848
class cpc_compressor {
4949
public:
50+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
51+
using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
52+
5053
void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
5154
void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
5255

@@ -126,17 +129,17 @@ class cpc_compressor {
126129
uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
127130
void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
128131

129-
void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
132+
void compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const;
130133
void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
131134

132-
vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
133-
void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
135+
vector_u32 uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
136+
void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window, uint8_t lg_k, uint32_t num_coupons) const;
134137

135138
static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
136139
static size_t safe_length_for_compressed_window_buf(uint32_t k);
137140
static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
138141

139-
static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
142+
static inline vector_u32 tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
140143
static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
141144
};
142145

cpc/include/cpc_compressor_impl.hpp

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompress
183183
template<typename A>
184184
void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
185185
if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
186-
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
186+
vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
187187
u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
188188
compress_surprising_values(pairs, source.get_lg_k(), result);
189189
}
@@ -192,7 +192,7 @@ template<typename A>
192192
void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
193193
if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
194194
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
195-
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
195+
vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
196196
lg_k, source.table_data.get_allocator());
197197
target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
198198
}
@@ -204,12 +204,12 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
204204
if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
205205
if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
206206
const uint32_t k = 1 << source.get_lg_k();
207-
vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
207+
vector_u32 pairs_from_table = source.surprising_value_table.unwrapping_get_items();
208208
const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
209209
if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
210210
const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
211211

212-
vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
212+
vector_u32 all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
213213

214214
u32_table<A>::merge(
215215
pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -224,7 +224,7 @@ template<typename A>
224224
void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
225225
if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
226226
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
227-
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
227+
vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
228228
lg_k, source.table_data.get_allocator());
229229

230230
// In the hybrid flavor, some of these pairs actually
@@ -250,7 +250,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour
250250
template<typename A>
251251
void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
252252
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
253-
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
253+
vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
254254
if (pairs.size() > 0) {
255255
// Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
256256
// of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -277,7 +277,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
277277
target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
278278
} else {
279279
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
280-
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
280+
vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
281281
lg_k, source.table_data.get_allocator());
282282
// undo the compressor's 8-column shift
283283
for (uint32_t i = 0; i < num_pairs; i++) {
@@ -291,7 +291,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
291291
template<typename A>
292292
void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
293293
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
294-
vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
294+
vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
295295
if (pairs.size() > 0) {
296296
// Here we apply a complicated transformation to the column indices, which
297297
// changes the implied ordering of the pairs, so we must do it before sorting.
@@ -330,7 +330,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
330330
target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
331331
} else {
332332
if (source.table_data.size() == 0) throw std::logic_error("table is expected");
333-
vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
333+
vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
334334
lg_k, source.table_data.get_allocator());
335335

336336
const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -356,7 +356,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
356356
}
357357

358358
template<typename A>
359-
void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
359+
void cpc_compressor<A>::compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const {
360360
const uint32_t k = 1 << lg_k;
361361
const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
362362
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
@@ -374,10 +374,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u
374374
}
375375

376376
template<typename A>
377-
vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
378-
uint8_t lg_k, const A& allocator) const {
377+
auto cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
378+
uint8_t lg_k, const A& allocator) const -> vector_u32 {
379379
const uint32_t k = 1 << lg_k;
380-
vector_u32<A> pairs(num_pairs, 0, allocator);
380+
vector_u32 pairs(num_pairs, 0, allocator);
381381
const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
382382
low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
383383
return pairs;
@@ -399,7 +399,7 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l
399399
}
400400

401401
template<typename A>
402-
void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window,
402+
void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window,
403403
uint8_t lg_k, uint32_t num_coupons) const {
404404
const uint32_t k = 1 << lg_k;
405405
window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
@@ -722,10 +722,10 @@ void write_unary(
722722
// The empty space that this leaves at the beginning of the output array
723723
// will be filled in later by the caller.
724724
template<typename A>
725-
vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
726-
uint32_t empty_space, const A& allocator) {
725+
auto cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
726+
uint32_t empty_space, const A& allocator) -> vector_u32 {
727727
const size_t output_length = empty_space + num_pairs_to_get;
728-
vector_u32<A> pairs(output_length, 0, allocator);
728+
vector_u32 pairs(output_length, 0, allocator);
729729
size_t pair_index = empty_space;
730730
for (unsigned row_index = 0; row_index < k; row_index++) {
731731
uint8_t byte = window[row_index];

cpc/include/cpc_sketch.hpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333

3434
namespace datasketches {
3535

36-
// forward-declarations
36+
// forward declarations
3737
template<typename A> class cpc_sketch_alloc;
3838
template<typename A> class cpc_union_alloc;
3939

@@ -64,6 +64,8 @@ template<typename A>
6464
class cpc_sketch_alloc {
6565
public:
6666
using allocator_type = A;
67+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
68+
using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
6769

6870
/**
6971
* Creates an instance of the sketch given the lg_k parameter and hash seed.
@@ -204,10 +206,6 @@ class cpc_sketch_alloc {
204206
*/
205207
void serialize(std::ostream& os) const;
206208

207-
// This is a convenience alias for users
208-
// The type returned by the following serialize method
209-
using vector_bytes = vector_u8<A>;
210-
211209
/**
212210
* This method serializes the sketch as a vector of bytes.
213211
* An optional header can be reserved in front of the sketch.
@@ -278,7 +276,7 @@ class cpc_sketch_alloc {
278276
uint32_t num_coupons; // the number of coupons collected so far
279277

280278
u32_table<A> surprising_value_table;
281-
vector_u8<A> sliding_window;
279+
vector_bytes sliding_window;
282280
uint8_t window_offset; // derivable from num_coupons, but made explicit for speed
283281
uint8_t first_interesting_column; // This is part of a speed optimization
284282

@@ -287,7 +285,7 @@ class cpc_sketch_alloc {
287285

288286
// for deserialization and cpc_union::get_result()
289287
cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column, u32_table<A>&& table,
290-
vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
288+
vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
291289

292290
inline void row_col_update(uint32_t row_col);
293291
inline void update_sparse(uint32_t row_col);
@@ -310,7 +308,7 @@ class cpc_sketch_alloc {
310308
static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
311309

312310
// this produces a full-size k-by-64 bit matrix
313-
vector_u64<A> build_bit_matrix() const;
311+
vector_u64 build_bit_matrix() const;
314312

315313
static uint8_t get_preamble_ints(uint32_t num_coupons, bool has_hip, bool has_table, bool has_window);
316314
inline void write_hip(std::ostream& os) const;

0 commit comments

Comments
 (0)