This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch cpc_allocator in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit c91bf6a92e73eaef02d99170020299b4af8acc0f Author: AlexanderSaydakov <[email protected]> AuthorDate: Tue Jan 19 12:44:47 2021 -0800 support allocator instance --- cpc/include/cpc_common.hpp | 3 +++ cpc/include/cpc_compressor.hpp | 4 ++-- cpc/include/cpc_compressor_impl.hpp | 47 ++++++++++++++++++++++--------------- cpc/include/cpc_sketch.hpp | 13 ++++++---- cpc/include/cpc_sketch_impl.hpp | 33 +++++++++++++++----------- cpc/include/cpc_union.hpp | 4 ++-- cpc/include/cpc_union_impl.hpp | 12 +++++----- cpc/include/u32_table.hpp | 6 ++--- cpc/include/u32_table_impl.hpp | 18 +++++++------- 9 files changed, 80 insertions(+), 60 deletions(-) diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp index 9a766b8..cde110f 100644 --- a/cpc/include/cpc_common.hpp +++ b/cpc/include/cpc_common.hpp @@ -44,6 +44,8 @@ template<typename A> class u32_table; template<typename A> struct compressed_state { + explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0), + window_data(allocator), window_data_words(0) {} vector_u32<A> table_data; uint32_t table_data_words; uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode @@ -53,6 +55,7 @@ struct compressed_state { template<typename A> struct uncompressed_state { + explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {} u32_table<A> table; vector_u8<A> window; }; diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp index 55fa3b8..73db797 100644 --- a/cpc/include/cpc_compressor.hpp +++ b/cpc/include/cpc_compressor.hpp @@ -129,14 +129,14 @@ private: void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const; void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const; - vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const; + vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const; void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const; static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits); static size_t safe_length_for_compressed_window_buf(uint64_t k); static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c); - static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space); + static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator); static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count); }; diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp index b951b05..e3398c8 100644 --- a/cpc/include/cpc_compressor_impl.hpp +++ b/cpc/include/cpc_compressor_impl.hpp @@ -160,7 +160,7 @@ template<typename A> void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const { switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) { case cpc_sketch_alloc<A>::flavor::EMPTY: - target.table = u32_table<A>(2, 6 + lg_k); + target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator()); break; case cpc_sketch_alloc<A>::flavor::SPARSE: uncompress_sparse_flavor(source, target, lg_k); @@ -191,8 +191,9 @@ template<typename A> void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const { if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window"); if (source.table_data.size() == 0) throw std::logic_error("table is expected"); - vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k); - target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k); + vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, + lg_k, source.table_data.get_allocator()); + target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator()); } // This is complicated because it effectively builds a Sparse version @@ -206,7 +207,7 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size()); const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero - vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size()); + vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size(), source.get_allocator()); u32_table<A>::merge( pairs_from_table.data(), 0, pairs_from_table.size(), @@ -221,7 +222,8 @@ template<typename A> void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const { if (source.window_data.size() > 0) throw std::logic_error("window is not expected"); if (source.table_data.size() == 0) throw std::logic_error("table is expected"); - vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k); + vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, + lg_k, source.table_data.get_allocator()); // In the hybrid flavor, some of these pairs actually // belong in the window, so we will separate them out, @@ -240,7 +242,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour pairs[next_true_pair++] = row_col; // move true pair down } } - target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k); + target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator()); } template<typename A> @@ -264,21 +266,23 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source } template<typename A> -void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const { +void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, + uint8_t lg_k, uint32_t num_coupons) const { if (source.window_data.size() == 0) throw std::logic_error("window is expected"); uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons); const size_t num_pairs = source.table_num_entries; if (num_pairs == 0) { - target.table = u32_table<A>(2, 6 + lg_k); + target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator()); } else { if (source.table_data.size() == 0) throw std::logic_error("table is expected"); - vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k); + vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, + lg_k, source.table_data.get_allocator()); // undo the compressor's 8-column shift for (size_t i = 0; i < num_pairs; i++) { if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56"); pairs[i] += 8; } - target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k); + target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator()); } } @@ -314,15 +318,17 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc } template<typename A> -void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const { +void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, + uint8_t lg_k, uint32_t num_coupons) const { if (source.window_data.size() == 0) throw std::logic_error("window is expected"); uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons); const size_t num_pairs = source.table_num_entries; if (num_pairs == 0) { - target.table = u32_table<A>(2, 6 + lg_k); + target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator()); } else { if (source.table_data.size() == 0) throw std::logic_error("table is expected"); - vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k); + vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, + lg_k, source.table_data.get_allocator()); const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons); if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16"); @@ -342,7 +348,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou pairs[i] = (row << 6) | col; } - target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k); + target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator()); } } @@ -364,9 +370,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u } template<typename A> -vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const { +vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, + uint8_t lg_k, const A& allocator) const { const size_t k = 1 << lg_k; - vector_u32<A> pairs(num_pairs); + vector_u32<A> pairs(num_pairs, 0, allocator); const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs); low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words); return pairs; @@ -388,7 +395,8 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l } template<typename A> -void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const { +void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, + uint8_t lg_k, uint32_t num_coupons) const { const size_t k = 1 << lg_k; window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor) const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons); @@ -710,9 +718,10 @@ void write_unary( // The empty space that this leaves at the beginning of the output array // will be filled in later by the caller. template<typename A> -vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) { +vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, + uint32_t empty_space, const A& allocator) { const size_t output_length = empty_space + num_pairs_to_get; - vector_u32<A> pairs(output_length); + vector_u32<A> pairs(output_length, 0, allocator); size_t pair_index = empty_space; for (unsigned row_index = 0; row_index < k; row_index++) { uint8_t byte = window[row_index]; diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp index 9aba16f..a4bf8f6 100644 --- a/cpc/include/cpc_sketch.hpp +++ b/cpc/include/cpc_sketch.hpp @@ -49,7 +49,7 @@ template<typename A> class cpc_sketch_alloc; template<typename A> class cpc_union_alloc; // alias with default allocator for convenience -typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch; +using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>; // allocation and initialization of global decompression (decoding) tables // call this before anything else if you want to control the initialization time @@ -67,7 +67,10 @@ public: * @param lg_k base 2 logarithm of the number of bins in the sketch * @param seed for hash function */ - explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED); + explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); + + using allocator_type = A; + A get_allocator() const; /** * @return configured lg_k of this sketch @@ -204,7 +207,7 @@ public: // This is a convenience alias for users // The type returned by the following serialize method - typedef vector_u8<A> vector_bytes; + using vector_bytes = vector_u8<A>; /** * This method serializes the sketch as a vector of bytes. @@ -221,7 +224,7 @@ public: * @param seed the seed for the hash function that was used to create the sketch * @return an instance of a sketch */ - static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED); + static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); /** * This method deserializes a sketch from a given array of bytes. @@ -230,7 +233,7 @@ public: * @param seed the seed for the hash function that was used to create the sketch * @return an instance of the sketch */ - static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED); + static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); // for internal use uint32_t get_num_coupons() const; diff --git a/cpc/include/cpc_sketch_impl.hpp b/cpc/include/cpc_sketch_impl.hpp index e6bc010..a314de8 100644 --- a/cpc/include/cpc_sketch_impl.hpp +++ b/cpc/include/cpc_sketch_impl.hpp @@ -41,13 +41,13 @@ void cpc_init() { } template<typename A> -cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed): +cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator): lg_k(lg_k), seed(seed), was_merged(false), num_coupons(0), -surprising_value_table(2, 6 + lg_k), -sliding_window(), +surprising_value_table(2, 6 + lg_k, allocator), +sliding_window(allocator), window_offset(0), first_interesting_column(0), kxp(1 << lg_k), @@ -59,6 +59,11 @@ hip_est_accum(0) } template<typename A> +A cpc_sketch_alloc<A>::get_allocator() const { + return sliding_window.get_allocator(); +} + +template<typename A> uint8_t cpc_sketch_alloc<A>::get_lg_k() const { return lg_k; } @@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() { sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it) - u32_table<A> new_table(2, 6 + lg_k); + u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator()); const uint32_t* old_slots = surprising_value_table.get_slots(); const size_t old_num_slots = 1 << surprising_value_table.get_lg_size(); @@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const { template<typename A> void cpc_sketch_alloc<A>::serialize(std::ostream& os) const { - compressed_state<A> compressed; + compressed_state<A> compressed(A(sliding_window.get_allocator())); compressed.table_data_words = 0; compressed.table_num_entries = 0; compressed.window_data_words = 0; @@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const { template<typename A> vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const { - compressed_state<A> compressed; + compressed_state<A> compressed(sliding_window.get_allocator()); compressed.table_data_words = 0; compressed.table_num_entries = 0; compressed.window_data_words = 0; @@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const { const bool has_window = compressed.window_data.size() > 0; const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window); const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t); - vector_u8<A> bytes(size); + vector_u8<A> bytes(size, 0, sliding_window.get_allocator()); uint8_t* ptr = bytes.data() + header_size_bytes; ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints)); const uint8_t serial_version = SERIAL_VERSION; @@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const { } template<typename A> -cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) { +cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) { uint8_t preamble_ints; is.read((char*)&preamble_ints, sizeof(preamble_ints)); uint8_t serial_version; @@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t const bool has_hip = flags_byte & (1 << flags::HAS_HIP); const bool has_table = flags_byte & (1 << flags::HAS_TABLE); const bool has_window = flags_byte & (1 << flags::HAS_WINDOW); - compressed_state<A> compressed; + compressed_state<A> compressed(allocator); compressed.table_data_words = 0; compressed.table_num_entries = 0; compressed.window_data_words = 0; @@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", " + std::to_string(compute_seed_hash(seed))); } - uncompressed_state<A> uncompressed; + uncompressed_state<A> uncompressed(allocator); get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons); if (!is.good()) throw std::runtime_error("error reading from std::istream"); @@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t } template<typename A> -cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) { +cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) { ensure_minimum_memory(size, 8); const char* ptr = static_cast<const char*>(bytes); const char* base = static_cast<const char*>(bytes); @@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s const bool has_table = flags_byte & (1 << flags::HAS_TABLE); const bool has_window = flags_byte & (1 << flags::HAS_WINDOW); ensure_minimum_memory(size, preamble_ints << 2); - compressed_state<A> compressed; + compressed_state<A> compressed(allocator); compressed.table_data_words = 0; compressed.table_num_entries = 0; compressed.window_data_words = 0; @@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", " + std::to_string(compute_seed_hash(seed))); } - uncompressed_state<A> uncompressed; + uncompressed_state<A> uncompressed(allocator); get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons); return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table), std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed); @@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const { // Fill the matrix with default rows in which the "early zone" is filled with ones. // This is essential for the routine's O(k) time cost (as opposed to O(C)). const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1; - vector_u64<A> matrix(k, default_row); + vector_u64<A> matrix(k, default_row, sliding_window.get_allocator()); if (num_coupons == 0) return matrix; diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp index e56aa72..dd59abc 100644 --- a/cpc/include/cpc_union.hpp +++ b/cpc/include/cpc_union.hpp @@ -35,7 +35,7 @@ namespace datasketches { */ // alias with default allocator for convenience -typedef cpc_union_alloc<std::allocator<void>> cpc_union; +using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>; template<typename A> class cpc_union_alloc { @@ -45,7 +45,7 @@ public: * @param lg_k base 2 logarithm of the number of bins in the sketch * @param seed for hash function */ - explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED); + explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A()); cpc_union_alloc(const cpc_union_alloc<A>& other); cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept; diff --git a/cpc/include/cpc_union_impl.hpp b/cpc/include/cpc_union_impl.hpp index 65d933c..5acfe5f 100644 --- a/cpc/include/cpc_union_impl.hpp +++ b/cpc/include/cpc_union_impl.hpp @@ -25,16 +25,16 @@ namespace datasketches { template<typename A> -cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed): +cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator): lg_k(lg_k), seed(seed), accumulator(nullptr), -bit_matrix() +bit_matrix(allocator) { if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) { throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k)); } - accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed); + accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator); } template<typename A> @@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const { const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons); - vector_u8<A> sliding_window(k); + vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator()); // don't need to zero the window's memory // dynamically growing caused snowplow effect uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized if (table_lg_size < 2) table_lg_size = 2; - u32_table<A> table(table_lg_size, 6 + lg_k); + u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator()); // the following should work even when the offset is zero const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX; @@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) { vector_u64<A> old_matrix = std::move(bit_matrix); const uint8_t old_lg_k = lg_k; const size_t new_k = 1 << new_lg_k; - bit_matrix = vector_u64<A>(new_k, 0); + bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator()); lg_k = new_lg_k; or_matrix_into_matrix(old_matrix, old_lg_k); return; diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp index 2316fc1..fe228a5 100644 --- a/cpc/include/u32_table.hpp +++ b/cpc/include/u32_table.hpp @@ -39,8 +39,8 @@ template<typename A> class u32_table { public: - u32_table(); - u32_table(uint8_t lg_size, uint8_t num_valid_bits); + u32_table(const A& allocator); + u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator); inline size_t get_num_items() const; inline const uint32_t* get_slots() const; @@ -52,7 +52,7 @@ public: // returns true iff the item was present and was therefore removed from the table inline bool maybe_delete(uint32_t item); - static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k); + static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator); vector_u32<A> unwrapping_get_items() const; diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp index aa44ba2..bf8ece9 100644 --- a/cpc/include/u32_table_impl.hpp +++ b/cpc/include/u32_table_impl.hpp @@ -29,19 +29,19 @@ namespace datasketches { template<typename A> -u32_table<A>::u32_table(): +u32_table<A>::u32_table(const A& allocator): lg_size(0), num_valid_bits(0), num_items(0), -slots() +slots(allocator) {} template<typename A> -u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits): +u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator): lg_size(lg_size), num_valid_bits(num_valid_bits), num_items(0), -slots(1 << lg_size, UINT32_MAX) +slots(1 << lg_size, UINT32_MAX, allocator) { if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2"); if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32"); @@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) { // this one is specifically tailored to be a part of fm85 decompression scheme template<typename A> -u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) { +u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) { uint8_t lg_num_slots = 2; while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++; - u32_table<A> table(lg_num_slots, 6 + lg_k); + u32_table<A> table(lg_num_slots, 6 + lg_k, allocator); // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array // However, we are starting out with the correct final table size, so the problem might not occur for (size_t i = 0; i < num_pairs; i++) { @@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) { const size_t new_size = 1 << new_lg_size; if (new_size <= num_items) throw std::logic_error("new_size <= num_items"); vector_u32<A> old_slots = std::move(slots); - slots = vector_u32<A>(new_size, UINT32_MAX); + slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator()); lg_size = new_lg_size; for (size_t i = 0; i < old_size; i++) { if (old_slots[i] != UINT32_MAX) { @@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) { // The result is nearly sorted, so make sure to use an efficient sort for that case template<typename A> vector_u32<A> u32_table<A>::unwrapping_get_items() const { - if (num_items == 0) return vector_u32<A>(); + if (num_items == 0) return vector_u32<A>(slots.get_allocator()); const size_t table_size = 1 << lg_size; - vector_u32<A> result(num_items); + vector_u32<A> result(num_items, 0, slots.get_allocator()); size_t i = 0; size_t l = 0; size_t r = num_items - 1; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
