This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch theta_exception_safety in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 39bf807c387801151dbafb12fcce1a94f73d0611 Author: AlexanderSaydakov <[email protected]> AuthorDate: Thu May 7 13:20:20 2020 -0700 use vector to simplify and improve safety --- theta/include/theta_a_not_b_impl.hpp | 38 ++--- theta/include/theta_intersection.hpp | 9 +- theta/include/theta_intersection_impl.hpp | 100 +++---------- theta/include/theta_sketch.hpp | 26 ++-- theta/include/theta_sketch_impl.hpp | 234 +++++++----------------------- theta/include/theta_union_impl.hpp | 18 +-- 6 files changed, 98 insertions(+), 327 deletions(-) diff --git a/theta/include/theta_a_not_b_impl.hpp b/theta/include/theta_a_not_b_impl.hpp index 9a21920..cc171ce 100644 --- a/theta/include/theta_a_not_b_impl.hpp +++ b/theta/include/theta_a_not_b_impl.hpp @@ -43,34 +43,32 @@ compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch if (a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered); const uint64_t theta = std::min(a.get_theta64(), b.get_theta64()); - uint64_t* keys = nullptr; + vector_u64<A> keys; uint32_t keys_size = 0; uint32_t count = 0; bool is_empty = a.is_empty(); if (b.get_num_retained() == 0) { for (auto key: a) if (key < theta) ++count; - keys_size = count; - keys = AllocU64().allocate(keys_size); - std::copy_if(a.begin(), a.end(), keys, [theta](uint64_t key) { return key < theta; }); - if (ordered && !a.is_ordered()) std::sort(keys, &keys[keys_size]); + keys.resize(count); + std::copy_if(a.begin(), a.end(), &keys[0], [theta](uint64_t key) { return key < theta; }); + if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end()); if (count == 0 && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true; - return compact_theta_sketch_alloc<A>(is_empty, theta, keys, count, seed_hash_, a.is_ordered() || ordered); + return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered); } keys_size = a.get_num_retained(); - keys = AllocU64().allocate(keys_size); + keys.resize(keys_size); if (a.is_ordered() && b.is_ordered()) { // sort-based - const auto end = std::set_difference(a.begin(), a.end(), b.begin(), b.end(), keys); - count = end - keys; + const auto end = std::set_difference(a.begin(), a.end(), b.begin(), b.end(), keys.begin()); + count = end - keys.begin(); } else { // hash-based const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD); - uint64_t* b_hash_table = AllocU64().allocate(1 << lg_size); - std::fill(b_hash_table, &b_hash_table[1 << lg_size], 0); + vector_u64<A> b_hash_table(1 << lg_size, 0); for (auto key: b) { if (key < theta) { - update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table, lg_size); + update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table.data(), lg_size); } else if (b.is_ordered()) { break; // early stop } @@ -79,28 +77,22 @@ compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch // scan A lookup B for (auto key: a) { if (key < theta) { - if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table, lg_size)) keys[count++] = key; + if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table.data(), lg_size)) keys[count++] = key; } else if (a.is_ordered()) { break; // early stop } } - - AllocU64().deallocate(b_hash_table, 1 << lg_size); } if (count == 0) { - AllocU64().deallocate(keys, keys_size); - keys = nullptr; + keys.resize(0); if (theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true; } else if (count < keys_size) { - uint64_t* keys_copy = AllocU64().allocate(count); - std::copy(keys, &keys[count], keys_copy); - AllocU64().deallocate(keys, keys_size); - keys = keys_copy; - if (ordered && !a.is_ordered()) std::sort(keys, &keys[count]); + keys.resize(count); + if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end()); } - return compact_theta_sketch_alloc<A>(is_empty, theta, keys, count, seed_hash_, a.is_ordered() || ordered); + return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered); } } /* namespace datasketches */ diff --git a/theta/include/theta_intersection.hpp b/theta/include/theta_intersection.hpp index 51beb6e..f74ae5e 100644 --- a/theta/include/theta_intersection.hpp +++ b/theta/include/theta_intersection.hpp @@ -44,13 +44,6 @@ public: */ explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED); - theta_intersection_alloc(const theta_intersection_alloc<A>& other); - theta_intersection_alloc(theta_intersection_alloc<A>&& other) noexcept; - ~theta_intersection_alloc(); - - theta_intersection_alloc<A>& operator=(theta_intersection_alloc<A> other); - theta_intersection_alloc<A>& operator=(theta_intersection_alloc<A>&& other); - /** * Updates the intersection with a given sketch. * The intersection can be viewed as starting from the "universe" set, and every update @@ -80,7 +73,7 @@ private: bool is_empty_; uint64_t theta_; uint8_t lg_size_; - uint64_t* keys_; + vector_u64<A> keys_; uint32_t num_keys_; uint16_t seed_hash_; }; diff --git a/theta/include/theta_intersection_impl.hpp b/theta/include/theta_intersection_impl.hpp index f6c66db..79fea4e 100644 --- a/theta/include/theta_intersection_impl.hpp +++ b/theta/include/theta_intersection_impl.hpp @@ -36,74 +36,12 @@ is_valid_(false), is_empty_(false), theta_(theta_sketch_alloc<A>::MAX_THETA), lg_size_(0), -keys_(nullptr), +keys_(), num_keys_(0), seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed)) {} template<typename A> -theta_intersection_alloc<A>::theta_intersection_alloc(const theta_intersection_alloc<A>& other): -is_valid_(other.is_valid_), -is_empty_(other.is_empty_), -theta_(other.theta_), -lg_size_(other.lg_size_), -keys_(other.keys_ == nullptr ? nullptr : AllocU64().allocate(1 << lg_size_)), -num_keys_(other.num_keys_), -seed_hash_(other.seed_hash_) -{ - if (keys_ != nullptr) std::copy(other.keys_, &other.keys_[1 << lg_size_], keys_); -} - -template<typename A> -theta_intersection_alloc<A>::theta_intersection_alloc(theta_intersection_alloc<A>&& other) noexcept: -is_valid_(false), -is_empty_(false), -theta_(theta_sketch_alloc<A>::MAX_THETA), -lg_size_(0), -keys_(nullptr), -num_keys_(0), -seed_hash_(other.seed_hash_) -{ - std::swap(is_valid_, other.is_valid_); - std::swap(is_empty_, other.is_empty_); - std::swap(theta_, other.theta_); - std::swap(lg_size_, other.lg_size_); - std::swap(keys_, other.keys_); - std::swap(num_keys_, other.num_keys_); -} - -template<typename A> -theta_intersection_alloc<A>::~theta_intersection_alloc() { - if (keys_ != nullptr) { - AllocU64().deallocate(keys_, 1 << lg_size_); - } -} - -template<typename A> -theta_intersection_alloc<A>& theta_intersection_alloc<A>::operator=(theta_intersection_alloc<A> other) { - std::swap(is_valid_, other.is_valid_); - std::swap(is_empty_, other.is_empty_); - std::swap(theta_, other.theta_); - std::swap(lg_size_, other.lg_size_); - std::swap(keys_, other.keys_); - std::swap(num_keys_, other.num_keys_); - std::swap(seed_hash_, other.seed_hash_); - return *this; -} - -template<typename A> -theta_intersection_alloc<A>& theta_intersection_alloc<A>::operator=(theta_intersection_alloc<A>&& other) { - std::swap(is_valid_, other.is_valid_); - std::swap(is_empty_, other.is_empty_); - std::swap(theta_, other.theta_); - std::swap(lg_size_, other.lg_size_); - std::swap(keys_, other.keys_); - std::swap(num_keys_, other.num_keys_); - std::swap(seed_hash_, other.seed_hash_); - return *this; -} - -template<typename A> void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { if (is_empty_) return; if (sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch"); @@ -112,9 +50,8 @@ void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { if (is_valid_ && num_keys_ == 0) return; if (sketch.get_num_retained() == 0) { is_valid_ = true; - if (keys_ != nullptr) { - AllocU64().deallocate(keys_, 1 << lg_size_); - keys_ = nullptr; + if (keys_.size() > 0) { + keys_.resize(0); lg_size_ = 0; num_keys_ = 0; } @@ -123,10 +60,9 @@ void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { if (!is_valid_) { // first update, clone incoming sketch is_valid_ = true; lg_size_ = lg_size_from_count(sketch.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD); - keys_ = AllocU64().allocate(1 << lg_size_); - std::fill(keys_, &keys_[1 << lg_size_], 0); + keys_.resize(1 << lg_size_, 0); for (auto key: sketch) { - if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, keys_, lg_size_)) { + if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, keys_.data(), lg_size_)) { throw std::invalid_argument("duplicate key, possibly corrupted input sketch"); } ++num_keys_; @@ -134,12 +70,12 @@ void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { if (num_keys_ != sketch.get_num_retained()) throw std::invalid_argument("num keys mismatch, possibly corrupted input sketch"); } else { // intersection const uint32_t max_matches = std::min(num_keys_, sketch.get_num_retained()); - uint64_t* matched_keys = AllocU64().allocate(max_matches); + vector_u64<A> matched_keys(max_matches); uint32_t match_count = 0; uint32_t count = 0; for (auto key: sketch) { if (key < theta_) { - if (update_theta_sketch_alloc<A>::hash_search(key, keys_, lg_size_)) { + if (update_theta_sketch_alloc<A>::hash_search(key, keys_.data(), lg_size_)) { if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch"); matched_keys[match_count++] = key; } @@ -154,36 +90,34 @@ void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) { throw std::invalid_argument(" fewer keys then expected, possibly corrupted input sketch"); } if (match_count == 0) { - AllocU64().deallocate(keys_, 1 << lg_size_); - keys_ = nullptr; + keys_.resize(0); lg_size_ = 0; num_keys_ = 0; if (theta_ == theta_sketch_alloc<A>::MAX_THETA) is_empty_ = true; } else { const uint8_t lg_size = lg_size_from_count(match_count, update_theta_sketch_alloc<A>::REBUILD_THRESHOLD); if (lg_size != lg_size_) { - AllocU64().deallocate(keys_, 1 << lg_size_); lg_size_ = lg_size; - keys_ = AllocU64().allocate(1 << lg_size_); + keys_.resize(1 << lg_size_); } - std::fill(keys_, &keys_[1 << lg_size_], 0); + std::fill(&keys_[0], &keys_[1 << lg_size_], 0); for (uint32_t i = 0; i < match_count; i++) { - update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], keys_, lg_size_); + update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], keys_.data(), lg_size_); } num_keys_ = match_count; } - AllocU64().deallocate(matched_keys, max_matches); } } template<typename A> compact_theta_sketch_alloc<A> theta_intersection_alloc<A>::get_result(bool ordered) const { if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined"); - if (num_keys_ == 0) return compact_theta_sketch_alloc<A>(is_empty_, theta_, nullptr, 0, seed_hash_, ordered); - uint64_t* keys = AllocU64().allocate(num_keys_); - std::copy_if(keys_, &keys_[1 << lg_size_], keys, [](uint64_t key) { return key != 0; }); - if (ordered) std::sort(keys, &keys[num_keys_]); - return compact_theta_sketch_alloc<A>(false, this->theta_, keys, num_keys_, seed_hash_, ordered); + vector_u64<A> keys(num_keys_); + if (num_keys_ > 0) { + std::copy_if(&keys_[0], &keys_[1 << lg_size_], &keys[0], [](uint64_t key) { return key != 0; }); + if (ordered) std::sort(keys.begin(), keys.end()); + } + return compact_theta_sketch_alloc<A>(is_empty_, theta_, std::move(keys), seed_hash_, ordered); } template<typename A> diff --git a/theta/include/theta_sketch.hpp b/theta/include/theta_sketch.hpp index 5e85131..b1a88ed 100644 --- a/theta/include/theta_sketch.hpp +++ b/theta/include/theta_sketch.hpp @@ -198,6 +198,9 @@ protected: // update sketch +template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>; +template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>; + template<typename A> class update_theta_sketch_alloc: public theta_sketch_alloc<A> { public: @@ -207,12 +210,7 @@ public: // No constructor here. Use builder instead. - update_theta_sketch_alloc(const update_theta_sketch_alloc<A>& other); - update_theta_sketch_alloc(update_theta_sketch_alloc<A>&& other) noexcept; - virtual ~update_theta_sketch_alloc(); - - update_theta_sketch_alloc<A>& operator=(const update_theta_sketch_alloc<A>& other); - update_theta_sketch_alloc<A>& operator=(update_theta_sketch_alloc<A>&& other); + virtual ~update_theta_sketch_alloc() = default; virtual uint32_t get_num_retained() const; virtual uint16_t get_seed_hash() const; @@ -355,7 +353,7 @@ private: uint8_t lg_cur_size_; uint8_t lg_nom_size_; - uint64_t* keys_; + vector_u64<A> keys_; uint32_t num_keys_; resize_factor rf_; float p_; @@ -367,7 +365,7 @@ private: // for builder update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed); // for deserialize - update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, uint64_t* keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed); + update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed); void resize(); void rebuild(); @@ -400,13 +398,8 @@ public: // - as a result of a set operation // - by deserializing a previously serialized compact sketch - compact_theta_sketch_alloc(const compact_theta_sketch_alloc<A>& other); compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered); - compact_theta_sketch_alloc(compact_theta_sketch_alloc<A>&& other) noexcept; - virtual ~compact_theta_sketch_alloc(); - - compact_theta_sketch_alloc<A>& operator=(const compact_theta_sketch_alloc<A>& other); - compact_theta_sketch_alloc<A>& operator=(compact_theta_sketch_alloc<A>&& other); + virtual ~compact_theta_sketch_alloc() = default; virtual uint32_t get_num_retained() const; virtual uint16_t get_seed_hash() const; @@ -440,8 +433,7 @@ public: private: typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64; - uint64_t* keys_; - uint32_t num_keys_; + vector_u64<A> keys_; uint16_t seed_hash_; bool is_ordered_; @@ -450,7 +442,7 @@ private: friend theta_union_alloc<A>; friend theta_intersection_alloc<A>; friend theta_a_not_b_alloc<A>; - compact_theta_sketch_alloc(bool is_empty, uint64_t theta, uint64_t* keys, uint32_t num_keys, uint16_t seed_hash, bool is_ordered); + compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered); static compact_theta_sketch_alloc<A> internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash); static compact_theta_sketch_alloc<A> internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash); }; diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp index ac6b223..8abbaf8 100644 --- a/theta/include/theta_sketch_impl.hpp +++ b/theta/include/theta_sketch_impl.hpp @@ -237,7 +237,7 @@ update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uin theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA), lg_cur_size_(lg_cur_size), lg_nom_size_(lg_nom_size), -keys_(AllocU64().allocate(1 << lg_cur_size_)), +keys_(1 << lg_cur_size_, 0), num_keys_(0), rf_(rf), p_(p), @@ -245,15 +245,14 @@ seed_(seed), capacity_(get_capacity(lg_cur_size, lg_nom_size)) { if (p < 1) this->theta_ *= p; - std::fill(keys_, &keys_[1 << lg_cur_size_], 0); } template<typename A> -update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, uint64_t* keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed): +update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed): theta_sketch_alloc<A>(is_empty, theta), lg_cur_size_(lg_cur_size), lg_nom_size_(lg_nom_size), -keys_(keys), +keys_(std::move(keys)), num_keys_(num_keys), rf_(rf), p_(p), @@ -262,74 +261,6 @@ capacity_(get_capacity(lg_cur_size, lg_nom_size)) {} template<typename A> -update_theta_sketch_alloc<A>::update_theta_sketch_alloc(const update_theta_sketch_alloc<A>& other): -theta_sketch_alloc<A>(other), -lg_cur_size_(other.lg_cur_size_), -lg_nom_size_(other.lg_nom_size_), -keys_(AllocU64().allocate(1 << lg_cur_size_)), -num_keys_(other.num_keys_), -rf_(other.rf_), -p_(other.p_), -seed_(other.seed_), -capacity_(other.capacity_) -{ - std::copy(other.keys_, &other.keys_[1 << lg_cur_size_], keys_); -} - -template<typename A> -update_theta_sketch_alloc<A>::update_theta_sketch_alloc(update_theta_sketch_alloc<A>&& other) noexcept: -theta_sketch_alloc<A>(std::move(other)), -lg_cur_size_(other.lg_cur_size_), -lg_nom_size_(other.lg_nom_size_), -keys_(nullptr), -num_keys_(other.num_keys_), -rf_(other.rf_), -p_(other.p_), -seed_(other.seed_), -capacity_(other.capacity_) -{ - std::swap(keys_, other.keys_); -} - -template<typename A> -update_theta_sketch_alloc<A>::~update_theta_sketch_alloc() { - if (keys_ != nullptr) - AllocU64().deallocate(keys_, 1 << lg_cur_size_); -} - -template<typename A> -update_theta_sketch_alloc<A>& update_theta_sketch_alloc<A>::operator=(const update_theta_sketch_alloc<A>& other) { - theta_sketch_alloc<A>::operator=(other); - if (lg_cur_size_ != other.lg_cur_size_) { - AllocU64().deallocate(keys_, 1 << lg_cur_size_); - lg_cur_size_ = other.lg_cur_size_; - keys_ = AllocU64().allocate(1 << lg_cur_size_); - } - lg_nom_size_ = other.lg_nom_size_; - std::copy(other.keys_, &other.keys_[1 << lg_cur_size_], keys_); - num_keys_ = other.num_keys_; - rf_ = other.rf_; - p_ = other.p_; - seed_ = other.seed_; - capacity_ = other.capacity_; - return *this; -} - -template<typename A> -update_theta_sketch_alloc<A>& update_theta_sketch_alloc<A>::operator=(update_theta_sketch_alloc<A>&& other) { - theta_sketch_alloc<A>::operator=(std::move(other)); - std::swap(lg_cur_size_, other.lg_cur_size_); - lg_nom_size_ = other.lg_nom_size_; - std::swap(keys_, other.keys_); - num_keys_ = other.num_keys_; - rf_ = other.rf_; - p_ = other.p_; - seed_ = other.seed_; - capacity_ = other.capacity_; - return *this; -} - -template<typename A> uint32_t update_theta_sketch_alloc<A>::get_num_retained() const { return num_keys_; } @@ -388,13 +319,13 @@ void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const { os.write((char*)&num_keys_, sizeof(num_keys_)); os.write((char*)&p_, sizeof(p_)); os.write((char*)&(this->theta_), sizeof(uint64_t)); - os.write((char*)keys_, sizeof(uint64_t) * (1 << lg_cur_size_)); + os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size()); } template<typename A> vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const { const uint8_t preamble_longs = 3; - const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * (1 << lg_cur_size_); + const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size(); vector_u8<A> bytes(size); uint8_t* ptr = bytes.data() + header_size_bytes; @@ -415,7 +346,7 @@ vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_)); ptr += copy_to_mem(&p_, ptr, sizeof(p_)); ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t)); - ptr += copy_to_mem(keys_, ptr, sizeof(uint64_t) * (1 << lg_cur_size_)); + ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size()); return bytes; } @@ -452,10 +383,10 @@ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize( is.read((char*)&p, sizeof(p)); uint64_t theta; is.read((char*)&theta, sizeof(theta)); - uint64_t* keys = AllocU64().allocate(1 << lg_cur_size); - is.read((char*)keys, sizeof(uint64_t) * (1 << lg_cur_size)); + vector_u64<A> keys(1 << lg_cur_size); + is.read((char*)keys.data(), sizeof(uint64_t) * keys.size()); const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); - return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, keys, num_keys, rf, p, seed); + return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed); } template<typename A> @@ -495,10 +426,10 @@ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize( ptr += copy_from_mem(ptr, &p, sizeof(p)); uint64_t theta; ptr += copy_from_mem(ptr, &theta, sizeof(theta)); - uint64_t* keys = AllocU64().allocate(table_size); - ptr += copy_from_mem(ptr, keys, sizeof(uint64_t) * table_size); + vector_u64<A> keys(table_size); + ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size); const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); - return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, keys, num_keys, rf, p, seed); + return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed); } template<typename A> @@ -586,7 +517,7 @@ template<typename A> void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) { this->is_empty_ = false; if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table - if (hash_search_or_insert(hash, keys_, lg_cur_size_)) { + if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) { num_keys_++; if (num_keys_ > capacity_) { if (lg_cur_size_ <= lg_nom_size_) { @@ -605,41 +536,35 @@ void update_theta_sketch_alloc<A>::trim() { template<typename A> void update_theta_sketch_alloc<A>::resize() { - const uint32_t cur_size = 1 << lg_cur_size_; const uint8_t lg_tgt_size = lg_nom_size_ + 1; const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_)); const uint8_t lg_new_size = lg_cur_size_ + factor; const uint32_t new_size = 1 << lg_new_size; - uint64_t* new_keys = AllocU64().allocate(new_size); - std::fill(new_keys, &new_keys[new_size], 0); - for (uint32_t i = 0; i < cur_size; i++) { + vector_u64<A> new_keys(new_size, 0); + for (uint32_t i = 0; i < keys_.size(); i++) { if (keys_[i] != 0) { - hash_search_or_insert(keys_[i], new_keys, lg_new_size); // TODO hash_insert + hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert } } - AllocU64().deallocate(keys_, cur_size); - keys_ = new_keys; + keys_ = std::move(new_keys); lg_cur_size_ += factor; capacity_ = get_capacity(lg_cur_size_, lg_nom_size_); } template<typename A> void update_theta_sketch_alloc<A>::rebuild() { - const uint32_t cur_size = 1 << lg_cur_size_; - const uint32_t pivot = (1 << lg_nom_size_) + cur_size - num_keys_; - std::nth_element(&keys_[0], &keys_[pivot], &keys_[cur_size]); + const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_; + std::nth_element(&keys_[0], &keys_[pivot], &keys_[keys_.size()]); this->theta_ = keys_[pivot]; - uint64_t* new_keys = AllocU64().allocate(cur_size); - std::fill(new_keys, &new_keys[cur_size], 0); + vector_u64<A> new_keys(keys_.size(), 0); num_keys_ = 0; - for (uint32_t i = 0; i < cur_size; i++) { + for (uint32_t i = 0; i < keys_.size(); i++) { if (keys_[i] != 0 && keys_[i] < this->theta_) { - hash_search_or_insert(keys_[i], new_keys, lg_cur_size_); // TODO hash_insert + hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert num_keys_++; } } - AllocU64().deallocate(keys_, cur_size); - keys_ = new_keys; + keys_ = std::move(new_keys); } template<typename A> @@ -695,93 +620,38 @@ bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* ta template<typename A> typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const { - return typename theta_sketch_alloc<A>::const_iterator(keys_, 1 << lg_cur_size_, 0); + return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0); } template<typename A> typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const { - return typename theta_sketch_alloc<A>::const_iterator(keys_, 1 << lg_cur_size_, 1 << lg_cur_size_); + return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size()); } // compact sketch template<typename A> -compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, uint64_t* keys, uint32_t num_keys, uint16_t seed_hash, bool is_ordered): +compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered): theta_sketch_alloc<A>(is_empty, theta), -keys_(keys), -num_keys_(num_keys), +keys_(std::move(keys)), seed_hash_(seed_hash), is_ordered_(is_ordered) {} template<typename A> -compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const compact_theta_sketch_alloc<A>& other): -theta_sketch_alloc<A>(other), -keys_(AllocU64().allocate(other.num_keys_)), -num_keys_(other.num_keys_), -seed_hash_(other.seed_hash_), -is_ordered_(other.is_ordered_) -{ - std::copy(other.keys_, &other.keys_[num_keys_], keys_); -} - -template<typename A> compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered): theta_sketch_alloc<A>(other), -keys_(AllocU64().allocate(other.get_num_retained())), -num_keys_(other.get_num_retained()), +keys_(other.get_num_retained()), seed_hash_(other.get_seed_hash()), is_ordered_(other.is_ordered() || ordered) { - std::copy(other.begin(), other.end(), keys_); - if (ordered && !other.is_ordered()) std::sort(keys_, &keys_[num_keys_]); -} - -template<typename A> -compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(compact_theta_sketch_alloc<A>&& other) noexcept: -theta_sketch_alloc<A>(std::move(other)), -keys_(nullptr), -num_keys_(other.num_keys_), -seed_hash_(other.seed_hash_), -is_ordered_(other.is_ordered_) -{ - std::swap(keys_, other.keys_); -} - -template<typename A> -compact_theta_sketch_alloc<A>::~compact_theta_sketch_alloc() { - typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64; - if (keys_ != nullptr) - AllocU64().deallocate(keys_, num_keys_); -} - -template<typename A> -compact_theta_sketch_alloc<A>& compact_theta_sketch_alloc<A>::operator=(const compact_theta_sketch_alloc<A>& other) { - theta_sketch_alloc<A>::operator=(other); - if (num_keys_ != other.num_keys_) { - AllocU64().deallocate(keys_, num_keys_); - num_keys_ = other.num_keys_; - keys_ = AllocU64().allocate(num_keys_); - } - seed_hash_ = other.seed_hash_; - is_ordered_ = other.is_ordered_; - std::copy(other.keys_, &other.keys_[num_keys_], keys_); - return *this; -} - -template<typename A> -compact_theta_sketch_alloc<A>& compact_theta_sketch_alloc<A>::operator=(compact_theta_sketch_alloc<A>&& other) { - theta_sketch_alloc<A>::operator=(std::move(other)); - std::swap(keys_, other.keys_); - std::swap(num_keys_, other.num_keys_); - std::swap(seed_hash_, other.seed_hash_); - std::swap(is_ordered_, other.is_ordered_); - return *this; + std::copy(other.begin(), other.end(), keys_.begin()); + if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end()); } template<typename A> uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const { - return num_keys_; + return keys_.size(); } template<typename A> @@ -797,7 +667,7 @@ bool compact_theta_sketch_alloc<A>::is_ordered() const { template<typename A> void compact_theta_sketch_alloc<A>::to_stream(std::ostream& os, bool print_items) const { os << "### Compact Theta sketch summary:" << std::endl; - os << " num retained keys : " << num_keys_ << std::endl; + os << " num retained keys : " << keys_.size() << std::endl; os << " seed hash : " << this->get_seed_hash() << std::endl; os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl; os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl; @@ -817,7 +687,7 @@ void compact_theta_sketch_alloc<A>::to_stream(std::ostream& os, bool print_items template<typename A> void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const { - const bool is_single_item = num_keys_ == 1 && !this->is_estimation_mode(); + const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode(); const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2; os.write((char*)&preamble_longs, sizeof(preamble_longs)); const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION; @@ -837,22 +707,23 @@ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const { os.write((char*)&seed_hash, sizeof(seed_hash)); if (!this->is_empty()) { if (!is_single_item) { - os.write((char*)&num_keys_, sizeof(num_keys_)); + const uint32_t num_keys = keys_.size(); + os.write((char*)&num_keys, sizeof(num_keys)); const uint32_t unused32 = 0; os.write((char*)&unused32, sizeof(unused32)); if (this->is_estimation_mode()) { os.write((char*)&(this->theta_), sizeof(uint64_t)); } } - os.write((char*)keys_, sizeof(uint64_t) * num_keys_); + os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size()); } } template<typename A> vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const { - const bool is_single_item = num_keys_ == 1 && !this->is_estimation_mode(); + const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode(); const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2; - const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * num_keys_; + const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size(); vector_u8<A> bytes(size); uint8_t* ptr = bytes.data() + header_size_bytes; @@ -874,14 +745,15 @@ vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash)); if (!this->is_empty()) { if (!is_single_item) { - ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_)); + const uint32_t num_keys = keys_.size(); + ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys)); const uint32_t unused32 = 0; ptr += copy_to_mem(&unused32, ptr, sizeof(unused32)); if (this->is_estimation_mode()) { ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t)); } } - ptr += copy_to_mem(keys_, ptr, sizeof(uint64_t) * num_keys_); + ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size()); } return bytes; @@ -910,7 +782,6 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is template<typename A> compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) { uint64_t theta = theta_sketch_alloc<A>::MAX_THETA; - uint64_t* keys = nullptr; uint32_t num_keys = 0; const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); @@ -925,13 +796,12 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ is.read((char*)&theta, sizeof(theta)); } } - typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64; - keys = AllocU64().allocate(num_keys); - is.read((char*)keys, sizeof(uint64_t) * num_keys); } + vector_u64<A> keys(num_keys); + if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size()); const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED); - return compact_theta_sketch_alloc<A>(is_empty, theta, keys, num_keys, seed_hash, is_ordered); + return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered); } template<typename A> @@ -962,7 +832,6 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ const char* base = ptr; uint64_t theta = theta_sketch_alloc<A>::MAX_THETA; - uint64_t* keys = nullptr; uint32_t num_keys = 0; const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY); @@ -979,25 +848,24 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ ptr += copy_from_mem(ptr, &theta, sizeof(theta)); } } - const size_t keys_size_bytes = sizeof(uint64_t) * num_keys; - check_memory_size(ptr - base + keys_size_bytes, size); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64; - keys = AllocU64().allocate(num_keys); - ptr += copy_from_mem(ptr, keys, keys_size_bytes); } + const size_t keys_size_bytes = sizeof(uint64_t) * num_keys; + check_memory_size(ptr - base + keys_size_bytes, size); + vector_u64<A> keys(num_keys); + if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes); const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED); - return compact_theta_sketch_alloc<A>(is_empty, theta, keys, num_keys, seed_hash, is_ordered); + return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered); } template<typename A> typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const { - return typename theta_sketch_alloc<A>::const_iterator(keys_, num_keys_, 0); + return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0); } template<typename A> typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const { - return typename theta_sketch_alloc<A>::const_iterator(keys_, num_keys_, num_keys_); + return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size()); } // builder diff --git a/theta/include/theta_union_impl.hpp b/theta/include/theta_union_impl.hpp index 07c46c2..5f4d7f9 100644 --- a/theta/include/theta_union_impl.hpp +++ b/theta/include/theta_union_impl.hpp @@ -55,29 +55,21 @@ compact_theta_sketch_alloc<A> theta_union_alloc<A>::get_result(bool ordered) con const uint32_t nom_num_keys = 1 << state_.lg_nom_size_; if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered); uint64_t theta = std::min(theta_, state_.get_theta64()); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64; - uint64_t* keys = AllocU64().allocate(state_.get_num_retained()); + vector_u64<A> keys(state_.get_num_retained()); uint32_t num_keys = 0; for (auto key: state_) { if (key < theta) keys[num_keys++] = key; } - if (num_keys == 0) { - AllocU64().deallocate(keys, state_.get_num_retained()); - return compact_theta_sketch_alloc<A>(is_empty_, theta, nullptr, 0, state_.get_seed_hash(), ordered); - } if (num_keys > nom_num_keys) { - std::nth_element(keys, &keys[nom_num_keys], &keys[num_keys]); + std::nth_element(&keys[0], &keys[nom_num_keys], &keys[num_keys]); theta = keys[nom_num_keys]; num_keys = nom_num_keys; } if (num_keys != state_.get_num_retained()) { - uint64_t* new_keys = AllocU64().allocate(num_keys); - std::copy(keys, &keys[num_keys], new_keys); - AllocU64().deallocate(keys, state_.get_num_retained()); - keys = new_keys; + keys.resize(num_keys); } - if (ordered) std::sort(keys, &keys[num_keys]); - return compact_theta_sketch_alloc<A>(false, theta, keys, num_keys, state_.get_seed_hash(), ordered); + if (ordered) std::sort(keys.begin(), keys.end()); + return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered); } // builder --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
