[incubator-datasketches-cpp] 01/01: use vector to simplify and improve safety

alsay Thu, 07 May 2020 13:21:09 -0700

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch theta_exception_safety
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


commit 39bf807c387801151dbafb12fcce1a94f73d0611
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Thu May 7 13:20:20 2020 -0700

    use vector to simplify and improve safety
---
 theta/include/theta_a_not_b_impl.hpp      |  38 ++---
 theta/include/theta_intersection.hpp      |   9 +-
 theta/include/theta_intersection_impl.hpp | 100 +++----------
 theta/include/theta_sketch.hpp            |  26 ++--
 theta/include/theta_sketch_impl.hpp       | 234 +++++++-----------------------
 theta/include/theta_union_impl.hpp        |  18 +--
 6 files changed, 98 insertions(+), 327 deletions(-)

diff --git a/theta/include/theta_a_not_b_impl.hpp 
b/theta/include/theta_a_not_b_impl.hpp
index 9a21920..cc171ce 100644
--- a/theta/include/theta_a_not_b_impl.hpp
+++ b/theta/include/theta_a_not_b_impl.hpp
@@ -43,34 +43,32 @@ compact_theta_sketch_alloc<A> 
theta_a_not_b_alloc<A>::compute(const theta_sketch
   if (a.get_num_retained() == 0 || b.is_empty()) return 
compact_theta_sketch_alloc<A>(a, ordered);
 
   const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
-  uint64_t* keys = nullptr;
+  vector_u64<A> keys;
   uint32_t keys_size = 0;
   uint32_t count = 0;
   bool is_empty = a.is_empty();
 
   if (b.get_num_retained() == 0) {
     for (auto key: a) if (key < theta) ++count;
-    keys_size = count;
-    keys = AllocU64().allocate(keys_size);
-    std::copy_if(a.begin(), a.end(), keys, [theta](uint64_t key) { return key 
< theta; });
-    if (ordered && !a.is_ordered()) std::sort(keys, &keys[keys_size]);
+    keys.resize(count);
+    std::copy_if(a.begin(), a.end(), &keys[0], [theta](uint64_t key) { return 
key < theta; });
+    if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
     if (count == 0 && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = 
true;
-    return compact_theta_sketch_alloc<A>(is_empty, theta, keys, count, 
seed_hash_, a.is_ordered() || ordered);
+    return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), 
seed_hash_, a.is_ordered() || ordered);
   }
 
   keys_size = a.get_num_retained();
-  keys = AllocU64().allocate(keys_size);
+  keys.resize(keys_size);
 
   if (a.is_ordered() && b.is_ordered()) { // sort-based
-    const auto end = std::set_difference(a.begin(), a.end(), b.begin(), 
b.end(), keys);
-    count = end - keys;
+    const auto end = std::set_difference(a.begin(), a.end(), b.begin(), 
b.end(), keys.begin());
+    count = end - keys.begin();
   } else { // hash-based
     const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), 
update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-    uint64_t* b_hash_table = AllocU64().allocate(1 << lg_size);
-    std::fill(b_hash_table, &b_hash_table[1 << lg_size], 0);
+    vector_u64<A> b_hash_table(1 << lg_size, 0);
     for (auto key: b) {
       if (key < theta) {
-        update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table, 
lg_size);
+        update_theta_sketch_alloc<A>::hash_search_or_insert(key, 
b_hash_table.data(), lg_size);
       } else if (b.is_ordered()) {
         break; // early stop
       }
@@ -79,28 +77,22 @@ compact_theta_sketch_alloc<A> 
theta_a_not_b_alloc<A>::compute(const theta_sketch
     // scan A lookup B
     for (auto key: a) {
       if (key < theta) {
-        if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table, 
lg_size)) keys[count++] = key;
+        if (!update_theta_sketch_alloc<A>::hash_search(key, 
b_hash_table.data(), lg_size)) keys[count++] = key;
       } else if (a.is_ordered()) {
         break; // early stop
       }
     }
-
-    AllocU64().deallocate(b_hash_table, 1 << lg_size);
   }
 
   if (count == 0) {
-    AllocU64().deallocate(keys, keys_size);
-    keys = nullptr;
+    keys.resize(0);
     if (theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
   } else if (count < keys_size) {
-    uint64_t* keys_copy = AllocU64().allocate(count);
-    std::copy(keys, &keys[count], keys_copy);
-    AllocU64().deallocate(keys, keys_size);
-    keys = keys_copy;
-    if (ordered && !a.is_ordered()) std::sort(keys, &keys[count]);
+    keys.resize(count);
+    if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
   }
 
-  return compact_theta_sketch_alloc<A>(is_empty, theta, keys, count, 
seed_hash_, a.is_ordered() || ordered);
+  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), 
seed_hash_, a.is_ordered() || ordered);
 }
 
 } /* namespace datasketches */
diff --git a/theta/include/theta_intersection.hpp 
b/theta/include/theta_intersection.hpp
index 51beb6e..f74ae5e 100644
--- a/theta/include/theta_intersection.hpp
+++ b/theta/include/theta_intersection.hpp
@@ -44,13 +44,6 @@ public:
    */
   explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED);
 
-  theta_intersection_alloc(const theta_intersection_alloc<A>& other);
-  theta_intersection_alloc(theta_intersection_alloc<A>&& other) noexcept;
-  ~theta_intersection_alloc();
-
-  theta_intersection_alloc<A>& operator=(theta_intersection_alloc<A> other);
-  theta_intersection_alloc<A>& operator=(theta_intersection_alloc<A>&& other);
-
   /**
    * Updates the intersection with a given sketch.
    * The intersection can be viewed as starting from the "universe" set, and 
every update
@@ -80,7 +73,7 @@ private:
   bool is_empty_;
   uint64_t theta_;
   uint8_t lg_size_;
-  uint64_t* keys_;
+  vector_u64<A> keys_;
   uint32_t num_keys_;
   uint16_t seed_hash_;
 };
diff --git a/theta/include/theta_intersection_impl.hpp 
b/theta/include/theta_intersection_impl.hpp
index f6c66db..79fea4e 100644
--- a/theta/include/theta_intersection_impl.hpp
+++ b/theta/include/theta_intersection_impl.hpp
@@ -36,74 +36,12 @@ is_valid_(false),
 is_empty_(false),
 theta_(theta_sketch_alloc<A>::MAX_THETA),
 lg_size_(0),
-keys_(nullptr),
+keys_(),
 num_keys_(0),
 seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
 {}
 
 template<typename A>
-theta_intersection_alloc<A>::theta_intersection_alloc(const 
theta_intersection_alloc<A>& other):
-is_valid_(other.is_valid_),
-is_empty_(other.is_empty_),
-theta_(other.theta_),
-lg_size_(other.lg_size_),
-keys_(other.keys_ == nullptr ? nullptr : AllocU64().allocate(1 << lg_size_)),
-num_keys_(other.num_keys_),
-seed_hash_(other.seed_hash_)
-{
-  if (keys_ != nullptr) std::copy(other.keys_, &other.keys_[1 << lg_size_], 
keys_);
-}
-
-template<typename A>
-theta_intersection_alloc<A>::theta_intersection_alloc(theta_intersection_alloc<A>&&
 other) noexcept:
-is_valid_(false),
-is_empty_(false),
-theta_(theta_sketch_alloc<A>::MAX_THETA),
-lg_size_(0),
-keys_(nullptr),
-num_keys_(0),
-seed_hash_(other.seed_hash_)
-{
-  std::swap(is_valid_, other.is_valid_);
-  std::swap(is_empty_, other.is_empty_);
-  std::swap(theta_, other.theta_);
-  std::swap(lg_size_, other.lg_size_);
-  std::swap(keys_, other.keys_);
-  std::swap(num_keys_, other.num_keys_);
-}
-
-template<typename A>
-theta_intersection_alloc<A>::~theta_intersection_alloc() {
-  if (keys_ != nullptr) {
-    AllocU64().deallocate(keys_, 1 << lg_size_);
-  }
-}
-
-template<typename A>
-theta_intersection_alloc<A>& 
theta_intersection_alloc<A>::operator=(theta_intersection_alloc<A> other) {
-  std::swap(is_valid_, other.is_valid_);
-  std::swap(is_empty_, other.is_empty_);
-  std::swap(theta_, other.theta_);
-  std::swap(lg_size_, other.lg_size_);
-  std::swap(keys_, other.keys_);
-  std::swap(num_keys_, other.num_keys_);
-  std::swap(seed_hash_, other.seed_hash_);
-  return *this;
-}
-
-template<typename A>
-theta_intersection_alloc<A>& 
theta_intersection_alloc<A>::operator=(theta_intersection_alloc<A>&& other) {
-  std::swap(is_valid_, other.is_valid_);
-  std::swap(is_empty_, other.is_empty_);
-  std::swap(theta_, other.theta_);
-  std::swap(lg_size_, other.lg_size_);
-  std::swap(keys_, other.keys_);
-  std::swap(num_keys_, other.num_keys_);
-  std::swap(seed_hash_, other.seed_hash_);
-  return *this;
-}
-
-template<typename A>
 void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
   if (is_empty_) return;
   if (sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed 
hash mismatch");
@@ -112,9 +50,8 @@ void theta_intersection_alloc<A>::update(const 
theta_sketch_alloc<A>& sketch) {
   if (is_valid_ && num_keys_ == 0) return;
   if (sketch.get_num_retained() == 0) {
     is_valid_ = true;
-    if (keys_ != nullptr) {
-      AllocU64().deallocate(keys_, 1 << lg_size_);
-      keys_ = nullptr;
+    if (keys_.size() > 0) {
+      keys_.resize(0);
       lg_size_ = 0;
       num_keys_ = 0;
     }
@@ -123,10 +60,9 @@ void theta_intersection_alloc<A>::update(const 
theta_sketch_alloc<A>& sketch) {
   if (!is_valid_) { // first update, clone incoming sketch
     is_valid_ = true;
     lg_size_ = lg_size_from_count(sketch.get_num_retained(), 
update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-    keys_ = AllocU64().allocate(1 << lg_size_);
-    std::fill(keys_, &keys_[1 << lg_size_], 0);
+    keys_.resize(1 << lg_size_, 0);
     for (auto key: sketch) {
-      if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, keys_, 
lg_size_)) {
+      if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, 
keys_.data(), lg_size_)) {
         throw std::invalid_argument("duplicate key, possibly corrupted input 
sketch");
       }
       ++num_keys_;
@@ -134,12 +70,12 @@ void theta_intersection_alloc<A>::update(const 
theta_sketch_alloc<A>& sketch) {
     if (num_keys_ != sketch.get_num_retained()) throw 
std::invalid_argument("num keys mismatch, possibly corrupted input sketch");
   } else { // intersection
     const uint32_t max_matches = std::min(num_keys_, 
sketch.get_num_retained());
-    uint64_t* matched_keys = AllocU64().allocate(max_matches);
+    vector_u64<A> matched_keys(max_matches);
     uint32_t match_count = 0;
     uint32_t count = 0;
     for (auto key: sketch) {
       if (key < theta_) {
-        if (update_theta_sketch_alloc<A>::hash_search(key, keys_, lg_size_)) {
+        if (update_theta_sketch_alloc<A>::hash_search(key, keys_.data(), 
lg_size_)) {
           if (match_count == max_matches) throw std::invalid_argument("max 
matches exceeded, possibly corrupted input sketch");
           matched_keys[match_count++] = key;
         }
@@ -154,36 +90,34 @@ void theta_intersection_alloc<A>::update(const 
theta_sketch_alloc<A>& sketch) {
       throw std::invalid_argument(" fewer keys then expected, possibly 
corrupted input sketch");
     }
     if (match_count == 0) {
-      AllocU64().deallocate(keys_, 1 << lg_size_);
-      keys_ = nullptr;
+      keys_.resize(0);
       lg_size_ = 0;
       num_keys_ = 0;
       if (theta_ == theta_sketch_alloc<A>::MAX_THETA) is_empty_ = true;
     } else {
       const uint8_t lg_size = lg_size_from_count(match_count, 
update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
       if (lg_size != lg_size_) {
-        AllocU64().deallocate(keys_, 1 << lg_size_);
         lg_size_ = lg_size;
-        keys_ = AllocU64().allocate(1 << lg_size_);
+        keys_.resize(1 << lg_size_);
       }
-      std::fill(keys_, &keys_[1 << lg_size_], 0);
+      std::fill(&keys_[0], &keys_[1 << lg_size_], 0);
       for (uint32_t i = 0; i < match_count; i++) {
-        update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], 
keys_, lg_size_);
+        update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], 
keys_.data(), lg_size_);
       }
       num_keys_ = match_count;
     }
-    AllocU64().deallocate(matched_keys, max_matches);
   }
 }
 
 template<typename A>
 compact_theta_sketch_alloc<A> theta_intersection_alloc<A>::get_result(bool 
ordered) const {
   if (!is_valid_) throw std::invalid_argument("calling get_result() before 
calling update() is undefined");
-  if (num_keys_ == 0) return compact_theta_sketch_alloc<A>(is_empty_, theta_, 
nullptr, 0, seed_hash_, ordered);
-  uint64_t* keys = AllocU64().allocate(num_keys_);
-  std::copy_if(keys_, &keys_[1 << lg_size_], keys, [](uint64_t key) { return 
key != 0; });
-  if (ordered) std::sort(keys, &keys[num_keys_]);
-  return compact_theta_sketch_alloc<A>(false, this->theta_, keys, num_keys_, 
seed_hash_, ordered);
+  vector_u64<A> keys(num_keys_);
+  if (num_keys_ > 0) {
+    std::copy_if(&keys_[0], &keys_[1 << lg_size_], &keys[0], [](uint64_t key) 
{ return key != 0; });
+    if (ordered) std::sort(keys.begin(), keys.end());
+  }
+  return compact_theta_sketch_alloc<A>(is_empty_, theta_, std::move(keys), 
seed_hash_, ordered);
 }
 
 template<typename A>
diff --git a/theta/include/theta_sketch.hpp b/theta/include/theta_sketch.hpp
index 5e85131..b1a88ed 100644
--- a/theta/include/theta_sketch.hpp
+++ b/theta/include/theta_sketch.hpp
@@ -198,6 +198,9 @@ protected:
 
 // update sketch
 
+template<typename A> using AllocU64 = typename 
std::allocator_traits<A>::template rebind_alloc<uint64_t>;
+template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
+
 template<typename A>
 class update_theta_sketch_alloc: public theta_sketch_alloc<A> {
 public:
@@ -207,12 +210,7 @@ public:
 
   // No constructor here. Use builder instead.
 
-  update_theta_sketch_alloc(const update_theta_sketch_alloc<A>& other);
-  update_theta_sketch_alloc(update_theta_sketch_alloc<A>&& other) noexcept;
-  virtual ~update_theta_sketch_alloc();
-
-  update_theta_sketch_alloc<A>& operator=(const update_theta_sketch_alloc<A>& 
other);
-  update_theta_sketch_alloc<A>& operator=(update_theta_sketch_alloc<A>&& 
other);
+  virtual ~update_theta_sketch_alloc() = default;
 
   virtual uint32_t get_num_retained() const;
   virtual uint16_t get_seed_hash() const;
@@ -355,7 +353,7 @@ private:
 
   uint8_t lg_cur_size_;
   uint8_t lg_nom_size_;
-  uint64_t* keys_;
+  vector_u64<A> keys_;
   uint32_t num_keys_;
   resize_factor rf_;
   float p_;
@@ -367,7 +365,7 @@ private:
   // for builder
   update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, 
resize_factor rf, float p, uint64_t seed);
   // for deserialize
-  update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t 
lg_cur_size, uint8_t lg_nom_size, uint64_t* keys, uint32_t num_keys, 
resize_factor rf, float p, uint64_t seed);
+  update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t 
lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, 
resize_factor rf, float p, uint64_t seed);
 
   void resize();
   void rebuild();
@@ -400,13 +398,8 @@ public:
   // - as a result of a set operation
   // - by deserializing a previously serialized compact sketch
 
-  compact_theta_sketch_alloc(const compact_theta_sketch_alloc<A>& other);
   compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered);
-  compact_theta_sketch_alloc(compact_theta_sketch_alloc<A>&& other) noexcept;
-  virtual ~compact_theta_sketch_alloc();
-
-  compact_theta_sketch_alloc<A>& operator=(const 
compact_theta_sketch_alloc<A>& other);
-  compact_theta_sketch_alloc<A>& operator=(compact_theta_sketch_alloc<A>&& 
other);
+  virtual ~compact_theta_sketch_alloc() = default;
 
   virtual uint32_t get_num_retained() const;
   virtual uint16_t get_seed_hash() const;
@@ -440,8 +433,7 @@ public:
 private:
   typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> 
AllocU64;
 
-  uint64_t* keys_;
-  uint32_t num_keys_;
+  vector_u64<A> keys_;
   uint16_t seed_hash_;
   bool is_ordered_;
 
@@ -450,7 +442,7 @@ private:
   friend theta_union_alloc<A>;
   friend theta_intersection_alloc<A>;
   friend theta_a_not_b_alloc<A>;
-  compact_theta_sketch_alloc(bool is_empty, uint64_t theta, uint64_t* keys, 
uint32_t num_keys, uint16_t seed_hash, bool is_ordered);
+  compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& 
keys, uint16_t seed_hash, bool is_ordered);
   static compact_theta_sketch_alloc<A> internal_deserialize(std::istream& is, 
uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
   static compact_theta_sketch_alloc<A> internal_deserialize(const void* bytes, 
size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
 };
diff --git a/theta/include/theta_sketch_impl.hpp 
b/theta/include/theta_sketch_impl.hpp
index ac6b223..8abbaf8 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -237,7 +237,7 @@ 
update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uin
 theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
 lg_cur_size_(lg_cur_size),
 lg_nom_size_(lg_nom_size),
-keys_(AllocU64().allocate(1 << lg_cur_size_)),
+keys_(1 << lg_cur_size_, 0),
 num_keys_(0),
 rf_(rf),
 p_(p),
@@ -245,15 +245,14 @@ seed_(seed),
 capacity_(get_capacity(lg_cur_size, lg_nom_size))
 {
   if (p < 1) this->theta_ *= p;
-  std::fill(keys_, &keys_[1 << lg_cur_size_], 0);
 }
 
 template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, 
uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, uint64_t* keys, 
uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
+update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, 
uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, 
uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
 theta_sketch_alloc<A>(is_empty, theta),
 lg_cur_size_(lg_cur_size),
 lg_nom_size_(lg_nom_size),
-keys_(keys),
+keys_(std::move(keys)),
 num_keys_(num_keys),
 rf_(rf),
 p_(p),
@@ -262,74 +261,6 @@ capacity_(get_capacity(lg_cur_size, lg_nom_size))
 {}
 
 template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(const 
update_theta_sketch_alloc<A>& other):
-theta_sketch_alloc<A>(other),
-lg_cur_size_(other.lg_cur_size_),
-lg_nom_size_(other.lg_nom_size_),
-keys_(AllocU64().allocate(1 << lg_cur_size_)),
-num_keys_(other.num_keys_),
-rf_(other.rf_),
-p_(other.p_),
-seed_(other.seed_),
-capacity_(other.capacity_)
-{
-  std::copy(other.keys_, &other.keys_[1 << lg_cur_size_], keys_);
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(update_theta_sketch_alloc<A>&&
 other) noexcept:
-theta_sketch_alloc<A>(std::move(other)),
-lg_cur_size_(other.lg_cur_size_),
-lg_nom_size_(other.lg_nom_size_),
-keys_(nullptr),
-num_keys_(other.num_keys_),
-rf_(other.rf_),
-p_(other.p_),
-seed_(other.seed_),
-capacity_(other.capacity_)
-{
-  std::swap(keys_, other.keys_);
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>::~update_theta_sketch_alloc() {
-  if (keys_ != nullptr)
-    AllocU64().deallocate(keys_, 1 << lg_cur_size_);
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>& update_theta_sketch_alloc<A>::operator=(const 
update_theta_sketch_alloc<A>& other) {
-  theta_sketch_alloc<A>::operator=(other);
-  if (lg_cur_size_ != other.lg_cur_size_) {
-    AllocU64().deallocate(keys_, 1 << lg_cur_size_);
-    lg_cur_size_ = other.lg_cur_size_;
-    keys_ = AllocU64().allocate(1 << lg_cur_size_);
-  }
-  lg_nom_size_ = other.lg_nom_size_;
-  std::copy(other.keys_, &other.keys_[1 << lg_cur_size_], keys_);
-  num_keys_ = other.num_keys_;
-  rf_ = other.rf_;
-  p_ = other.p_;
-  seed_ = other.seed_;
-  capacity_ = other.capacity_;
-  return *this;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>& 
update_theta_sketch_alloc<A>::operator=(update_theta_sketch_alloc<A>&& other) {
-  theta_sketch_alloc<A>::operator=(std::move(other));
-  std::swap(lg_cur_size_, other.lg_cur_size_);
-  lg_nom_size_ = other.lg_nom_size_;
-  std::swap(keys_, other.keys_);
-  num_keys_ = other.num_keys_;
-  rf_ = other.rf_;
-  p_ = other.p_;
-  seed_ = other.seed_;
-  capacity_ = other.capacity_;
-  return *this;
-}
-
-template<typename A>
 uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
   return num_keys_;
 }
@@ -388,13 +319,13 @@ void 
update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
   os.write((char*)&num_keys_, sizeof(num_keys_));
   os.write((char*)&p_, sizeof(p_));
   os.write((char*)&(this->theta_), sizeof(uint64_t));
-  os.write((char*)keys_, sizeof(uint64_t) * (1 << lg_cur_size_));
+  os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
 }
 
 template<typename A>
 vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned 
header_size_bytes) const {
   const uint8_t preamble_longs = 3;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + 
sizeof(uint64_t) * (1 << lg_cur_size_);
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + 
sizeof(uint64_t) * keys_.size();
   vector_u8<A> bytes(size);
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
@@ -415,7 +346,7 @@ vector_u8<A> 
update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes)
   ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
   ptr += copy_to_mem(&p_, ptr, sizeof(p_));
   ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
-  ptr += copy_to_mem(keys_, ptr, sizeof(uint64_t) * (1 << lg_cur_size_));
+  ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
 
   return bytes;
 }
@@ -452,10 +383,10 @@ update_theta_sketch_alloc<A> 
update_theta_sketch_alloc<A>::internal_deserialize(
   is.read((char*)&p, sizeof(p));
   uint64_t theta;
   is.read((char*)&theta, sizeof(theta));
-  uint64_t* keys = AllocU64().allocate(1 << lg_cur_size);
-  is.read((char*)keys, sizeof(uint64_t) * (1 << lg_cur_size));
+  vector_u64<A> keys(1 << lg_cur_size);
+  is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
   const bool is_empty = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_EMPTY);
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, 
lg_nom_size, keys, num_keys, rf, p, seed);
+  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, 
lg_nom_size, std::move(keys), num_keys, rf, p, seed);
 }
 
 template<typename A>
@@ -495,10 +426,10 @@ update_theta_sketch_alloc<A> 
update_theta_sketch_alloc<A>::internal_deserialize(
   ptr += copy_from_mem(ptr, &p, sizeof(p));
   uint64_t theta;
   ptr += copy_from_mem(ptr, &theta, sizeof(theta));
-  uint64_t* keys = AllocU64().allocate(table_size);
-  ptr += copy_from_mem(ptr, keys, sizeof(uint64_t) * table_size);
+  vector_u64<A> keys(table_size);
+  ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
   const bool is_empty = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_EMPTY);
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, 
lg_nom_size, keys, num_keys, rf, p, seed);
+  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, 
lg_nom_size, std::move(keys), num_keys, rf, p, seed);
 }
 
 template<typename A>
@@ -586,7 +517,7 @@ template<typename A>
 void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
   this->is_empty_ = false;
   if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to 
mark empty slots in the table
-  if (hash_search_or_insert(hash, keys_, lg_cur_size_)) {
+  if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
     num_keys_++;
     if (num_keys_ > capacity_) {
       if (lg_cur_size_ <= lg_nom_size_) {
@@ -605,41 +536,35 @@ void update_theta_sketch_alloc<A>::trim() {
 
 template<typename A>
 void update_theta_sketch_alloc<A>::resize() {
-  const uint32_t cur_size = 1 << lg_cur_size_;
   const uint8_t lg_tgt_size = lg_nom_size_ + 1;
   const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), 
lg_tgt_size - lg_cur_size_));
   const uint8_t lg_new_size = lg_cur_size_ + factor;
   const uint32_t new_size = 1 << lg_new_size;
-  uint64_t* new_keys = AllocU64().allocate(new_size);
-  std::fill(new_keys, &new_keys[new_size], 0);
-  for (uint32_t i = 0; i < cur_size; i++) {
+  vector_u64<A> new_keys(new_size, 0);
+  for (uint32_t i = 0; i < keys_.size(); i++) {
     if (keys_[i] != 0) {
-      hash_search_or_insert(keys_[i], new_keys, lg_new_size); // TODO 
hash_insert
+      hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO 
hash_insert
     }
   }
-  AllocU64().deallocate(keys_, cur_size);
-  keys_ = new_keys;
+  keys_ = std::move(new_keys);
   lg_cur_size_ += factor;
   capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
 }
 
 template<typename A>
 void update_theta_sketch_alloc<A>::rebuild() {
-  const uint32_t cur_size = 1 << lg_cur_size_;
-  const uint32_t pivot = (1 << lg_nom_size_) + cur_size - num_keys_;
-  std::nth_element(&keys_[0], &keys_[pivot], &keys_[cur_size]);
+  const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
+  std::nth_element(&keys_[0], &keys_[pivot], &keys_[keys_.size()]);
   this->theta_ = keys_[pivot];
-  uint64_t* new_keys = AllocU64().allocate(cur_size);
-  std::fill(new_keys, &new_keys[cur_size], 0);
+  vector_u64<A> new_keys(keys_.size(), 0);
   num_keys_ = 0;
-  for (uint32_t i = 0; i < cur_size; i++) {
+  for (uint32_t i = 0; i < keys_.size(); i++) {
     if (keys_[i] != 0 && keys_[i] < this->theta_) {
-      hash_search_or_insert(keys_[i], new_keys, lg_cur_size_); // TODO 
hash_insert
+      hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO 
hash_insert
       num_keys_++;
     }
   }
-  AllocU64().deallocate(keys_, cur_size);
-  keys_ = new_keys;
+  keys_ = std::move(new_keys);
 }
 
 template<typename A>
@@ -695,93 +620,38 @@ bool update_theta_sketch_alloc<A>::hash_search(uint64_t 
hash, const uint64_t* ta
 
 template<typename A>
 typename theta_sketch_alloc<A>::const_iterator 
update_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_, 1 << 
lg_cur_size_, 0);
+  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), 
keys_.size(), 0);
 }
 
 template<typename A>
 typename theta_sketch_alloc<A>::const_iterator 
update_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_, 1 << 
lg_cur_size_, 1 << lg_cur_size_);
+  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), 
keys_.size(), keys_.size());
 }
 
 // compact sketch
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, 
uint64_t theta, uint64_t* keys, uint32_t num_keys, uint16_t seed_hash, bool 
is_ordered):
+compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, 
uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
 theta_sketch_alloc<A>(is_empty, theta),
-keys_(keys),
-num_keys_(num_keys),
+keys_(std::move(keys)),
 seed_hash_(seed_hash),
 is_ordered_(is_ordered)
 {}
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const 
compact_theta_sketch_alloc<A>& other):
-theta_sketch_alloc<A>(other),
-keys_(AllocU64().allocate(other.num_keys_)),
-num_keys_(other.num_keys_),
-seed_hash_(other.seed_hash_),
-is_ordered_(other.is_ordered_)
-{
-  std::copy(other.keys_, &other.keys_[num_keys_], keys_);
-}
-
-template<typename A>
 compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const 
theta_sketch_alloc<A>& other, bool ordered):
 theta_sketch_alloc<A>(other),
-keys_(AllocU64().allocate(other.get_num_retained())),
-num_keys_(other.get_num_retained()),
+keys_(other.get_num_retained()),
 seed_hash_(other.get_seed_hash()),
 is_ordered_(other.is_ordered() || ordered)
 {
-  std::copy(other.begin(), other.end(), keys_);
-  if (ordered && !other.is_ordered()) std::sort(keys_, &keys_[num_keys_]);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(compact_theta_sketch_alloc<A>&&
 other) noexcept:
-theta_sketch_alloc<A>(std::move(other)),
-keys_(nullptr),
-num_keys_(other.num_keys_),
-seed_hash_(other.seed_hash_),
-is_ordered_(other.is_ordered_)
-{
-  std::swap(keys_, other.keys_);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A>::~compact_theta_sketch_alloc() {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> 
AllocU64;
-  if (keys_ != nullptr)
-    AllocU64().deallocate(keys_, num_keys_);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A>& compact_theta_sketch_alloc<A>::operator=(const 
compact_theta_sketch_alloc<A>& other) {
-  theta_sketch_alloc<A>::operator=(other);
-  if (num_keys_ != other.num_keys_) {
-    AllocU64().deallocate(keys_, num_keys_);
-    num_keys_ = other.num_keys_;
-    keys_ = AllocU64().allocate(num_keys_);
-  }
-  seed_hash_ = other.seed_hash_;
-  is_ordered_ = other.is_ordered_;
-  std::copy(other.keys_, &other.keys_[num_keys_], keys_);
-  return *this;
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A>& 
compact_theta_sketch_alloc<A>::operator=(compact_theta_sketch_alloc<A>&& other) 
{
-  theta_sketch_alloc<A>::operator=(std::move(other));
-  std::swap(keys_, other.keys_);
-  std::swap(num_keys_, other.num_keys_);
-  std::swap(seed_hash_, other.seed_hash_);
-  std::swap(is_ordered_, other.is_ordered_);
-  return *this;
+  std::copy(other.begin(), other.end(), keys_.begin());
+  if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
 }
 
 template<typename A>
 uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
-  return num_keys_;
+  return keys_.size();
 }
 
 template<typename A>
@@ -797,7 +667,7 @@ bool compact_theta_sketch_alloc<A>::is_ordered() const {
 template<typename A>
 void compact_theta_sketch_alloc<A>::to_stream(std::ostream& os, bool 
print_items) const {
   os << "### Compact Theta sketch summary:" << std::endl;
-  os << "   num retained keys    : " << num_keys_ << std::endl;
+  os << "   num retained keys    : " << keys_.size() << std::endl;
   os << "   seed hash            : " << this->get_seed_hash() << std::endl;
   os << "   empty?               : " << (this->is_empty() ? "true" : "false") 
<< std::endl;
   os << "   ordered?             : " << (this->is_ordered() ? "true" : 
"false") << std::endl;
@@ -817,7 +687,7 @@ void compact_theta_sketch_alloc<A>::to_stream(std::ostream& 
os, bool print_items
 
 template<typename A>
 void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
-  const bool is_single_item = num_keys_ == 1 && !this->is_estimation_mode();
+  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : 
this->is_estimation_mode() ? 3 : 2;
   os.write((char*)&preamble_longs, sizeof(preamble_longs));
   const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
@@ -837,22 +707,23 @@ void 
compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
   os.write((char*)&seed_hash, sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      os.write((char*)&num_keys_, sizeof(num_keys_));
+      const uint32_t num_keys = keys_.size();
+      os.write((char*)&num_keys, sizeof(num_keys));
       const uint32_t unused32 = 0;
       os.write((char*)&unused32, sizeof(unused32));
       if (this->is_estimation_mode()) {
         os.write((char*)&(this->theta_), sizeof(uint64_t));
       }
     }
-    os.write((char*)keys_, sizeof(uint64_t) * num_keys_);
+    os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
   }
 }
 
 template<typename A>
 vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned 
header_size_bytes) const {
-  const bool is_single_item = num_keys_ == 1 && !this->is_estimation_mode();
+  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : 
this->is_estimation_mode() ? 3 : 2;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + 
sizeof(uint64_t) * num_keys_;
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + 
sizeof(uint64_t) * keys_.size();
   vector_u8<A> bytes(size);
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
@@ -874,14 +745,15 @@ vector_u8<A> 
compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes
   ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
+      const uint32_t num_keys = keys_.size();
+      ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
       const uint32_t unused32 = 0;
       ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
       if (this->is_estimation_mode()) {
         ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
       }
     }
-    ptr += copy_to_mem(keys_, ptr, sizeof(uint64_t) * num_keys_);
+    ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
   }
 
   return bytes;
@@ -910,7 +782,6 @@ compact_theta_sketch_alloc<A> 
compact_theta_sketch_alloc<A>::deserialize(std::is
 template<typename A>
 compact_theta_sketch_alloc<A> 
compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t 
preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
   uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint64_t* keys = nullptr;
   uint32_t num_keys = 0;
 
   const bool is_empty = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_EMPTY);
@@ -925,13 +796,12 @@ compact_theta_sketch_alloc<A> 
compact_theta_sketch_alloc<A>::internal_deserializ
         is.read((char*)&theta, sizeof(theta));
       }
     }
-    typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> 
AllocU64;
-    keys = AllocU64().allocate(num_keys);
-    is.read((char*)keys, sizeof(uint64_t) * num_keys);
   }
+  vector_u64<A> keys(num_keys);
+  if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
 
   const bool is_ordered = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_ORDERED);
-  return compact_theta_sketch_alloc<A>(is_empty, theta, keys, num_keys, 
seed_hash, is_ordered);
+  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), 
seed_hash, is_ordered);
 }
 
 template<typename A>
@@ -962,7 +832,6 @@ compact_theta_sketch_alloc<A> 
compact_theta_sketch_alloc<A>::internal_deserializ
   const char* base = ptr;
 
   uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint64_t* keys = nullptr;
   uint32_t num_keys = 0;
 
   const bool is_empty = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_EMPTY);
@@ -979,25 +848,24 @@ compact_theta_sketch_alloc<A> 
compact_theta_sketch_alloc<A>::internal_deserializ
         ptr += copy_from_mem(ptr, &theta, sizeof(theta));
       }
     }
-    const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
-    check_memory_size(ptr - base + keys_size_bytes, size);
-    typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> 
AllocU64;
-    keys = AllocU64().allocate(num_keys);
-    ptr += copy_from_mem(ptr, keys, keys_size_bytes);
   }
+  const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
+  check_memory_size(ptr - base + keys_size_bytes, size);
+  vector_u64<A> keys(num_keys);
+  if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
 
   const bool is_ordered = flags_byte & (1 << 
theta_sketch_alloc<A>::flags::IS_ORDERED);
-  return compact_theta_sketch_alloc<A>(is_empty, theta, keys, num_keys, 
seed_hash, is_ordered);
+  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), 
seed_hash, is_ordered);
 }
 
 template<typename A>
 typename theta_sketch_alloc<A>::const_iterator 
compact_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_, num_keys_, 0);
+  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), 
keys_.size(), 0);
 }
 
 template<typename A>
 typename theta_sketch_alloc<A>::const_iterator 
compact_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_, num_keys_, 
num_keys_);
+  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), 
keys_.size(), keys_.size());
 }
 
 // builder
diff --git a/theta/include/theta_union_impl.hpp 
b/theta/include/theta_union_impl.hpp
index 07c46c2..5f4d7f9 100644
--- a/theta/include/theta_union_impl.hpp
+++ b/theta/include/theta_union_impl.hpp
@@ -55,29 +55,21 @@ compact_theta_sketch_alloc<A> 
theta_union_alloc<A>::get_result(bool ordered) con
   const uint32_t nom_num_keys = 1 << state_.lg_nom_size_;
   if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) 
return state_.compact(ordered);
   uint64_t theta = std::min(theta_, state_.get_theta64());
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> 
AllocU64;
-  uint64_t* keys = AllocU64().allocate(state_.get_num_retained());
+  vector_u64<A> keys(state_.get_num_retained());
   uint32_t num_keys = 0;
   for (auto key: state_) {
     if (key < theta) keys[num_keys++] = key;
   }
-  if (num_keys == 0) {
-    AllocU64().deallocate(keys, state_.get_num_retained());
-    return compact_theta_sketch_alloc<A>(is_empty_, theta, nullptr, 0, 
state_.get_seed_hash(), ordered);
-  }
   if (num_keys > nom_num_keys) {
-    std::nth_element(keys, &keys[nom_num_keys], &keys[num_keys]);
+    std::nth_element(&keys[0], &keys[nom_num_keys], &keys[num_keys]);
     theta = keys[nom_num_keys];
     num_keys = nom_num_keys;
   }
   if (num_keys != state_.get_num_retained()) {
-    uint64_t* new_keys = AllocU64().allocate(num_keys);
-    std::copy(keys, &keys[num_keys], new_keys);
-    AllocU64().deallocate(keys, state_.get_num_retained());
-    keys = new_keys;
+    keys.resize(num_keys);
   }
-  if (ordered) std::sort(keys, &keys[num_keys]);
-  return compact_theta_sketch_alloc<A>(false, theta, keys, num_keys, 
state_.get_seed_hash(), ordered);
+  if (ordered) std::sort(keys.begin(), keys.end());
+  return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), 
state_.get_seed_hash(), ordered);
 }
 
 // builder


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[incubator-datasketches-cpp] 01/01: use vector to simplify and improve safety

Reply via email to