[incubator-datasketches-cpp] branch tuple_sketch updated: stateful allocator support

alsay Thu, 16 Jul 2020 16:40:14 -0700

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git



The following commit(s) were added to refs/heads/tuple_sketch by this push:
     new b9fadeb  stateful allocator support
b9fadeb is described below

commit b9fadeb45e1425cb133d5b23bde54467038ee33a
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Thu Jul 16 16:39:29 2020 -0700

    stateful allocator support
---
 tuple/include/theta_intersection_base.hpp        |  3 +-
 tuple/include/theta_intersection_base_impl.hpp   | 15 +++++-----
 tuple/include/theta_set_difference_base_impl.hpp |  2 +-
 tuple/include/theta_sketch_experimental.hpp      |  3 ++
 tuple/include/theta_sketch_experimental_impl.hpp | 10 +++++--
 tuple/include/tuple_intersection.hpp             |  2 +-
 tuple/include/tuple_intersection_impl.hpp        |  4 +--
 tuple/include/tuple_sketch.hpp                   | 12 ++++++--
 tuple/include/tuple_sketch_impl.hpp              | 35 ++++++++++++++++--------
 9 files changed, 58 insertions(+), 28 deletions(-)

diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index e9a69ee..f6ca4d0 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -33,7 +33,7 @@ template<
 class theta_intersection_base {
 public:
   using comparator = compare_by_key<ExtractKey>;
-  theta_intersection_base(uint64_t seed, const Policy& policy);
+  theta_intersection_base(uint64_t seed, const Policy& policy, const Allocator& allocator);
   ~theta_intersection_base();
   void destroy_objects();
 
@@ -47,6 +47,7 @@ public:
   bool has_result() const;
 
 private:
+  Allocator allocator_;
   Policy policy_;
   bool is_valid_;
   bool is_empty_;
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index 790873e..e56abcb 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -26,7 +26,8 @@
 namespace datasketches {
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t seed, const P& policy):
+theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t seed, const P& policy, const A& allocator):
+allocator_(allocator),
 policy_(policy),
 is_valid_(false),
 is_empty_(false),
@@ -40,7 +41,7 @@ entries_(nullptr)
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
 theta_intersection_base<EN, EK, P, S, CS, A>::~theta_intersection_base() {
   destroy_objects();
-  if (entries_ != nullptr) A().deallocate(entries_, 1 << lg_size_);
+  if (entries_ != nullptr) allocator_.deallocate(entries_, 1 << lg_size_);
 }
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -67,7 +68,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
   if (sketch.get_num_retained() == 0) {
     is_valid_ = true;
     destroy_objects();
-    A().deallocate(entries_, 1 << lg_size_);
+    allocator_.deallocate(entries_, 1 << lg_size_);
     entries_ = nullptr;
     lg_size_ = 0;
     num_entries_ = 0;
@@ -77,7 +78,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
     is_valid_ = true;
     lg_size_ = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
     const size_t size = 1 << lg_size_;
-    entries_ = A().allocate(size);
+    entries_ = allocator_.allocate(size);
     for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
     for (auto& entry: sketch) {
       auto result = theta_update_sketch_base<EN, EK, A>::find(entries_, lg_size_, EK()(entry));
@@ -115,7 +116,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
     }
     destroy_objects();
     if (match_count == 0) {
-      A().deallocate(entries_, 1 << lg_size_);
+      allocator_.deallocate(entries_, 1 << lg_size_);
       entries_ = nullptr;
       lg_size_ = 0;
       num_entries_ = 0;
@@ -124,10 +125,10 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
       const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
       const size_t size = 1 << lg_size;
       if (lg_size != lg_size_) {
-        A().deallocate(entries_, 1 << lg_size_);
+        allocator_.deallocate(entries_, 1 << lg_size_);
         entries_ = nullptr;
         lg_size_ = lg_size;
-        entries_ = A().allocate(size);
+        entries_ = allocator_.allocate(size);
         for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
       }
       for (uint32_t i = 0; i < match_count; i++) {
diff --git a/tuple/include/theta_set_difference_base_impl.hpp b/tuple/include/theta_set_difference_base_impl.hpp
index 6e4d553..e3c51bb 100644
--- a/tuple/include/theta_set_difference_base_impl.hpp
+++ b/tuple/include/theta_set_difference_base_impl.hpp
@@ -38,7 +38,7 @@ CS theta_set_difference_base<EN, EK, S, CS, A>::compute(SS&& a, const S& b, bool
   if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
 
   const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
-  std::vector<EN, A> entries;
+  std::vector<EN, A> entries(allocator_);
   bool is_empty = a.is_empty();
 
   if (b.get_num_retained() == 0) {
diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index 4a081d1..1e24560 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -44,6 +44,7 @@ public:
       A allocator_;
   };
 
+  A get_allocator() const { return table_.allocator_; };
   bool is_empty() const { return table_.is_empty_; }
   bool is_ordered() const { return false; }
   uint16_t get_seed_hash() const { return compute_seed_hash(DEFAULT_SEED); }
@@ -84,6 +85,8 @@ public:
 
   string<A> to_string(bool detail = false) const;
 
+  A get_allocator() const;
+
 private:
   bool is_empty_;
   bool is_ordered_;
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index 68d8c93..15fbb0e 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -128,7 +128,7 @@ is_empty_(other.is_empty()),
 is_ordered_(other.is_ordered()),
 seed_hash_(other.get_seed_hash()),
 theta_(other.get_theta64()),
-entries_()
+entries_(other.get_allocator())
 {
   entries_.reserve(other.get_num_retained());
   std::copy(other.begin(), other.end(), std::back_inserter(entries_));
@@ -136,7 +136,8 @@ entries_()
 }
 
 template<typename A>
-compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, A>&& entries):
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
+    std::vector<uint64_t, A>&& entries):
 is_empty_(is_empty),
 is_ordered_(is_ordered),
 seed_hash_(seed_hash),
@@ -157,4 +158,9 @@ string<A> compact_theta_sketch_experimental<A>::to_string(bool detail) const {
   return os.str();
 }
 
+template<typename A>
+A compact_theta_sketch_experimental<A>::get_allocator() const {
+  return entries_.get_allocator();
+}
+
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index 3d9bb0d..e03e288 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -66,7 +66,7 @@ public:
 
   using State = theta_intersection_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
 
-  explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy());
+  explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
 
   /**
    * Updates the intersection with a given sketch.
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index b5a6050..74791c5 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -20,8 +20,8 @@
 namespace datasketches {
 
 template<typename S, typename P, typename A>
-tuple_intersection<S, P, A>::tuple_intersection(uint64_t seed, const P& policy):
-state_(seed, internal_policy(policy))
+tuple_intersection<S, P, A>::tuple_intersection(uint64_t seed, const P& policy, const A& allocator):
+state_(seed, internal_policy(policy), allocator)
 {}
 
 template<typename S, typename P, typename A>
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index f7f228e..7ebc9e2 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -45,6 +45,8 @@ public:
 
   virtual ~tuple_sketch() = default;
 
+  virtual Allocator get_allocator() const = 0;
+
   /**
    * @return true if this sketch represents an empty set (not the same as no retained entries!)
    */
@@ -178,6 +180,7 @@ public:
 
   virtual ~update_tuple_sketch() = default;
 
+  virtual Allocator get_allocator() const;
   virtual bool is_empty() const;
   virtual bool is_ordered() const;
   virtual uint64_t get_theta64() const;
@@ -352,6 +355,7 @@ public:
   compact_tuple_sketch(const Base& other, bool ordered);
   virtual ~compact_tuple_sketch() = default;
 
+  virtual Allocator get_allocator() const;
   virtual bool is_empty() const;
   virtual bool is_ordered() const;
   virtual uint64_t get_theta64() const;
@@ -377,7 +381,8 @@ public:
    * @return an instance of a sketch
    */
   template<typename SerDe = serde<Summary>>
-  static compact_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const SerDe& sd = SerDe());
+  static compact_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED,
+      const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -388,9 +393,10 @@ public:
    * @return an instance of the sketch
    */
   template<typename SerDe = serde<Summary>>
-  static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const SerDe& sd = SerDe());
+  static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
+      const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
 
-  // TODO: try to hide this
+  // for internal use
   compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
 
 private:
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index a972d8a..aa68cd5 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -88,6 +88,11 @@ map_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
 {}
 
 template<typename S, typename U, typename P, typename A>
+A update_tuple_sketch<S, U, P, A>::get_allocator() const {
+  return map_.allocator_;
+}
+
+template<typename S, typename U, typename P, typename A>
 bool update_tuple_sketch<S, U, P, A>::is_empty() const {
   return map_.is_empty_;
 }
@@ -244,7 +249,8 @@ void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) co
 // compact sketch
 
 template<typename S, typename A>
-compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries):
+compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
+    std::vector<Entry, AllocEntry>&& entries):
 is_empty_(is_empty),
 is_ordered_(is_ordered),
 seed_hash_(seed_hash),
@@ -258,7 +264,7 @@ is_empty_(other.is_empty()),
 is_ordered_(other.is_ordered() || ordered),
 seed_hash_(other.get_seed_hash()),
 theta_(other.get_theta64()),
-entries_()
+entries_(other.get_allocator())
 {
   entries_.reserve(other.get_num_retained());
   std::copy(other.begin(), other.end(), std::back_inserter(entries_));
@@ -266,6 +272,11 @@ entries_()
 }
 
 template<typename S, typename A>
+A compact_tuple_sketch<S, A>::get_allocator() const {
+  return entries_.get_allocator();
+}
+
+template<typename S, typename A>
 bool compact_tuple_sketch<S, A>::is_empty() const {
   return is_empty_;
 }
@@ -356,7 +367,7 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
   const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
       + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
-  vector_bytes bytes(size);
+  vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
 
@@ -398,7 +409,7 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
 
 template<typename S, typename A>
 template<typename SerDe>
-compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd) {
+compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
   uint8_t preamble_longs;
   is.read((char*)&preamble_longs, sizeof(preamble_longs));
   uint8_t serial_version;
@@ -430,12 +441,13 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
       }
     }
   }
-  std::vector<Entry, AllocEntry> entries;
+  std::vector<Entry, AllocEntry> entries(allocator);
   if (!is_empty) {
     entries.reserve(num_entries);
-    std::vector<uint64_t, AllocU64> keys(num_entries);
+    std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
     is.read((char*)keys.data(), num_entries * sizeof(uint64_t));
-    std::unique_ptr<S, deleter_of_summaries> summaries(A().allocate(num_entries), deleter_of_summaries(num_entries, false));
+    A alloc(allocator);
+    std::unique_ptr<S, deleter_of_summaries> summaries(alloc.allocate(num_entries), deleter_of_summaries(num_entries, false));
     sd.deserialize(is, summaries.get(), num_entries);
     summaries.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
     for (size_t i = 0; i < num_entries; ++i) {
@@ -449,7 +461,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
 
 template<typename S, typename A>
 template<typename SerDe>
-compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd) {
+compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = ptr;
@@ -489,12 +501,13 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
   }
   const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
   ensure_minimum_memory(size, ptr - base + keys_size_bytes);
-  std::vector<Entry, AllocEntry> entries;
+  std::vector<Entry, AllocEntry> entries(allocator);
   if (!is_empty) {
     entries.reserve(num_entries);
-    std::vector<uint64_t, AllocU64> keys(num_entries);
+    std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
     ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
-    std::unique_ptr<S, deleter_of_summaries> summaries(A().allocate(num_entries), deleter_of_summaries(num_entries, false));
+    A alloc(allocator);
+    std::unique_ptr<S, deleter_of_summaries> summaries(alloc.allocate(num_entries), deleter_of_summaries(num_entries, false));
     ptr += sd.deserialize(ptr, base + size - ptr, summaries.get(), num_entries);
     summaries.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
     for (size_t i = 0; i < num_entries; ++i) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[incubator-datasketches-cpp] branch tuple_sketch updated: stateful allocator support

Reply via email to