This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new b9fadeb stateful allocator support
b9fadeb is described below
commit b9fadeb45e1425cb133d5b23bde54467038ee33a
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Thu Jul 16 16:39:29 2020 -0700
stateful allocator support
---
tuple/include/theta_intersection_base.hpp | 3 +-
tuple/include/theta_intersection_base_impl.hpp | 15 +++++-----
tuple/include/theta_set_difference_base_impl.hpp | 2 +-
tuple/include/theta_sketch_experimental.hpp | 3 ++
tuple/include/theta_sketch_experimental_impl.hpp | 10 +++++--
tuple/include/tuple_intersection.hpp | 2 +-
tuple/include/tuple_intersection_impl.hpp | 4 +--
tuple/include/tuple_sketch.hpp | 12 ++++++--
tuple/include/tuple_sketch_impl.hpp | 35 ++++++++++++++++--------
9 files changed, 58 insertions(+), 28 deletions(-)
diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index e9a69ee..f6ca4d0 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -33,7 +33,7 @@ template<
class theta_intersection_base {
public:
using comparator = compare_by_key<ExtractKey>;
- theta_intersection_base(uint64_t seed, const Policy& policy);
+ theta_intersection_base(uint64_t seed, const Policy& policy, const
Allocator& allocator);
~theta_intersection_base();
void destroy_objects();
@@ -47,6 +47,7 @@ public:
bool has_result() const;
private:
+ Allocator allocator_;
Policy policy_;
bool is_valid_;
bool is_empty_;
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index 790873e..e56abcb 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -26,7 +26,8 @@
namespace datasketches {
template<typename EN, typename EK, typename P, typename S, typename CS,
typename A>
-theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t
seed, const P& policy):
+theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t
seed, const P& policy, const A& allocator):
+allocator_(allocator),
policy_(policy),
is_valid_(false),
is_empty_(false),
@@ -40,7 +41,7 @@ entries_(nullptr)
template<typename EN, typename EK, typename P, typename S, typename CS,
typename A>
theta_intersection_base<EN, EK, P, S, CS, A>::~theta_intersection_base() {
destroy_objects();
- if (entries_ != nullptr) A().deallocate(entries_, 1 << lg_size_);
+ if (entries_ != nullptr) allocator_.deallocate(entries_, 1 << lg_size_);
}
template<typename EN, typename EK, typename P, typename S, typename CS,
typename A>
@@ -67,7 +68,7 @@ void theta_intersection_base<EN, EK, P, S, CS,
A>::update(SS&& sketch) {
if (sketch.get_num_retained() == 0) {
is_valid_ = true;
destroy_objects();
- A().deallocate(entries_, 1 << lg_size_);
+ allocator_.deallocate(entries_, 1 << lg_size_);
entries_ = nullptr;
lg_size_ = 0;
num_entries_ = 0;
@@ -77,7 +78,7 @@ void theta_intersection_base<EN, EK, P, S, CS,
A>::update(SS&& sketch) {
is_valid_ = true;
lg_size_ = lg_size_from_count(sketch.get_num_retained(),
theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
const size_t size = 1 << lg_size_;
- entries_ = A().allocate(size);
+ entries_ = allocator_.allocate(size);
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
for (auto& entry: sketch) {
auto result = theta_update_sketch_base<EN, EK, A>::find(entries_,
lg_size_, EK()(entry));
@@ -115,7 +116,7 @@ void theta_intersection_base<EN, EK, P, S, CS,
A>::update(SS&& sketch) {
}
destroy_objects();
if (match_count == 0) {
- A().deallocate(entries_, 1 << lg_size_);
+ allocator_.deallocate(entries_, 1 << lg_size_);
entries_ = nullptr;
lg_size_ = 0;
num_entries_ = 0;
@@ -124,10 +125,10 @@ void theta_intersection_base<EN, EK, P, S, CS,
A>::update(SS&& sketch) {
const uint8_t lg_size = lg_size_from_count(match_count,
theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
const size_t size = 1 << lg_size;
if (lg_size != lg_size_) {
- A().deallocate(entries_, 1 << lg_size_);
+ allocator_.deallocate(entries_, 1 << lg_size_);
entries_ = nullptr;
lg_size_ = lg_size;
- entries_ = A().allocate(size);
+ entries_ = allocator_.allocate(size);
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
}
for (uint32_t i = 0; i < match_count; i++) {
diff --git a/tuple/include/theta_set_difference_base_impl.hpp b/tuple/include/theta_set_difference_base_impl.hpp
index 6e4d553..e3c51bb 100644
--- a/tuple/include/theta_set_difference_base_impl.hpp
+++ b/tuple/include/theta_set_difference_base_impl.hpp
@@ -38,7 +38,7 @@ CS theta_set_difference_base<EN, EK, S, CS, A>::compute(SS&&
a, const S& b, bool
if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed
hash mismatch");
const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
- std::vector<EN, A> entries;
+ std::vector<EN, A> entries(allocator_);
bool is_empty = a.is_empty();
if (b.get_num_retained() == 0) {
diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index 4a081d1..1e24560 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -44,6 +44,7 @@ public:
A allocator_;
};
+ A get_allocator() const { return table_.allocator_; };
bool is_empty() const { return table_.is_empty_; }
bool is_ordered() const { return false; }
uint16_t get_seed_hash() const { return compute_seed_hash(DEFAULT_SEED); }
@@ -84,6 +85,8 @@ public:
string<A> to_string(bool detail = false) const;
+ A get_allocator() const;
+
private:
bool is_empty_;
bool is_ordered_;
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index 68d8c93..15fbb0e 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -128,7 +128,7 @@ is_empty_(other.is_empty()),
is_ordered_(other.is_ordered()),
seed_hash_(other.get_seed_hash()),
theta_(other.get_theta64()),
-entries_()
+entries_(other.get_allocator())
{
entries_.reserve(other.get_num_retained());
std::copy(other.begin(), other.end(), std::back_inserter(entries_));
@@ -136,7 +136,8 @@ entries_()
}
template<typename A>
-compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool
is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
std::vector<uint64_t, A>&& entries):
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool
is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
+ std::vector<uint64_t, A>&& entries):
is_empty_(is_empty),
is_ordered_(is_ordered),
seed_hash_(seed_hash),
@@ -157,4 +158,9 @@ string<A>
compact_theta_sketch_experimental<A>::to_string(bool detail) const {
return os.str();
}
+template<typename A>
+A compact_theta_sketch_experimental<A>::get_allocator() const {
+ return entries_.get_allocator();
+}
+
} /* namespace datasketches */
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index 3d9bb0d..e03e288 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -66,7 +66,7 @@ public:
using State = theta_intersection_base<Entry, ExtractKey, internal_policy,
Sketch, CompactSketch, AllocEntry>;
- explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy&
policy = Policy());
+ explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy&
policy = Policy(), const Allocator& allocator = Allocator());
/**
* Updates the intersection with a given sketch.
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index b5a6050..74791c5 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -20,8 +20,8 @@
namespace datasketches {
template<typename S, typename P, typename A>
-tuple_intersection<S, P, A>::tuple_intersection(uint64_t seed, const P&
policy):
-state_(seed, internal_policy(policy))
+tuple_intersection<S, P, A>::tuple_intersection(uint64_t seed, const P&
policy, const A& allocator):
+state_(seed, internal_policy(policy), allocator)
{}
template<typename S, typename P, typename A>
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index f7f228e..7ebc9e2 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -45,6 +45,8 @@ public:
virtual ~tuple_sketch() = default;
+ virtual Allocator get_allocator() const = 0;
+
/**
* @return true if this sketch represents an empty set (not the same as no
retained entries!)
*/
@@ -178,6 +180,7 @@ public:
virtual ~update_tuple_sketch() = default;
+ virtual Allocator get_allocator() const;
virtual bool is_empty() const;
virtual bool is_ordered() const;
virtual uint64_t get_theta64() const;
@@ -352,6 +355,7 @@ public:
compact_tuple_sketch(const Base& other, bool ordered);
virtual ~compact_tuple_sketch() = default;
+ virtual Allocator get_allocator() const;
virtual bool is_empty() const;
virtual bool is_ordered() const;
virtual uint64_t get_theta64() const;
@@ -377,7 +381,8 @@ public:
* @return an instance of a sketch
*/
template<typename SerDe = serde<Summary>>
- static compact_tuple_sketch deserialize(std::istream& is, uint64_t seed =
DEFAULT_SEED, const SerDe& sd = SerDe());
+ static compact_tuple_sketch deserialize(std::istream& is, uint64_t seed =
DEFAULT_SEED,
+ const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
/**
* This method deserializes a sketch from a given array of bytes.
@@ -388,9 +393,10 @@ public:
* @return an instance of the sketch
*/
template<typename SerDe = serde<Summary>>
- static compact_tuple_sketch deserialize(const void* bytes, size_t size,
uint64_t seed = DEFAULT_SEED, const SerDe& sd = SerDe());
+ static compact_tuple_sketch deserialize(const void* bytes, size_t size,
uint64_t seed = DEFAULT_SEED,
+ const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
- // TODO: try to hide this
+ // for internal use
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash,
uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
private:
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index a972d8a..aa68cd5 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -88,6 +88,11 @@ map_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
{}
template<typename S, typename U, typename P, typename A>
+A update_tuple_sketch<S, U, P, A>::get_allocator() const {
+ return map_.allocator_;
+}
+
+template<typename S, typename U, typename P, typename A>
bool update_tuple_sketch<S, U, P, A>::is_empty() const {
return map_.is_empty_;
}
@@ -244,7 +249,8 @@ void update_tuple_sketch<S, U, P,
A>::print_specifics(std::ostringstream& os) co
// compact sketch
template<typename S, typename A>
-compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool
is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry,
AllocEntry>&& entries):
+compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool
is_ordered, uint16_t seed_hash, uint64_t theta,
+ std::vector<Entry, AllocEntry>&& entries):
is_empty_(is_empty),
is_ordered_(is_ordered),
seed_hash_(seed_hash),
@@ -258,7 +264,7 @@ is_empty_(other.is_empty()),
is_ordered_(other.is_ordered() || ordered),
seed_hash_(other.get_seed_hash()),
theta_(other.get_theta64()),
-entries_()
+entries_(other.get_allocator())
{
entries_.reserve(other.get_num_retained());
std::copy(other.begin(), other.end(), std::back_inserter(entries_));
@@ -266,6 +272,11 @@ entries_()
}
template<typename S, typename A>
+A compact_tuple_sketch<S, A>::get_allocator() const {
+ return entries_.get_allocator();
+}
+
+template<typename S, typename A>
bool compact_tuple_sketch<S, A>::is_empty() const {
return is_empty_;
}
@@ -356,7 +367,7 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned
header_size_bytes, const Ser
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 :
this->is_estimation_mode() ? 3 : 2;
const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
+ sizeof(uint64_t) * entries_.size() +
get_serialized_size_summaries_bytes(sd);
- vector_bytes bytes(size);
+ vector_bytes bytes(size, 0, entries_.get_allocator());
uint8_t* ptr = bytes.data() + header_size_bytes;
const uint8_t* end_ptr = ptr + size;
@@ -398,7 +409,7 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned
header_size_bytes, const Ser
template<typename S, typename A>
template<typename SerDe>
-compact_tuple_sketch<S, A> compact_tuple_sketch<S,
A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd) {
+compact_tuple_sketch<S, A> compact_tuple_sketch<S,
A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A&
allocator) {
uint8_t preamble_longs;
is.read((char*)&preamble_longs, sizeof(preamble_longs));
uint8_t serial_version;
@@ -430,12 +441,13 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S,
A>::deserialize(std::istream&
}
}
}
- std::vector<Entry, AllocEntry> entries;
+ std::vector<Entry, AllocEntry> entries(allocator);
if (!is_empty) {
entries.reserve(num_entries);
- std::vector<uint64_t, AllocU64> keys(num_entries);
+ std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
is.read((char*)keys.data(), num_entries * sizeof(uint64_t));
- std::unique_ptr<S, deleter_of_summaries>
summaries(A().allocate(num_entries), deleter_of_summaries(num_entries, false));
+ A alloc(allocator);
+ std::unique_ptr<S, deleter_of_summaries>
summaries(alloc.allocate(num_entries), deleter_of_summaries(num_entries,
false));
sd.deserialize(is, summaries.get(), num_entries);
summaries.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
for (size_t i = 0; i < num_entries; ++i) {
@@ -449,7 +461,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S,
A>::deserialize(std::istream&
template<typename S, typename A>
template<typename SerDe>
-compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void*
bytes, size_t size, uint64_t seed, const SerDe& sd) {
+compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void*
bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
ensure_minimum_memory(size, 8);
const char* ptr = static_cast<const char*>(bytes);
const char* base = ptr;
@@ -489,12 +501,13 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S,
A>::deserialize(const void* b
}
const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
ensure_minimum_memory(size, ptr - base + keys_size_bytes);
- std::vector<Entry, AllocEntry> entries;
+ std::vector<Entry, AllocEntry> entries(allocator);
if (!is_empty) {
entries.reserve(num_entries);
- std::vector<uint64_t, AllocU64> keys(num_entries);
+ std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
- std::unique_ptr<S, deleter_of_summaries>
summaries(A().allocate(num_entries), deleter_of_summaries(num_entries, false));
+ A alloc(allocator);
+ std::unique_ptr<S, deleter_of_summaries>
summaries(alloc.allocate(num_entries), deleter_of_summaries(num_entries,
false));
ptr += sd.deserialize(ptr, base + size - ptr, summaries.get(),
num_entries);
summaries.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
for (size_t i = 0; i < num_entries; ++i) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]