This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch fi_allocator in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit 73a32cfd32b7a2028a0a1b4ffae84af23b91f33d Author: AlexanderSaydakov <[email protected]> AuthorDate: Thu Jan 14 11:27:55 2021 -0800 support passing an allocator instance --- fi/include/frequent_items_sketch.hpp | 25 +-- fi/include/frequent_items_sketch_impl.hpp | 68 +++++--- fi/include/reverse_purge_hash_map.hpp | 32 ++-- fi/include/reverse_purge_hash_map_impl.hpp | 264 +++++++++++++++-------------- 4 files changed, 214 insertions(+), 175 deletions(-) diff --git a/fi/include/frequent_items_sketch.hpp b/fi/include/frequent_items_sketch.hpp index b2a891f..6efe2b9 100644 --- a/fi/include/frequent_items_sketch.hpp +++ b/fi/include/frequent_items_sketch.hpp @@ -40,15 +40,20 @@ namespace datasketches { enum frequent_items_error_type { NO_FALSE_POSITIVES, NO_FALSE_NEGATIVES }; -// for serialization as raw bytes -template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>; -template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>; - // type W for weight must be an arithmetic type (integral or floating point) -template<typename T, typename W = uint64_t, typename H = std::hash<T>, typename E = std::equal_to<T>, typename S = serde<T>, typename A = std::allocator<T>> +template< + typename T, + typename W = uint64_t, + typename H = std::hash<T>, + typename E = std::equal_to<T>, + typename S = serde<T>, + typename A = std::allocator<T> +> class frequent_items_sketch { public: + static const uint8_t LG_MIN_MAP_SIZE = 3; + /** * Construct this sketch with parameters lg_max_map_size and lg_start_map_size. * @@ -59,7 +64,7 @@ public: * @param lg_start_map_size Log2 of the starting physical size of the internal hash * map managed by this sketch. */ - explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE); + explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A()); /** * Update this sketch with an item and a positive weight (frequency count). @@ -232,7 +237,8 @@ public: // This is a convenience alias for users // The type returned by the following serialize method - typedef vector_u8<A> vector_bytes; + using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>; + /** * This method serializes the sketch as a vector of bytes. @@ -249,7 +255,7 @@ public: * @param is input stream * @return an instance of the sketch */ - static frequent_items_sketch deserialize(std::istream& is); + static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A()); /** * This method deserializes a sketch from a given array of bytes. @@ -257,7 +263,7 @@ public: * @param size the size of the array * @return an instance of the sketch */ - static frequent_items_sketch deserialize(const void* bytes, size_t size); + static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A()); /** * Returns a human readable summary of this sketch @@ -266,7 +272,6 @@ public: string<A> to_string(bool print_items = false) const; private: - static const uint8_t LG_MIN_MAP_SIZE = 3; static const uint8_t SERIAL_VERSION = 1; static const uint8_t FAMILY_ID = 10; static const uint8_t PREAMBLE_LONGS_EMPTY = 1; diff --git a/fi/include/frequent_items_sketch_impl.hpp b/fi/include/frequent_items_sketch_impl.hpp index bb05aa9..df4352e 100644 --- a/fi/include/frequent_items_sketch_impl.hpp +++ b/fi/include/frequent_items_sketch_impl.hpp @@ -33,10 +33,14 @@ template<typename T, typename W, typename H, typename E, typename S, typename A> const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE; template<typename T, typename W, typename H, typename E, typename S, typename A> -frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size): +frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size, const A& allocator): total_weight(0), offset(0), -map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE)) +map( + std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), + std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), + allocator +) { if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size"); } @@ -142,7 +146,7 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error template<typename T, typename W, typename H, typename E, typename S, typename A> typename frequent_items_sketch<T, W, H, E, S, A>::vector_row frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const { - vector_row items; + vector_row items(map.get_allocator()); for (auto &it: map) { const W lb = it.second; const W ub = it.second + offset; @@ -182,19 +186,21 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const os.write((char*)&offset, sizeof(offset)); // copy active items and their weights to use batch serialization - typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW; - W* weights = AllocW().allocate(num_items); - T* items = A().allocate(num_items); + using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>; + AllocW aw(map.get_allocator()); + W* weights = aw.allocate(num_items); + A alloc(map.get_allocator()); + T* items = alloc.allocate(num_items); uint32_t i = 0; for (auto &it: map) { new (&items[i]) T(it.first); weights[i++] = it.second; } os.write((char*)weights, sizeof(W) * num_items); - AllocW().deallocate(weights, num_items); + aw.deallocate(weights, num_items); S().serialize(os, items, num_items); for (unsigned i = 0; i < num_items; i++) items[i].~T(); - A().deallocate(items, num_items); + alloc.deallocate(items, num_items); } } @@ -207,9 +213,9 @@ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() cons } template<typename T, typename W, typename H, typename E, typename S, typename A> -vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const { +auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes { const size_t size = header_size_bytes + get_serialized_size_bytes(); - vector_u8<A> bytes(size); + vector_bytes bytes(size, 0, map.get_allocator()); uint8_t* ptr = bytes.data() + header_size_bytes; uint8_t* end_ptr = ptr + size; @@ -238,20 +244,22 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_ ptr += copy_to_mem(&offset, ptr, sizeof(offset)); // copy active items and their weights to use batch serialization - typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW; - W* weights = AllocW().allocate(num_items); - T* items = A().allocate(num_items); + using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>; + AllocW aw(map.get_allocator()); + W* weights = aw.allocate(num_items); + A alloc(map.get_allocator()); + T* items = alloc.allocate(num_items); uint32_t i = 0; for (auto &it: map) { new (&items[i]) T(it.first); weights[i++] = it.second; } ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items); - AllocW().deallocate(weights, num_items); + aw.deallocate(weights, num_items); const size_t bytes_remaining = end_ptr - ptr; ptr += S().serialize(ptr, bytes_remaining, items, num_items); for (unsigned i = 0; i < num_items; i++) items[i].~T(); - A().deallocate(items, num_items); + alloc.deallocate(items, num_items); } return bytes; } @@ -259,23 +267,25 @@ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_ template<typename T, typename W, typename H, typename E, typename S, typename A> class frequent_items_sketch<T, W, H, E, S, A>::items_deleter { public: - items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {} + items_deleter(uint32_t num, bool destroy, const A& allocator): + allocator(allocator), num(num), destroy(destroy) {} void set_destroy(bool destroy) { this->destroy = destroy; } - void operator() (T* ptr) const { + void operator() (T* ptr) { if (ptr != nullptr) { if (destroy) { for (uint32_t i = 0; i < num; ++i) ptr[i].~T(); } - A().deallocate(ptr, num); + allocator.deallocate(ptr, num); } } private: + A allocator; uint32_t num; bool destroy; }; template<typename T, typename W, typename H, typename E, typename S, typename A> -frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) { +frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) { uint8_t preamble_longs; is.read((char*)&preamble_longs, sizeof(preamble_longs)); uint8_t serial_version; @@ -298,7 +308,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>: check_family_id(family_id); check_size(lg_cur_size, lg_max_size); - frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size); + frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator); if (!is_empty) { uint32_t num_items; is.read((char*)&num_items, sizeof(num_items)); @@ -310,10 +320,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>: is.read((char*)&offset, sizeof(offset)); // batch deserialization with intermediate array of items and weights - typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW; - std::vector<W, AllocW> weights(num_items); + using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>; + std::vector<W, AllocW> weights(num_items, 0, allocator); is.read((char*)weights.data(), sizeof(W) * num_items); - std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false)); + A alloc(allocator); + std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc)); S().deserialize(is, items.get(), num_items); items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed for (uint32_t i = 0; i < num_items; i++) { @@ -328,7 +339,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>: } template<typename T, typename W, typename H, typename E, typename S, typename A> -frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) { +frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) { ensure_minimum_memory(size, 8); const char* ptr = static_cast<const char*>(bytes); const char* base = static_cast<const char*>(bytes); @@ -355,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>: check_size(lg_cur_size, lg_max_size); ensure_minimum_memory(size, 1 << preamble_longs); - frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size); + frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator); if (!is_empty) { uint32_t num_items; ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t)); @@ -368,10 +379,11 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>: ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items)); // batch deserialization with intermediate array of items and weights - typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW; - std::vector<W, AllocW> weights(num_items); + using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>; + std::vector<W, AllocW> weights(num_items, 0, allocator); ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items); - std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false)); + A alloc(allocator); + std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc)); const size_t bytes_remaining = size - (ptr - base); ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items); items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed diff --git a/fi/include/reverse_purge_hash_map.hpp b/fi/include/reverse_purge_hash_map.hpp index e3e7bfb..fc4cd83 100644 --- a/fi/include/reverse_purge_hash_map.hpp +++ b/fi/include/reverse_purge_hash_map.hpp @@ -39,33 +39,39 @@ public: using AllocV = typename std::allocator_traits<A>::template rebind_alloc<V>; using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>; - reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size); + reverse_purge_hash_map(uint8_t lg_size, uint8_t lg_max_size, const A& allocator); reverse_purge_hash_map(const reverse_purge_hash_map& other); reverse_purge_hash_map(reverse_purge_hash_map&& other) noexcept; ~reverse_purge_hash_map(); reverse_purge_hash_map& operator=(reverse_purge_hash_map other); reverse_purge_hash_map& operator=(reverse_purge_hash_map&& other); - V adjust_or_insert(const K& key, V value); - V adjust_or_insert(K&& key, V value); + + template<typename FwdK> + V adjust_or_insert(FwdK&& key, V value); + V get(const K& key) const; uint8_t get_lg_cur_size() const; uint8_t get_lg_max_size() const; uint32_t get_capacity() const; uint32_t get_num_active() const; + const A& get_allocator() const; + class iterator; iterator begin() const; iterator end() const; + private: static constexpr double LOAD_FACTOR = 0.75; static constexpr uint16_t DRIFT_LIMIT = 1024; // used only for stress testing static constexpr uint32_t MAX_SAMPLE_SIZE = 1024; // number of samples to compute approximate median during purge - uint8_t lg_cur_size; - uint8_t lg_max_size; - uint32_t num_active; - K* keys; - V* values; - uint16_t* states; + A allocator_; + uint8_t lg_cur_size_; + uint8_t lg_max_size_; + uint32_t num_active_; + K* keys_; + V* values_; + uint16_t* states_; inline bool is_active(uint32_t probe) const; void subtract_and_keep_positive_only(V amount); @@ -83,8 +89,8 @@ public: friend class reverse_purge_hash_map<K, V, H, E, A>; iterator& operator++() { ++count; - if (count < map->num_active) { - const uint32_t mask = (1 << map->lg_cur_size) - 1; + if (count < map->num_active_) { + const uint32_t mask = (1 << map->lg_cur_size_) - 1; do { index = (index + stride) & mask; } while (!map->is_active(index)); @@ -95,7 +101,7 @@ public: bool operator==(const iterator& rhs) const { return count == rhs.count; } bool operator!=(const iterator& rhs) const { return count != rhs.count; } const std::pair<K&, V> operator*() const { - return std::pair<K&, V>(map->keys[index], map->values[index]); + return std::pair<K&, V>(map->keys_[index], map->values_[index]); } private: static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2 @@ -104,7 +110,7 @@ private: uint32_t count; uint32_t stride; iterator(const reverse_purge_hash_map<K, V, H, E, A>* map, uint32_t index, uint32_t count): - map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size) * GOLDEN_RATIO_RECIPROCAL) | 1) {} + map(map), index(index), count(count), stride(static_cast<uint32_t>((1 << map->lg_cur_size_) * GOLDEN_RATIO_RECIPROCAL) | 1) {} }; } /* namespace datasketches */ diff --git a/fi/include/reverse_purge_hash_map_impl.hpp b/fi/include/reverse_purge_hash_map_impl.hpp index 732d923..beccea4 100644 --- a/fi/include/reverse_purge_hash_map_impl.hpp +++ b/fi/include/reverse_purge_hash_map_impl.hpp @@ -34,113 +34,121 @@ template<typename K, typename V, typename H, typename E, typename A> constexpr uint32_t reverse_purge_hash_map<K, V, H, E, A>::MAX_SAMPLE_SIZE; template<typename K, typename V, typename H, typename E, typename A> -reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size): -lg_cur_size(lg_cur_size), -lg_max_size(lg_max_size), -num_active(0), -keys(A().allocate(1 << lg_cur_size)), -values(AllocV().allocate(1 << lg_cur_size)), -states(AllocU16().allocate(1 << lg_cur_size)) +reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(uint8_t lg_cur_size, uint8_t lg_max_size, const A& allocator): +allocator_(allocator), +lg_cur_size_(lg_cur_size), +lg_max_size_(lg_max_size), +num_active_(0), +keys_(allocator_.allocate(1 << lg_cur_size)), +values_(nullptr), +states_(nullptr) { - std::fill(states, &states[1 << lg_cur_size], 0); + AllocV av(allocator_); + values_ = av.allocate(1 << lg_cur_size); + AllocU16 au16(allocator_); + states_ = au16.allocate(1 << lg_cur_size); + std::fill(states_, states_ + (1 << lg_cur_size), 0); } template<typename K, typename V, typename H, typename E, typename A> reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(const reverse_purge_hash_map<K, V, H, E, A>& other): -lg_cur_size(other.lg_cur_size), -lg_max_size(other.lg_max_size), -num_active(other.num_active), -keys(A().allocate(1 << lg_cur_size)), -values(AllocV().allocate(1 << lg_cur_size)), -states(AllocU16().allocate(1 << lg_cur_size)) +allocator_(other.allocator_), +lg_cur_size_(other.lg_cur_size_), +lg_max_size_(other.lg_max_size_), +num_active_(other.num_active_), +keys_(allocator_.allocate(1 << lg_cur_size_)), +values_(nullptr), +states_(nullptr) { - const uint32_t size = 1 << lg_cur_size; - if (num_active > 0) { - auto num = num_active; + AllocV av(allocator_); + values_ = av.allocate(1 << lg_cur_size_); + AllocU16 au16(allocator_); + states_ = au16.allocate(1 << lg_cur_size_); + const uint32_t size = 1 << lg_cur_size_; + if (num_active_ > 0) { + auto num = num_active_; for (uint32_t i = 0; i < size; i++) { - if (other.states[i] > 0) { - new (&keys[i]) K(other.keys[i]); - values[i] = other.values[i]; + if (other.states_[i] > 0) { + new (&keys_[i]) K(other.keys_[i]); + values_[i] = other.values_[i]; } if (--num == 0) break; } } - std::copy(&other.states[0], &other.states[size], states); + std::copy(other.states_, other.states_ + size, states_); } template<typename K, typename V, typename H, typename E, typename A> reverse_purge_hash_map<K, V, H, E, A>::reverse_purge_hash_map(reverse_purge_hash_map<K, V, H, E, A>&& other) noexcept: -lg_cur_size(other.lg_cur_size), -lg_max_size(other.lg_max_size), -num_active(other.num_active), -keys(nullptr), -values(nullptr), -states(nullptr) +allocator_(std::move(other.allocator_)), +lg_cur_size_(other.lg_cur_size_), +lg_max_size_(other.lg_max_size_), +num_active_(other.num_active_), +keys_(nullptr), +values_(nullptr), +states_(nullptr) { - std::swap(keys, other.keys); - std::swap(values, other.values); - std::swap(states, other.states); - other.num_active = 0; + std::swap(keys_, other.keys_); + std::swap(values_, other.values_); + std::swap(states_, other.states_); + other.num_active_ = 0; } template<typename K, typename V, typename H, typename E, typename A> reverse_purge_hash_map<K, V, H, E, A>::~reverse_purge_hash_map() { - const uint32_t size = 1 << lg_cur_size; - if (num_active > 0) { + const uint32_t size = 1 << lg_cur_size_; + if (num_active_ > 0) { for (uint32_t i = 0; i < size; i++) { if (is_active(i)) { - keys[i].~K(); - if (--num_active == 0) break; + keys_[i].~K(); + if (--num_active_ == 0) break; } } } - if (keys != nullptr) - A().deallocate(keys, size); - if (values != nullptr) - AllocV().deallocate(values, size); - if (states != nullptr) - AllocU16().deallocate(states, size); + if (keys_ != nullptr) { + allocator_.deallocate(keys_, size); + } + if (values_ != nullptr) { + AllocV av(allocator_); + av.deallocate(values_, size); + } + if (states_ != nullptr) { + AllocU16 au16(allocator_); + au16.deallocate(states_, size); + } } template<typename K, typename V, typename H, typename E, typename A> reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A> other) { - std::swap(lg_cur_size, other.lg_cur_size); - std::swap(lg_max_size, other.lg_max_size); - std::swap(num_active, other.num_active); - std::swap(keys, other.keys); - std::swap(values, other.values); - std::swap(states, other.states); + std::swap(allocator_, other.allocator_); + std::swap(lg_cur_size_, other.lg_cur_size_); + std::swap(lg_max_size_, other.lg_max_size_); + std::swap(num_active_, other.num_active_); + std::swap(keys_, other.keys_); + std::swap(values_, other.values_); + std::swap(states_, other.states_); return *this; } template<typename K, typename V, typename H, typename E, typename A> reverse_purge_hash_map<K, V, H, E, A>& reverse_purge_hash_map<K, V, H, E, A>::operator=(reverse_purge_hash_map<K, V, H, E, A>&& other) { - std::swap(lg_cur_size, other.lg_cur_size); - std::swap(lg_max_size, other.lg_max_size); - std::swap(num_active, other.num_active); - std::swap(keys, other.keys); - std::swap(values, other.values); - std::swap(states, other.states); + std::swap(allocator_, other.allocator_); + std::swap(lg_cur_size_, other.lg_cur_size_); + std::swap(lg_max_size_, other.lg_max_size_); + std::swap(num_active_, other.num_active_); + std::swap(keys_, other.keys_); + std::swap(values_, other.values_); + std::swap(states_, other.states_); return *this; } template<typename K, typename V, typename H, typename E, typename A> -V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(const K& key, V value) { - const uint32_t num_active_before = num_active; +template<typename FwdK> +V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(FwdK&& key, V value) { + const uint32_t num_active_before = num_active_; const uint32_t index = internal_adjust_or_insert(key, value); - if (num_active > num_active_before) { - new (&keys[index]) K(key); - return resize_or_purge_if_needed(); - } - return 0; -} - -template<typename K, typename V, typename H, typename E, typename A> -V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) { - const uint32_t num_active_before = num_active; - const uint32_t index = internal_adjust_or_insert(key, value); - if (num_active > num_active_before) { - new (&keys[index]) K(std::move(key)); + if (num_active_ > num_active_before) { + new (&keys_[index]) K(std::forward<FwdK>(key)); return resize_or_purge_if_needed(); } return 0; @@ -148,10 +156,10 @@ V reverse_purge_hash_map<K, V, H, E, A>::adjust_or_insert(K&& key, V value) { template<typename K, typename V, typename H, typename E, typename A> V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const { - const uint32_t mask = (1 << lg_cur_size) - 1; + const uint32_t mask = (1 << lg_cur_size_) - 1; uint32_t probe = fmix64(H()(key)) & mask; while (is_active(probe)) { - if (E()(keys[probe], key)) return values[probe]; + if (E()(keys_[probe], key)) return values_[probe]; probe = (probe + 1) & mask; } return 0; @@ -159,27 +167,32 @@ V reverse_purge_hash_map<K, V, H, E, A>::get(const K& key) const { template<typename K, typename V, typename H, typename E, typename A> uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_cur_size() const { - return lg_cur_size; + return lg_cur_size_; } template<typename K, typename V, typename H, typename E, typename A> uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const { - return lg_max_size; + return lg_max_size_; } template<typename K, typename V, typename H, typename E, typename A> uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const { - return (1 << lg_cur_size) * LOAD_FACTOR; + return (1 << lg_cur_size_) * LOAD_FACTOR; } template<typename K, typename V, typename H, typename E, typename A> uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_num_active() const { - return num_active; + return num_active_; +} + +template<typename K, typename V, typename H, typename E, typename A> +const A& reverse_purge_hash_map<K, V, H, E, A>::get_allocator() const { + return allocator_; } template<typename K, typename V, typename H, typename E, typename A> typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::begin() const { - const uint32_t size = 1 << lg_cur_size; + const uint32_t size = 1 << lg_cur_size_; uint32_t i = 0; while (i < size && !is_active(i)) i++; return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, i, 0); @@ -187,40 +200,40 @@ typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map< template<typename K, typename V, typename H, typename E, typename A> typename reverse_purge_hash_map<K, V, H, E, A>::iterator reverse_purge_hash_map<K, V, H, E, A>::end() const { - return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size, num_active); + return reverse_purge_hash_map<K, V, H, E, A>::iterator(this, 1 << lg_cur_size_, num_active_); } template<typename K, typename V, typename H, typename E, typename A> bool reverse_purge_hash_map<K, V, H, E, A>::is_active(uint32_t index) const { - return states[index] > 0; + return states_[index] > 0; } template<typename K, typename V, typename H, typename E, typename A> void reverse_purge_hash_map<K, V, H, E, A>::subtract_and_keep_positive_only(V amount) { // starting from the back, find the first empty cell, // which establishes the high end of a cluster. - uint32_t first_probe = (1 << lg_cur_size) - 1; + uint32_t first_probe = (1 << lg_cur_size_) - 1; while (is_active(first_probe)) first_probe--; // when we find the next non-empty cell, we know we are at the high end of a cluster // work towards the front, delete any non-positive entries. for (uint32_t probe = first_probe; probe-- > 0;) { if (is_active(probe)) { - if (values[probe] <= amount) { + if (values_[probe] <= amount) { hash_delete(probe); // does the work of deletion and moving higher items towards the front - num_active--; + num_active_--; } else { - values[probe] -= amount; + values_[probe] -= amount; } } } // now work on the first cluster that was skipped - for (uint32_t probe = (1 << lg_cur_size); probe-- > first_probe;) { + for (uint32_t probe = (1 << lg_cur_size_); probe-- > first_probe;) { if (is_active(probe)) { - if (values[probe] <= amount) { + if (values_[probe] <= amount) { hash_delete(probe); - num_active--; + num_active_--; } else { - values[probe] -= amount; + values_[probe] -= amount; } } } @@ -231,20 +244,20 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) { // Looks ahead in the table to search for another // item to move to this location // if none are found, the status is changed - states[delete_index] = 0; // mark as empty - keys[delete_index].~K(); + states_[delete_index] = 0; // mark as empty + keys_[delete_index].~K(); uint32_t drift = 1; - const uint32_t mask = (1 << lg_cur_size) - 1; + const uint32_t mask = (1 << lg_cur_size_) - 1; uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2 // advance until we find a free location replacing locations as needed while (is_active(probe)) { - if (states[probe] > drift) { + if (states_[probe] > drift) { // move current element - new (&keys[delete_index]) K(std::move(keys[probe])); - values[delete_index] = values[probe]; - states[delete_index] = states[probe] - drift; - states[probe] = 0; // mark as empty - keys[probe].~K(); + new (&keys_[delete_index]) K(std::move(keys_[probe])); + values_[delete_index] = values_[probe]; + states_[delete_index] = states_[probe] - drift; + states_[probe] = 0; // mark as empty + keys_[probe].~K(); drift = 0; delete_index = probe; } @@ -257,13 +270,13 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) { template<typename K, typename V, typename H, typename E, typename A> uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const K& key, V value) { - const uint32_t mask = (1 << lg_cur_size) - 1; + const uint32_t mask = (1 << lg_cur_size_) - 1; uint32_t index = fmix64(H()(key)) & mask; uint16_t drift = 1; while (is_active(index)) { - if (E()(keys[index], key)) { + if (E()(keys_[index], key)) { // adjusting the value of an existing key - values[index] += value; + values_[index] += value; return index; } index = (index + 1) & mask; @@ -272,23 +285,23 @@ uint32_t reverse_purge_hash_map<K, V, H, E, A>::internal_adjust_or_insert(const if (drift >= DRIFT_LIMIT) throw std::logic_error("drift limit reached"); } // adding the key and value to the table - if (num_active > get_capacity()) { - throw std::logic_error("num_active " + std::to_string(num_active) + " > capacity " + std::to_string(get_capacity())); + if (num_active_ > get_capacity()) { + throw std::logic_error("num_active " + std::to_string(num_active_) + " > capacity " + std::to_string(get_capacity())); } - values[index] = value; - states[index] = drift; - num_active++; + values_[index] = value; + states_[index] = drift; + num_active_++; return index; } template<typename K, typename V, typename H, typename E, typename A> V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() { - if (num_active > get_capacity()) { - if (lg_cur_size < lg_max_size) { // can grow - resize(lg_cur_size + 1); + if (num_active_ > get_capacity()) { + if (lg_cur_size_ < lg_max_size_) { // can grow + resize(lg_cur_size_ + 1); } else { // at target size, must purge const V offset = purge(); - if (num_active > get_capacity()) { + if (num_active_ > get_capacity()) { throw std::logic_error("purge did not reduce number of active items"); } return offset; @@ -299,43 +312,46 @@ V reverse_purge_hash_map<K, V, H, E, A>::resize_or_purge_if_needed() { template<typename K, typename V, typename H, typename E, typename A> void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) { - const uint32_t old_size = 1 << lg_cur_size; - K* old_keys = keys; - V* old_values = values; - uint16_t* old_states = states; + const uint32_t old_size = 1 << lg_cur_size_; + K* old_keys = keys_; + V* old_values = values_; + uint16_t* old_states = states_; const uint32_t new_size = 1 << lg_new_size; - keys = A().allocate(new_size); - values = AllocV().allocate(new_size); - states = AllocU16().allocate(new_size); - std::fill(states, &states[new_size], 0); - num_active = 0; - lg_cur_size = lg_new_size; + keys_ = allocator_.allocate(new_size); + AllocV av(allocator_); + values_ = av.allocate(new_size); + AllocU16 au16(allocator_); + states_ = au16.allocate(new_size); + std::fill(states_, states_ + new_size, 0); + num_active_ = 0; + lg_cur_size_ = lg_new_size; for (uint32_t i = 0; i < old_size; i++) { if (old_states[i] > 0) { adjust_or_insert(std::move(old_keys[i]), old_values[i]); old_keys[i].~K(); } } - A().deallocate(old_keys, old_size); - AllocV().deallocate(old_values, old_size); - AllocU16().deallocate(old_states, old_size); + allocator_.deallocate(old_keys, old_size); + av.deallocate(old_values, old_size); + au16.deallocate(old_states, old_size); } template<typename K, typename V, typename H, typename E, typename A> V reverse_purge_hash_map<K, V, H, E, A>::purge() { - const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active); + const uint32_t limit = std::min(MAX_SAMPLE_SIZE, num_active_); uint32_t num_samples = 0; uint32_t i = 0; - V* samples = AllocV().allocate(limit); + AllocV av(allocator_); + V* samples = av.allocate(limit); while (num_samples < limit) { if (is_active(i)) { - samples[num_samples++] = values[i]; + samples[num_samples++] = values_[i]; } i++; } - std::nth_element(&samples[0], &samples[num_samples / 2], &samples[num_samples]); + std::nth_element(samples, samples+ (num_samples / 2), samples + num_samples); const V median = samples[num_samples / 2]; - AllocV().deallocate(samples, limit); + av.deallocate(samples, limit); subtract_and_keep_positive_only(median); return median; } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
