This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch hll_allocator in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit 4bbafa065862f12297aed682a1e970eaabb4858c Author: AlexanderSaydakov <[email protected]> AuthorDate: Wed Jan 27 17:43:23 2021 -0800 support allocator instance, use vectors --- hll/include/AuxHashMap-internal.hpp | 89 +++++++------------ hll/include/AuxHashMap.hpp | 17 ++-- hll/include/CompositeInterpolationXTable.hpp | 4 +- hll/include/CouponHashSet-internal.hpp | 82 ++++++++---------- hll/include/CouponHashSet.hpp | 20 ++--- hll/include/CouponList-internal.hpp | 122 ++++++++++----------------- hll/include/CouponList.hpp | 23 +++-- hll/include/CubicInterpolation.hpp | 4 +- hll/include/HarmonicNumbers.hpp | 4 +- hll/include/Hll4Array-internal.hpp | 29 ++++--- hll/include/Hll4Array.hpp | 2 +- hll/include/Hll6Array-internal.hpp | 31 +++---- hll/include/Hll6Array.hpp | 5 +- hll/include/Hll8Array-internal.hpp | 31 +++---- hll/include/Hll8Array.hpp | 5 +- hll/include/HllArray-internal.hpp | 83 ++++++------------ hll/include/HllArray.hpp | 16 ++-- hll/include/HllSketch-internal.hpp | 20 ++--- hll/include/HllSketchImpl.hpp | 3 +- hll/include/HllSketchImplFactory.hpp | 65 +++++++------- hll/include/HllUnion-internal.hpp | 6 +- hll/include/HllUtil.hpp | 2 +- hll/include/RelativeErrorTables.hpp | 2 +- hll/include/hll.hpp | 12 +-- hll/test/AuxHashMapTest.cpp | 14 +-- hll/test/CouponHashSetTest.cpp | 4 +- hll/test/CouponListTest.cpp | 6 +- hll/test/HllArrayTest.cpp | 4 +- 28 files changed, 293 insertions(+), 412 deletions(-) diff --git a/hll/include/AuxHashMap-internal.hpp b/hll/include/AuxHashMap-internal.hpp index 9a8e135..60142ec 100644 --- a/hll/include/AuxHashMap-internal.hpp +++ b/hll/include/AuxHashMap-internal.hpp @@ -26,42 +26,28 @@ namespace datasketches { template<typename A> -AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK) - : lgConfigK(lgConfigK), - lgAuxArrInts(lgAuxArrInts), - auxCount(0) { - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - const int numItems = 1 << lgAuxArrInts; - auxIntArr = intAlloc().allocate(numItems); - std::fill(auxIntArr, auxIntArr + numItems, 0); -} - -template<typename A> -AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) { - return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK); -} +AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator): +lgConfigK(lgConfigK), +lgAuxArrInts(lgAuxArrInts), +auxCount(0), +entries(1 << lgAuxArrInts, 0, allocator) +{} template<typename A> -AuxHashMap<A>::AuxHashMap(const AuxHashMap& that) - : lgConfigK(that.lgConfigK), - lgAuxArrInts(that.lgAuxArrInts), - auxCount(that.auxCount) { - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - const int numItems = 1 << lgAuxArrInts; - auxIntArr = intAlloc().allocate(numItems); - std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr); +AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) { + return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator); } template<typename A> AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) { - return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that); + return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that); } template<typename A> AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len, int lgConfigK, int auxCount, int lgAuxArrInts, - bool srcCompact) { + bool srcCompact, const A& allocator) { int lgArrInts = lgAuxArrInts; if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK); @@ -77,7 +63,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len, if (len < auxCount * sizeof(int)) { throw std::out_of_range("Input array too small to hold AuxHashMap image"); } - auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK); + auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator); for (int i = 0; i < auxCount; ++i) { int pair = auxPtr[i]; int slotNo = HllUtil<A>::getLow26(pair) & configKmask; @@ -89,7 +75,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len, if (len < itemsToRead * sizeof(int)) { throw std::out_of_range("Input array too small to hold AuxHashMap image"); } - auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK); + auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator); for (int i = 0; i < itemsToRead; ++i) { int pair = auxPtr[i]; if (pair == HllUtil<A>::EMPTY) { continue; } @@ -110,7 +96,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len, template<typename A> AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK, const int auxCount, const int lgAuxArrInts, - const bool srcCompact) { + const bool srcCompact, const A& allocator) { int lgArrInts = lgAuxArrInts; if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK); @@ -118,7 +104,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK, lgArrInts = lgAuxArrInts; } - AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK); + AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator); typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr; aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter()); @@ -153,23 +139,17 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK, } template<typename A> -AuxHashMap<A>::~AuxHashMap<A>() { - // should be no way to have an object without a valid array - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts); -} - -template<typename A> std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() { return [](AuxHashMap<A>* ptr) { + ahmAlloc alloc(ptr->entries.get_allocator()); ptr->~AuxHashMap(); - ahmAlloc().deallocate(ptr, 1); + alloc.deallocate(ptr, 1); }; } template<typename A> AuxHashMap<A>* AuxHashMap<A>::copy() const { - return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this); + return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this); } template<typename A> @@ -179,7 +159,7 @@ int AuxHashMap<A>::getAuxCount() const { template<typename A> int* AuxHashMap<A>::getAuxIntArr(){ - return auxIntArr; + return entries.data(); } template<typename A> @@ -199,7 +179,7 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const { template<typename A> void AuxHashMap<A>::mustAdd(const int slotNo, const int value) { - const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); + const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo); const int entry_pair = HllUtil<A>::pair(slotNo, value); if (index >= 0) { throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: " @@ -207,16 +187,16 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) { } // found empty entry - auxIntArr[~index] = entry_pair; + entries[~index] = entry_pair; ++auxCount; checkGrow(); } template<typename A> int AuxHashMap<A>::mustFindValueFor(const int slotNo) const { - const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); + const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo); if (index >= 0) { - return HllUtil<A>::getValue(auxIntArr[index]); + return HllUtil<A>::getValue(entries[index]); } throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo)); @@ -224,9 +204,9 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const { template<typename A> void AuxHashMap<A>::mustReplace(const int slotNo, const int value) { - const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo); + const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo); if (idx >= 0) { - auxIntArr[idx] = HllUtil<A>::pair(slotNo, value); + entries[idx] = HllUtil<A>::pair(slotNo, value); return; } @@ -243,23 +223,18 @@ void AuxHashMap<A>::checkGrow() { template<typename A> void AuxHashMap<A>::growAuxSpace() { - int* oldArray = auxIntArr; - const int oldArrLen = 1 << lgAuxArrInts; const int configKmask = (1 << lgConfigK) - 1; const int newArrLen = 1 << ++lgAuxArrInts; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - auxIntArr = intAlloc().allocate(newArrLen); - std::fill(auxIntArr, auxIntArr + newArrLen, 0); - for (int i = 0; i < oldArrLen; ++i) { - const int fetched = oldArray[i]; + vector_int entries_new(newArrLen, 0, entries.get_allocator()); + for (size_t i = 0; i < entries.size(); ++i) { + const int fetched = entries[i]; if (fetched != HllUtil<A>::EMPTY) { // find empty in new array - const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask); - auxIntArr[~idx] = fetched; + const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask); + entries_new[~idx] = fetched; } } - - intAlloc().deallocate(oldArray, oldArrLen); + entries = std::move(entries_new); } //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context. @@ -290,12 +265,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC template<typename A> coupon_iterator<A> AuxHashMap<A>::begin(bool all) const { - return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all); + return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all); } template<typename A> coupon_iterator<A> AuxHashMap<A>::end() const { - return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false); + return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false); } } diff --git a/hll/include/AuxHashMap.hpp b/hll/include/AuxHashMap.hpp index b37e85c..e18f15d 100644 --- a/hll/include/AuxHashMap.hpp +++ b/hll/include/AuxHashMap.hpp @@ -28,22 +28,21 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A> class AuxHashMap final { public: - explicit AuxHashMap(int lgAuxArrInts, int lgConfigK); - explicit AuxHashMap(const AuxHashMap<A>& that); - static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK); + AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator); + static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator); static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that); static AuxHashMap* deserialize(const void* bytes, size_t len, int lgConfigK, int auxCount, int lgAuxArrInts, - bool srcCompact); + bool srcCompact, const A& allocator); static AuxHashMap* deserialize(std::istream& is, int lgConfigK, int auxCount, int lgAuxArrInts, - bool srcCompact); - virtual ~AuxHashMap(); + bool srcCompact, const A& allocator); + virtual ~AuxHashMap() = default; static std::function<void(AuxHashMap<A>*)> make_deleter(); AuxHashMap* copy() const; @@ -64,6 +63,8 @@ class AuxHashMap final { private: typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc; + using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>; + // static so it can be used when resizing static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo); @@ -73,7 +74,7 @@ class AuxHashMap final { const int lgConfigK; int lgAuxArrInts; int auxCount; - int* auxIntArr; + vector_int entries; }; } diff --git a/hll/include/CompositeInterpolationXTable.hpp b/hll/include/CompositeInterpolationXTable.hpp index 8baecbe..0fa0af8 100644 --- a/hll/include/CompositeInterpolationXTable.hpp +++ b/hll/include/CompositeInterpolationXTable.hpp @@ -24,7 +24,7 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A = std::allocator<uint8_t>> class CompositeInterpolationXTable { public: static int get_y_stride(int logK); @@ -37,4 +37,4 @@ class CompositeInterpolationXTable { #include "CompositeInterpolationXTable-internal.hpp" -#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */ \ No newline at end of file +#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */ diff --git a/hll/include/CouponHashSet-internal.hpp b/hll/include/CouponHashSet-internal.hpp index 35facfe..29a3ea7 100644 --- a/hll/include/CouponHashSet-internal.hpp +++ b/hll/include/CouponHashSet-internal.hpp @@ -31,8 +31,8 @@ template<typename A> static int find(const int* array, const int lgArrInts, const int coupon); template<typename A> -CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType) - : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET) +CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator) + : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator) { if (lgConfigK <= 7) { throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: " @@ -41,27 +41,21 @@ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHl } template<typename A> -CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that) - : CouponList<A>(that) {} - -template<typename A> CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType) : CouponList<A>(that, tgtHllType) {} template<typename A> -CouponHashSet<A>::~CouponHashSet() {} - -template<typename A> std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const { return [](HllSketchImpl<A>* ptr) { CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr); + ChsAlloc chsa(chs->getAllocator()); chs->~CouponHashSet(); - chsAlloc().deallocate(chs, 1); + chsa.deallocate(chs, 1); }; } template<typename A> -CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) { +CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) { if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded throw std::out_of_range("Input data length insufficient to hold CouponHashSet"); } @@ -79,7 +73,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) { const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]); if (mode != SET) { - throw std::invalid_argument("Calling set construtor with non-set mode data"); + throw std::invalid_argument("Calling set constructor with non-set mode data"); } const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]); @@ -106,7 +100,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) { + ", found: " + std::to_string(len)); } - CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType); + ChsAlloc chsa(allocator); + CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator); if (compactFlag) { const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START; @@ -116,24 +111,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) { sketch->couponUpdate(coupon); } } else { - int* oldArr = sketch->couponIntArr; - const size_t oldArrLen = 1 << sketch->lgCouponArrInts; - sketch->lgCouponArrInts = lgArrInts; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts); + sketch->coupons.resize(1 << lgArrInts); sketch->couponCount = couponCount; // only need to read valid coupons, unlike in stream case - std::memcpy(sketch->couponIntArr, + std::memcpy(sketch->coupons.data(), data + HllUtil<A>::HASH_SET_INT_ARR_START, couponCount * sizeof(int)); - intAlloc().deallocate(oldArr, oldArrLen); } return sketch; } template<typename A> -CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) { +CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) { uint8_t listHeader[8]; is.read((char*)listHeader, 8 * sizeof(uint8_t)); @@ -149,7 +139,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) { hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]); if (mode != SET) { - throw std::invalid_argument("Calling set construtor with non-set mode data"); + throw std::invalid_argument("Calling set constructor with non-set mode data"); } target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]); @@ -168,7 +158,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) { lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK); } - CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType); + ChsAlloc chsa(allocator); + CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator); typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr; coupon_hash_set_ptr ptr(sketch, sketch->get_deleter()); @@ -181,13 +172,10 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) { sketch->couponUpdate(coupon); } } else { - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts); - sketch->lgCouponArrInts = lgArrInts; - sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts); + sketch->coupons.resize(1 << lgArrInts); sketch->couponCount = couponCount; // for stream processing, read entire list so read pointer ends up set correctly - is.read((char*)sketch->couponIntArr, (1 << sketch->lgCouponArrInts) * sizeof(int)); + is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int)); } if (!is.good()) @@ -198,21 +186,24 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) { template<typename A> CouponHashSet<A>* CouponHashSet<A>::copy() const { - return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this); + ChsAlloc chsa(this->coupons.get_allocator()); + return new (chsa.allocate(1)) CouponHashSet<A>(*this); } template<typename A> CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const { - return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this, tgtHllType); + ChsAlloc chsa(this->coupons.get_allocator()); + return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType); } template<typename A> HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) { - const int index = find<A>(this->couponIntArr, this->lgCouponArrInts, coupon); + const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size()); + const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon); if (index >= 0) { return this; // found duplicate, ignore } - this->couponIntArr[~index] = coupon; // found empty + this->coupons[~index] = coupon; // found empty ++this->couponCount; if (checkGrowOrPromote()) { return this->promoteHeapListOrSetToHll(*this); @@ -232,39 +223,34 @@ int CouponHashSet<A>::getPreInts() const { template<typename A> bool CouponHashSet<A>::checkGrowOrPromote() { - if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * (1 << this->lgCouponArrInts))) { - if (this->lgCouponArrInts == (this->lgConfigK - 3)) { // at max size + if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) { + const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size()); + if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size return true; // promote to HLL } - int tgtLgCoupArrSize = this->lgCouponArrInts + 1; - growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize); + growHashSet(lgCouponArrInts + 1); } return false; } template<typename A> -void CouponHashSet<A>::growHashSet(const int srcLgCoupArrSize, const int tgtLgCoupArrSize) { +void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) { const int tgtLen = 1 << tgtLgCoupArrSize; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - int* tgtCouponIntArr = intAlloc().allocate(tgtLen); - std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0); + vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator()); - const int srcLen = 1 << srcLgCoupArrSize; + const int srcLen = this->coupons.size(); for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values - const int fetched = this->couponIntArr[i]; + const int fetched = this->coupons[i]; if (fetched != HllUtil<A>::EMPTY) { - const int idx = find<A>(tgtCouponIntArr, tgtLgCoupArrSize, fetched); // search TGT array + const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array if (idx < 0) { // found EMPTY - tgtCouponIntArr[~idx] = fetched; // insert + coupons_new[~idx] = fetched; // insert continue; } throw std::runtime_error("Error: Found duplicate coupon"); } } - - intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts); - this->couponIntArr = tgtCouponIntArr; - this->lgCouponArrInts = tgtLgCoupArrSize; + this->coupons = std::move(coupons_new); } template<typename A> diff --git a/hll/include/CouponHashSet.hpp b/hll/include/CouponHashSet.hpp index 7aaffc3..b9b99b7 100644 --- a/hll/include/CouponHashSet.hpp +++ b/hll/include/CouponHashSet.hpp @@ -24,20 +24,20 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A> class CouponHashSet : public CouponList<A> { public: - static CouponHashSet* newSet(const void* bytes, size_t len); - static CouponHashSet* newSet(std::istream& is); - explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType); - explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType); - explicit CouponHashSet(const CouponHashSet& that); + static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator); + static CouponHashSet* newSet(std::istream& is, const A& allocator); + CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator); + CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType); - virtual ~CouponHashSet(); + virtual ~CouponHashSet() = default; virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; protected: - + using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>; + virtual CouponHashSet* copy() const; virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const; @@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> { friend class HllSketchImplFactory<A>; private: - typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc; + using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>; bool checkGrowOrPromote(); - void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize); + void growHashSet(int tgtLgCoupArrSize); }; } diff --git a/hll/include/CouponList-internal.hpp b/hll/include/CouponList-internal.hpp index 1800a37..fd304c8 100644 --- a/hll/include/CouponList-internal.hpp +++ b/hll/include/CouponList-internal.hpp @@ -23,6 +23,7 @@ #include "CouponList.hpp" #include "CubicInterpolation.hpp" #include "HllUtil.hpp" +#include "count_zeros.hpp" #include <algorithm> #include <cmath> @@ -30,74 +31,45 @@ namespace datasketches { template<typename A> -CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode) - : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) { - if (mode == hll_mode::LIST) { - lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE; - } else { // mode == SET - lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE; - } - oooFlag = false; - const int arrayLen = 1 << lgCouponArrInts; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - couponIntArr = intAlloc().allocate(arrayLen); - std::fill(couponIntArr, couponIntArr + arrayLen, 0); - couponCount = 0; -} - -template<typename A> -CouponList<A>::CouponList(const CouponList& that) - : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false), - lgCouponArrInts(that.lgCouponArrInts), - couponCount(that.couponCount), - oooFlag(that.oooFlag) { - - const int numItems = 1 << lgCouponArrInts; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - couponIntArr = intAlloc().allocate(numItems); - std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); -} - -template<typename A> -CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType) - : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false), - lgCouponArrInts(that.lgCouponArrInts), - couponCount(that.couponCount), - oooFlag(that.oooFlag) { - - const int numItems = 1 << lgCouponArrInts; - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - couponIntArr = intAlloc().allocate(numItems); - std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr); -} +CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator): +HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false), +couponCount(0), +oooFlag(false), +coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator) +{} template<typename A> -CouponList<A>::~CouponList() { - typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc; - intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts); -} +CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType): +HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false), +couponCount(that.couponCount), +oooFlag(that.oooFlag), +coupons(that.coupons) +{} template<typename A> std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const { return [](HllSketchImpl<A>* ptr) { CouponList<A>* cl = static_cast<CouponList<A>*>(ptr); + ClAlloc cla(cl->getAllocator()); cl->~CouponList(); - clAlloc().deallocate(cl, 1); + cla.deallocate(cl, 1); }; } template<typename A> CouponList<A>* CouponList<A>::copy() const { - return new (clAlloc().allocate(1)) CouponList<A>(*this); + ClAlloc cla(coupons.get_allocator()); + return new (cla.allocate(1)) CouponList<A>(*this); } template<typename A> CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const { - return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType); + ClAlloc cla(coupons.get_allocator()); + return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType); } template<typename A> -CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) { +CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) { if (len < HllUtil<A>::LIST_INT_ARR_START) { throw std::out_of_range("Input data length insufficient to hold CouponHashSet"); } @@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) { hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]); if (mode != LIST) { - throw std::invalid_argument("Calling set construtor with non-list mode data"); + throw std::invalid_argument("Calling list constructor with non-list mode data"); } target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]); @@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) { + ", found: " + std::to_string(len)); } - CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode); + ClAlloc cla(allocator); + CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator); sketch->couponCount = couponCount; sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST if (!emptyFlag) { // only need to read valid coupons, unlike in stream case - std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int)); + std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int)); } return sketch; } template<typename A> -CouponList<A>* CouponList<A>::newList(std::istream& is) { +CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) { uint8_t listHeader[8]; is.read((char*)listHeader, 8 * sizeof(uint8_t)); @@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) { hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]); if (mode != LIST) { - throw std::invalid_argument("Calling list construtor with non-list mode data"); + throw std::invalid_argument("Calling list constructor with non-list mode data"); } const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]); @@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) { const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false); const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false); - CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode); - typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr; + ClAlloc cla(allocator); + CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator); + using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>; coupon_list_ptr ptr(sketch, sketch->get_deleter()); const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE]; sketch->couponCount = couponCount; @@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) { // For stream processing, need to read entire number written to stream so read // pointer ends up set correctly. // If not compact, still need to read empty items even though in order. - const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts)); - is.read((char*)sketch->couponIntArr, numToRead * sizeof(int)); + const int numToRead = (compact ? couponCount : sketch->coupons.size()); + is.read((char*)sketch->coupons.data(), numToRead * sizeof(int)); } if (!is.good()) @@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) { template<typename A> vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const { const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes; - vector_u8<A> byteArr(sketchSizeBytes); + vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator()); uint8_t* bytes = byteArr.data() + header_size_bytes; bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts()); bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER); bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID); bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK); - bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts); + bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size()); bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact); bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0); bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte(); @@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable - std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); + std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int)); break; } case 1: { // src updatable, dst compact @@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const { os.write((char*)&familyId, sizeof(familyId)); const uint8_t lgKByte((uint8_t) this->lgConfigK); os.write((char*)&lgKByte, sizeof(lgKByte)); - const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts); + const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size())); os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte)); const uint8_t flagsByte(this->makeFlagsByte(compact)); os.write((char*)&flagsByte, sizeof(flagsByte)); @@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const { const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0); switch (sw) { case 0: { // src updatable, dst updatable - os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int)); + os.write((char*)coupons.data(), coupons.size() * sizeof(int)); break; } case 1: { // src updatable, dst compact @@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const { template<typename A> HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) { - const int len = 1 << lgCouponArrInts; - for (int i = 0; i < len; ++i) { // search for empty slot - const int couponAtIdx = couponIntArr[i]; + for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot + const int couponAtIdx = coupons[i]; if (couponAtIdx == HllUtil<A>::EMPTY) { - couponIntArr[i] = coupon; // the actual update + coupons[i] = coupon; // the actual update ++couponCount; - if (couponCount >= len) { // array full + if (couponCount == static_cast<int>(coupons.size())) { // array full if (this->lgConfigK < 8) { return promoteHeapListOrSetToHll(*this); } @@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; } template<typename A> int CouponList<A>::getUpdatableSerializationBytes() const { - return getMemDataStart() + (4 << getLgCouponArrInts()); + return getMemDataStart() + coupons.size() * sizeof(int); } template<typename A> @@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) { } template<typename A> -int CouponList<A>::getLgCouponArrInts() const { - return lgCouponArrInts; -} - -template<typename A> -int* CouponList<A>::getCouponIntArr() const { - return couponIntArr; +A CouponList<A>::getAllocator() const { + return coupons.get_allocator(); } template<typename A> @@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) { template<typename A> coupon_iterator<A> CouponList<A>::begin(bool all) const { - return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all); + return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all); } template<typename A> coupon_iterator<A> CouponList<A>::end() const { - return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false); + return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false); } } diff --git a/hll/include/CouponList.hpp b/hll/include/CouponList.hpp index 063805b..c19569e 100644 --- a/hll/include/CouponList.hpp +++ b/hll/include/CouponList.hpp @@ -30,19 +30,18 @@ namespace datasketches { template<typename A> class HllSketchImplFactory; -template<typename A = std::allocator<char>> +template<typename A> class CouponList : public HllSketchImpl<A> { public: - explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode); - explicit CouponList(const CouponList& that); - explicit CouponList(const CouponList& that, target_hll_type tgtHllType); + CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator); + CouponList(const CouponList& that, target_hll_type tgtHllType); - static CouponList* newList(const void* bytes, size_t len); - static CouponList* newList(std::istream& is); + static CouponList* newList(const void* bytes, size_t len, const A& allocator); + static CouponList* newList(std::istream& is, const A& allocator); virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const; virtual void serialize(std::ostream& os, bool compact) const; - virtual ~CouponList(); + virtual ~CouponList() = default; virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; virtual CouponList* copy() const; @@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> { coupon_iterator<A> end() const; protected: - typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc; + using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>; + + using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>; HllSketchImpl<A>* promoteHeapListToSet(CouponList& list); HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src); @@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> { virtual bool isOutOfOrderFlag() const; virtual void putOutOfOrderFlag(bool oooFlag); - virtual int getLgCouponArrInts() const; - virtual int* getCouponIntArr() const; + virtual A getAllocator() const; - int lgCouponArrInts; int couponCount; bool oooFlag; - int* couponIntArr; + vector_int coupons; friend class HllSketchImplFactory<A>; }; diff --git a/hll/include/CubicInterpolation.hpp b/hll/include/CubicInterpolation.hpp index b9cdfe7..58fb7d7 100644 --- a/hll/include/CubicInterpolation.hpp +++ b/hll/include/CubicInterpolation.hpp @@ -24,7 +24,7 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A = std::allocator<uint8_t>> class CubicInterpolation { public: static double usingXAndYTables(const double xArr[], const double yArr[], @@ -40,4 +40,4 @@ class CubicInterpolation { #include "CubicInterpolation-internal.hpp" -#endif /* _CUBICINTERPOLATION_HPP_ */ \ No newline at end of file +#endif /* _CUBICINTERPOLATION_HPP_ */ diff --git a/hll/include/HarmonicNumbers.hpp b/hll/include/HarmonicNumbers.hpp index 501ce0c..34b830a 100644 --- a/hll/include/HarmonicNumbers.hpp +++ b/hll/include/HarmonicNumbers.hpp @@ -25,7 +25,7 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A = std::allocator<uint8_t>> class HarmonicNumbers { public: /** @@ -45,4 +45,4 @@ class HarmonicNumbers { #include "HarmonicNumbers-internal.hpp" -#endif /* _HARMONICNUMBERS_HPP_ */ \ No newline at end of file +#endif /* _HARMONICNUMBERS_HPP_ */ diff --git a/hll/include/Hll4Array-internal.hpp b/hll/include/Hll4Array-internal.hpp index 8498bb8..f93014a 100644 --- a/hll/include/Hll4Array-internal.hpp +++ b/hll/include/Hll4Array-internal.hpp @@ -30,13 +30,12 @@ namespace datasketches { template<typename A> -Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) : - HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) { +Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator): +HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator), +auxHashMap(nullptr) +{ const int numBytes = this->hll4ArrBytes(lgConfigK); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; - this->hllByteArr = uint8Alloc().allocate(numBytes); - std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0); - auxHashMap = nullptr; + this->hllByteArr.resize(numBytes, 0); } template<typename A> @@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() { template<typename A> std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const { return [](HllSketchImpl<A>* ptr) { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc; Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr); + using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; + Hll4Alloc hll4Alloc(hll->getAllocator()); hll->~Hll4Array(); - hll4Alloc().deallocate(hll, 1); + hll4Alloc.deallocate(hll, 1); }; } template<typename A> Hll4Array<A>* Hll4Array<A>::copy() const { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc; - return new (hll4Alloc().allocate(1)) Hll4Array<A>(*this); + using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; + Hll4Alloc hll4Alloc(this->getAllocator()); + return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this); } template<typename A> @@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) { // added to the exception table putSlot(slotNo, HllUtil<A>::AUX_TOKEN); if (auxHashMap == nullptr) { - auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK); + auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator()); } auxHashMap->mustAdd(slotNo, newVal); } @@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() { } else { //newShiftedVal >= AUX_TOKEN // the former exception remains an exception, so must be added to the newAuxMap if (newAuxMap == nullptr) { - newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK); + newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator()); } newAuxMap->mustAdd(slotNum, oldActualVal); } @@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() { template<typename A> typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const { - return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all); + return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all); } template<typename A> typename HllArray<A>::const_iterator Hll4Array<A>::end() const { - return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false); + return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false); } template<typename A> diff --git a/hll/include/Hll4Array.hpp b/hll/include/Hll4Array.hpp index ff56c86..38b2c94 100644 --- a/hll/include/Hll4Array.hpp +++ b/hll/include/Hll4Array.hpp @@ -31,7 +31,7 @@ class Hll4Iterator; template<typename A> class Hll4Array final : public HllArray<A> { public: - explicit Hll4Array(int lgConfigK, bool startFullSize); + explicit Hll4Array(int lgConfigK, bool startFullSize, const A& allocator); explicit Hll4Array(const Hll4Array<A>& that); virtual ~Hll4Array(); diff --git a/hll/include/Hll6Array-internal.hpp b/hll/include/Hll6Array-internal.hpp index a318564..e9f6e9f 100644 --- a/hll/include/Hll6Array-internal.hpp +++ b/hll/include/Hll6Array-internal.hpp @@ -27,40 +27,29 @@ namespace datasketches { template<typename A> -Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) : - HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) { - const int numBytes = this->hll6ArrBytes(lgConfigK); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; - this->hllByteArr = uint8Alloc().allocate(numBytes); - std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0); -} - -template<typename A> -Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) : - HllArray<A>(that) +Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize, const A& allocator): +HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator) { - // can determine hllByteArr size in parent class, no need to allocate here -} - -template<typename A> -Hll6Array<A>::~Hll6Array() { - // hllByteArr deleted in parent + const int numBytes = this->hll6ArrBytes(lgConfigK); + this->hllByteArr.resize(numBytes, 0); } template<typename A> std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const { return [](HllSketchImpl<A>* ptr) { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; + using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>; Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr); + Hll6Alloc hll6Alloc(hll->getAllocator()); hll->~Hll6Array(); - hll6Alloc().deallocate(hll, 1); + hll6Alloc.deallocate(hll, 1); }; } template<typename A> Hll6Array<A>* Hll6Array<A>::copy() const { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; - return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this); + using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>; + Hll6Alloc hll6Alloc(this->getAllocator()); + return new (hll6Alloc.allocate(1)) Hll6Array<A>(*this); } template<typename A> diff --git a/hll/include/Hll6Array.hpp b/hll/include/Hll6Array.hpp index 5178de8..03370b2 100644 --- a/hll/include/Hll6Array.hpp +++ b/hll/include/Hll6Array.hpp @@ -30,10 +30,9 @@ class Hll6Iterator; template<typename A> class Hll6Array final : public HllArray<A> { public: - explicit Hll6Array(int lgConfigK, bool startFullSize); - explicit Hll6Array(const Hll6Array<A>& that); + Hll6Array(int lgConfigK, bool startFullSize, const A& allocator); - virtual ~Hll6Array(); + virtual ~Hll6Array() = default; virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; virtual Hll6Array* copy() const; diff --git a/hll/include/Hll8Array-internal.hpp b/hll/include/Hll8Array-internal.hpp index cb14a0f..f27a796 100644 --- a/hll/include/Hll8Array-internal.hpp +++ b/hll/include/Hll8Array-internal.hpp @@ -25,40 +25,29 @@ namespace datasketches { template<typename A> -Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) : - HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) { - const int numBytes = this->hll8ArrBytes(lgConfigK); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; - this->hllByteArr = uint8Alloc().allocate(numBytes); - std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0); -} - -template<typename A> -Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) : - HllArray<A>(that) +Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize, const A& allocator): +HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator) { - // can determine hllByteArr size in parent class, no need to allocate here -} - -template<typename A> -Hll8Array<A>::~Hll8Array() { - // hllByteArr deleted in parent + const int numBytes = this->hll8ArrBytes(lgConfigK); + this->hllByteArr.resize(numBytes, 0); } template<typename A> std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const { return [](HllSketchImpl<A>* ptr) { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr); + using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>; + Hll8Alloc hll8Alloc(hll->getAllocator()); hll->~Hll8Array(); - hll8Alloc().deallocate(hll, 1); + hll8Alloc.deallocate(hll, 1); }; } template<typename A> Hll8Array<A>* Hll8Array<A>::copy() const { - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; - return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this); + using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>; + Hll8Alloc hll8Alloc(this->getAllocator()); + return new (hll8Alloc.allocate(1)) Hll8Array<A>(*this); } template<typename A> diff --git a/hll/include/Hll8Array.hpp b/hll/include/Hll8Array.hpp index 2b0aefc..ea9a5bd 100644 --- a/hll/include/Hll8Array.hpp +++ b/hll/include/Hll8Array.hpp @@ -30,10 +30,9 @@ class Hll8Iterator; template<typename A> class Hll8Array final : public HllArray<A> { public: - explicit Hll8Array(int lgConfigK, bool startFullSize); - explicit Hll8Array(const Hll8Array& that); + Hll8Array(int lgConfigK, bool startFullSize, const A& allocator); - virtual ~Hll8Array(); + virtual ~Hll8Array() = default; virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; virtual Hll8Array<A>* copy() const; diff --git a/hll/include/HllArray-internal.hpp b/hll/include/HllArray-internal.hpp index 0a4bdce..4479417 100644 --- a/hll/include/HllArray-internal.hpp +++ b/hll/include/HllArray-internal.hpp @@ -35,48 +35,16 @@ namespace datasketches { template<typename A> -HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize) - : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) { - hipAccum = 0.0; - kxq0 = 1 << lgConfigK; - kxq1 = 0.0; - curMin = 0; - numAtCurMin = 1 << lgConfigK; - oooFlag = false; - hllByteArr = nullptr; // allocated in derived class -} - -template<typename A> -HllArray<A>::HllArray(const HllArray<A>& that): -HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize), -hipAccum(that.hipAccum), -kxq0(that.kxq0), -kxq1(that.kxq1), -hllByteArr(nullptr), -curMin(that.curMin), -numAtCurMin(that.numAtCurMin), -oooFlag(that.oooFlag) -{ - const int arrayLen = that.getHllByteArrBytes(); - typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; - hllByteArr = uint8Alloc().allocate(arrayLen); - std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr); -} - -template<typename A> -HllArray<A>::~HllArray() { - // need to determine number of bytes to deallocate - int hllArrBytes = 0; - if (this->tgtHllType == target_hll_type::HLL_4) { - hllArrBytes = hll4ArrBytes(this->lgConfigK); - } else if (this->tgtHllType == target_hll_type::HLL_6) { - hllArrBytes = hll6ArrBytes(this->lgConfigK); - } else { // tgtHllType == HLL_8 - hllArrBytes = hll8ArrBytes(this->lgConfigK); - } - typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; - uint8Alloc().deallocate(hllByteArr, hllArrBytes); -} +HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize, const A& allocator): +HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize), +hipAccum(0.0), +kxq0(1 << lgConfigK), +kxq1(0.0), +hllByteArr(allocator), +curMin(0), +numAtCurMin(1 << lgConfigK), +oooFlag(false) +{} template<typename A> HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const { @@ -93,7 +61,7 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const { } template<typename A> -HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) { +HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) { if (len < HllUtil<A>::HLL_BYTE_ARR_START) { throw std::out_of_range("Input data length insufficient to hold HLL array"); } @@ -143,11 +111,11 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) { int auxLgIntArrSize = (int) data[4]; const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes; const uint8_t* auxDataStart = data + offset; - auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag); + auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator); aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter()); } - HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag); + HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator); sketch->putCurMin(curMin); sketch->putOutOfOrderFlag(oooFlag); if (!oooFlag) sketch->putHipAccum(hip); @@ -155,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) { sketch->putKxQ1(kxq1); sketch->putNumAtCurMin(numAtCurMin); - std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes); + std::memcpy(sketch->hllByteArr.data(), data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes); if (auxHashMap != nullptr) ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap); @@ -165,7 +133,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) { } template<typename A> -HllArray<A>* HllArray<A>::newHll(std::istream& is) { +HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) { uint8_t listHeader[8]; is.read((char*)listHeader, 8 * sizeof(uint8_t)); @@ -192,7 +160,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) { const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE]; const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE]; - HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag); + HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator); typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr; hll_array_ptr sketch_ptr(sketch, sketch->get_deleter()); sketch->putCurMin(curMin); @@ -211,11 +179,11 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) { is.read((char*)&auxCount, sizeof(auxCount)); sketch->putNumAtCurMin(numAtCurMin); - is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes()); + is.read((char*)sketch->hllByteArr.data(), sketch->getHllByteArrBytes()); if (auxCount > 0) { // necessarily TgtHllType == HLL_4 int auxLgIntArrSize = listHeader[4]; - AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag); + AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator); ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap); } @@ -228,7 +196,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) { template<typename A> vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const { const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes; - vector_u8<A> byteArr(sketchSizeBytes); + vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator()); uint8_t* bytes = byteArr.data() + header_size_bytes; AuxHashMap<A>* auxHashMap = getAuxHashMap(); @@ -249,7 +217,7 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int)); const int hllByteArrBytes = getHllByteArrBytes(); - std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes); + std::memcpy(bytes + getMemDataStart(), hllByteArr.data(), hllByteArrBytes); // aux map if HLL_4 if (this->tgtHllType == HLL_4) { @@ -309,7 +277,7 @@ void HllArray<A>::serialize(std::ostream& os, const bool compact) const { const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount()); os.write((char*)&auxCount, sizeof(auxCount)); - os.write((char*)hllByteArr, getHllByteArrBytes()); + os.write((char*)hllByteArr.data(), getHllByteArrBytes()); // aux map if HLL_4 if (this->tgtHllType == HLL_4) { @@ -639,12 +607,12 @@ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) template<typename A> typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const { - return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all); + return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all); } template<typename A> typename HllArray<A>::const_iterator HllArray<A>::end() const { - return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false); + return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false); } template<typename A> @@ -701,6 +669,11 @@ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t inde return array[index]; } +template<typename A> +A HllArray<A>::getAllocator() const { + return hllByteArr.get_allocator(); +} + } #endif // _HLLARRAY_INTERNAL_HPP_ diff --git a/hll/include/HllArray.hpp b/hll/include/HllArray.hpp index 1cc64ea..e7be8c1 100644 --- a/hll/include/HllArray.hpp +++ b/hll/include/HllArray.hpp @@ -28,19 +28,18 @@ namespace datasketches { template<typename A> class AuxHashMap; -template<typename A = std::allocator<char>> +template<typename A> class HllArray : public HllSketchImpl<A> { public: - explicit HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize); - explicit HllArray(const HllArray<A>& that); + HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator); - static HllArray* newHll(const void* bytes, size_t len); - static HllArray* newHll(std::istream& is); + static HllArray* newHll(const void* bytes, size_t len, const A& allocator); + static HllArray* newHll(std::istream& is, const A& allocator); virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const; virtual void serialize(std::ostream& os, bool compact) const; - virtual ~HllArray(); + virtual ~HllArray() = default; virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0; virtual HllArray* copy() const = 0; @@ -95,6 +94,8 @@ class HllArray : public HllSketchImpl<A> { virtual const_iterator begin(bool all = false) const; virtual const_iterator end() const; + virtual A getAllocator() const; + protected: void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue); double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const; @@ -103,7 +104,7 @@ class HllArray : public HllSketchImpl<A> { double hipAccum; double kxq0; double kxq1; - uint8_t* hllByteArr; //init by sub-classes + vector_u8<A> hllByteArr; //init by sub-classes int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array int numAtCurMin; //interpreted as num zeros when curMin == 0 bool oooFlag; //Out-Of-Order Flag @@ -115,7 +116,6 @@ template<typename A> class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> { public: const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all); - //const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset); const_iterator& operator++(); bool operator!=(const const_iterator& other) const; uint32_t operator*() const; diff --git a/hll/include/HllSketch-internal.hpp b/hll/include/HllSketch-internal.hpp index dd16955..8f7d1f4 100644 --- a/hll/include/HllSketch-internal.hpp +++ b/hll/include/HllSketch-internal.hpp @@ -42,28 +42,26 @@ typedef union { } longDoubleUnion; template<typename A> -hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) { +hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) { HllUtil<A>::checkLgK(lg_config_k); if (start_full_size) { - sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size); + sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator); } else { typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc; - sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST); + sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator); } } template<typename A> -hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) { - HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is); - hll_sketch_alloc<A> sketch(impl); - return sketch; +hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) { + HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator); + return hll_sketch_alloc<A>(impl); } template<typename A> -hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) { - HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len); - hll_sketch_alloc<A> sketch(impl); - return sketch; +hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) { + HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator); + return hll_sketch_alloc<A>(impl); } template<typename A> diff --git a/hll/include/HllSketchImpl.hpp b/hll/include/HllSketchImpl.hpp index 82180b4..9f53705 100644 --- a/hll/include/HllSketchImpl.hpp +++ b/hll/include/HllSketchImpl.hpp @@ -27,7 +27,7 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A> class HllSketchImpl { public: HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize); @@ -66,6 +66,7 @@ class HllSketchImpl { virtual bool isEmpty() const = 0; virtual bool isOutOfOrderFlag() const = 0; virtual void putOutOfOrderFlag(bool oooFlag) = 0; + virtual A getAllocator() const = 0; bool isStartFullSize() const; protected: diff --git a/hll/include/HllSketchImplFactory.hpp b/hll/include/HllSketchImplFactory.hpp index eb8dd77..85f9618 100644 --- a/hll/include/HllSketchImplFactory.hpp +++ b/hll/include/HllSketchImplFactory.hpp @@ -31,15 +31,15 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A> class HllSketchImplFactory final { public: - static HllSketchImpl<A>* deserialize(std::istream& os); - static HllSketchImpl<A>* deserialize(const void* bytes, size_t len); + static HllSketchImpl<A>* deserialize(std::istream& os, const A& allocator); + static HllSketchImpl<A>* deserialize(const void* bytes, size_t len, const A& allocator); static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list); static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list); - static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false); + static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator); // resets the input impl, deleting the input pointer and returning a new pointer static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize); @@ -51,8 +51,8 @@ public: template<typename A> CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) { - typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc; - CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType()); + using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>; + CouponHashSet<A>* chSet = new (ChsAlloc(list.getAllocator()).allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType(), list.getAllocator()); for (auto coupon: list) { chSet->couponUpdate(coupon); } @@ -61,7 +61,7 @@ CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& template<typename A> HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) { - HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType()); + HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType(), false, src.getAllocator()); tgtHllArr->putKxQ0(1 << src.getLgConfigK()); for (auto coupon: src) { tgtHllArr->couponUpdate(coupon); @@ -72,48 +72,48 @@ HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& } template<typename A> -HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is) { +HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A& allocator) { // we'll hand off the sketch based on PreInts so we don't need // to move the stream pointer back and forth -- perhaps somewhat fragile? const int preInts = is.peek(); if (preInts == HllUtil<A>::HLL_PREINTS) { - return HllArray<A>::newHll(is); + return HllArray<A>::newHll(is, allocator); } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) { - return CouponHashSet<A>::newSet(is); + return CouponHashSet<A>::newSet(is, allocator); } else if (preInts == HllUtil<A>::LIST_PREINTS) { - return CouponList<A>::newList(is); + return CouponList<A>::newList(is, allocator); } else { throw std::invalid_argument("Attempt to deserialize unknown object type"); } } template<typename A> -HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len) { +HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len, const A& allocator) { // read current mode directly const int preInts = static_cast<const uint8_t*>(bytes)[0]; if (preInts == HllUtil<A>::HLL_PREINTS) { - return HllArray<A>::newHll(bytes, len); + return HllArray<A>::newHll(bytes, len, allocator); } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) { - return CouponHashSet<A>::newSet(bytes, len); + return CouponHashSet<A>::newSet(bytes, len, allocator); } else if (preInts == HllUtil<A>::LIST_PREINTS) { - return CouponList<A>::newList(bytes, len); + return CouponList<A>::newList(bytes, len, allocator); } else { throw std::invalid_argument("Attempt to deserialize unknown object type"); } } template<typename A> -HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize) { +HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator) { switch (tgtHllType) { case HLL_8: - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; - return new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, startFullSize); + using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>; + return new (Hll8Alloc(allocator).allocate(1)) Hll8Array<A>(lgConfigK, startFullSize, allocator); case HLL_6: - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; - return new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, startFullSize); + using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>; + return new (Hll6Alloc(allocator).allocate(1)) Hll6Array<A>(lgConfigK, startFullSize, allocator); case HLL_4: - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc; - return new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, startFullSize); + using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; + return new (Hll4Alloc(allocator).allocate(1)) Hll4Array<A>(lgConfigK, startFullSize, allocator); } throw std::logic_error("Invalid target_hll_type"); } @@ -121,12 +121,12 @@ HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtH template<typename A> HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) { if (startFullSize) { - HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize); + HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize, impl->getAllocator()); impl->get_deleter()(impl); return hll; } else { - typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc; - CouponList<A>* cl = new (clAlloc().allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST); + using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>; + CouponList<A>* cl = new (ClAlloc(impl->getAllocator()).allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST, impl->getAllocator()); impl->get_deleter()(impl); return cl; } @@ -135,8 +135,9 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st template<typename A> Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) { const int lgConfigK = srcHllArr.getLgConfigK(); - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc; - Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize()); + using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>; + Hll4Array<A>* hll4Array = new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1)) + Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator()); hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag()); hll4Array->mergeHll(srcHllArr); hll4Array->putHipAccum(srcHllArr.getHipAccum()); @@ -146,8 +147,9 @@ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllAr template<typename A> Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) { const int lgConfigK = srcHllArr.getLgConfigK(); - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; - Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize()); + using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>; + Hll6Array<A>* hll6Array = new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1)) + Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator()); hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag()); hll6Array->mergeHll(srcHllArr); hll6Array->putHipAccum(srcHllArr.getHipAccum()); @@ -157,8 +159,9 @@ Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllAr template<typename A> Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) { const int lgConfigK = srcHllArr.getLgConfigK(); - typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; - Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize()); + using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>; + Hll8Array<A>* hll8Array = new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1)) + Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator()); hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag()); hll8Array->mergeHll(srcHllArr); hll8Array->putHipAccum(srcHllArr.getHipAccum()); diff --git a/hll/include/HllUnion-internal.hpp b/hll/include/HllUnion-internal.hpp index 0d12fd3..1e99906 100644 --- a/hll/include/HllUnion-internal.hpp +++ b/hll/include/HllUnion-internal.hpp @@ -32,9 +32,9 @@ namespace datasketches { template<typename A> -hll_union_alloc<A>::hll_union_alloc(const int lg_max_k): +hll_union_alloc<A>::hll_union_alloc(const int lg_max_k, const A& allocator): lg_max_k(HllUtil<A>::checkLgK(lg_max_k)), - gadget(lg_max_k, target_hll_type::HLL_8) + gadget(lg_max_k, target_hll_type::HLL_8, false, allocator) {} template<typename A> @@ -226,7 +226,7 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* return src->copyAs(HLL_8); } typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; - Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false); + Hll8Array<A>* tgtHllArr = new (hll8Alloc(src->getAllocator()).allocate(1)) Hll8Array<A>(tgt_lg_k, false, src->getAllocator()); tgtHllArr->mergeHll(*src); //both of these are required for isomorphism tgtHllArr->putHipAccum(src->getHipAccum()); diff --git a/hll/include/HllUtil.hpp b/hll/include/HllUtil.hpp index ec0ddf2..3a1ebe2 100644 --- a/hll/include/HllUtil.hpp +++ b/hll/include/HllUtil.hpp @@ -36,7 +36,7 @@ enum hll_mode { LIST = 0, SET, HLL }; // template provides internal consistency and allows static float values // but we don't use the template parameter anywhere -template<typename A = std::allocator<char> > +template<typename A = std::allocator<uint8_t> > class HllUtil final { public: // preamble stuff diff --git a/hll/include/RelativeErrorTables.hpp b/hll/include/RelativeErrorTables.hpp index da8bebf..5e0a3c7 100644 --- a/hll/include/RelativeErrorTables.hpp +++ b/hll/include/RelativeErrorTables.hpp @@ -24,7 +24,7 @@ namespace datasketches { -template<typename A = std::allocator<char>> +template<typename A = std::allocator<uint8_t>> class RelativeErrorTables { public: /** diff --git a/hll/include/hll.hpp b/hll/include/hll.hpp index 3898dda..07f9257 100644 --- a/hll/include/hll.hpp +++ b/hll/include/hll.hpp @@ -108,7 +108,7 @@ class hll_union_alloc; template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>; template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>; -template<typename A = std::allocator<char> > +template<typename A = std::allocator<uint8_t> > class hll_sketch_alloc final { public: /** @@ -119,7 +119,7 @@ class hll_sketch_alloc final { * keeping memory use constant (if HLL_6 or HLL_8) at the cost of * starting out using much more memory */ - explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false); + explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A()); /** * Copy constructor @@ -140,14 +140,14 @@ class hll_sketch_alloc final { * Reconstructs a sketch from a serialized image on a stream. * @param is An input stream with a binary image of a sketch */ - static hll_sketch_alloc deserialize(std::istream& is); + static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A()); /** * Reconstructs a sketch from a serialized image in a byte array. * @param is bytes An input array with a binary image of a sketch * @param len Length of the input array, in bytes */ - static hll_sketch_alloc deserialize(const void* bytes, size_t len); + static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A()); //! Class destructor virtual ~hll_sketch_alloc(); @@ -423,7 +423,7 @@ class hll_sketch_alloc final { * author Kevin Lang */ -template<typename A = std::allocator<char> > +template<typename A = std::allocator<uint8_t> > class hll_union_alloc { public: /** @@ -431,7 +431,7 @@ class hll_union_alloc { * @param lg_max_k The maximum size, in log2, of k. The value must * be between 7 and 21, inclusive. */ - explicit hll_union_alloc(int lg_max_k); + explicit hll_union_alloc(int lg_max_k, const A& allocator = A()); /** * Returns the current cardinality estimate diff --git a/hll/test/AuxHashMapTest.cpp b/hll/test/AuxHashMapTest.cpp index 827f4ad..ed1b380 100644 --- a/hll/test/AuxHashMapTest.cpp +++ b/hll/test/AuxHashMapTest.cpp @@ -25,7 +25,7 @@ namespace datasketches { TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") { - AuxHashMap<>* map = new AuxHashMap<>(3, 7); + AuxHashMap<std::allocator<uint8_t>>* map = new AuxHashMap<std::allocator<uint8_t>>(3, 7, std::allocator<uint8_t>()); map->mustAdd(100, 5); int val = map->mustFindValueFor(100); REQUIRE(val == 5); @@ -40,9 +40,9 @@ TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") { } TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") { - auto map = std::unique_ptr<AuxHashMap<>, std::function<void(AuxHashMap<>*)>>( - AuxHashMap<>::newAuxHashMap(3, 7), - AuxHashMap<>::make_deleter() + auto map = std::unique_ptr<AuxHashMap<std::allocator<uint8_t>>, std::function<void(AuxHashMap<std::allocator<uint8_t>>*)>>( + AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>()), + AuxHashMap<std::allocator<uint8_t>>::make_deleter() ); REQUIRE(map->getLgAuxArrInts() == 3); for (int i = 1; i <= 7; ++i) { @@ -63,17 +63,17 @@ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") { } TEST_CASE("aux hash map: check exception must find value for", "[aux_hash_map]") { - AuxHashMap<> map(3, 7); + AuxHashMap<std::allocator<uint8_t>> map(3, 7, std::allocator<uint8_t>()); map.mustAdd(100, 5); REQUIRE_THROWS_AS(map.mustFindValueFor(101), std::invalid_argument); } TEST_CASE("aux hash map: check exception must add", "[aux_hash_map]") { - AuxHashMap<>* map = AuxHashMap<>::newAuxHashMap(3, 7); + AuxHashMap<std::allocator<uint8_t>>* map = AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>()); map->mustAdd(100, 5); REQUIRE_THROWS_AS(map->mustAdd(100, 6), std::invalid_argument); - AuxHashMap<>::make_deleter()(map); + AuxHashMap<std::allocator<uint8_t>>::make_deleter()(map); } } /* namespace datasketches */ diff --git a/hll/test/CouponHashSetTest.cpp b/hll/test/CouponHashSetTest.cpp index 8e8b873..ebadd8f 100644 --- a/hll/test/CouponHashSetTest.cpp +++ b/hll/test/CouponHashSetTest.cpp @@ -43,7 +43,7 @@ TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") { // fail in HllSketchImpl REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument); // fail in CouponHashSet - REQUIRE_THROWS_AS(CouponHashSet<>::newSet(bytes, size), std::invalid_argument); + REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(bytes, size, std::allocator<uint8_t>()), std::invalid_argument); bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HASH_SET_PREINTS; bytes[HllUtil<>::SER_VER_BYTE] = 0; @@ -88,7 +88,7 @@ TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") { // fail in HllSketchImpl REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument); // fail in CouponHashSet - REQUIRE_THROWS_AS(CouponHashSet<>::newSet(ss), std::invalid_argument); + REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(ss, std::allocator<uint8_t>()), std::invalid_argument); ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE); ss.put(HllUtil<>::HASH_SET_PREINTS); diff --git a/hll/test/CouponListTest.cpp b/hll/test/CouponListTest.cpp index ac48d52..380b60d 100644 --- a/hll/test/CouponListTest.cpp +++ b/hll/test/CouponListTest.cpp @@ -36,7 +36,7 @@ void println_string(std::string str) { TEST_CASE("coupon list: check iterator", "[coupon_list]") { int lgConfigK = 8; - CouponList<> cl(lgConfigK, HLL_4, LIST); + CouponList<std::allocator<uint8_t>> cl(lgConfigK, HLL_4, LIST, std::allocator<uint8_t>()); for (int i = 1; i <= 7; ++i) { cl.couponUpdate(HllUtil<>::pair(i, i)); } // not hashes but distinct values const int mask = (1 << lgConfigK) - 1; int idx = 0; @@ -120,7 +120,7 @@ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") { bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0; REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument); - REQUIRE_THROWS_AS(CouponList<>::newList(bytes, size), std::invalid_argument); + REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(bytes, size, std::allocator<uint8_t>()), std::invalid_argument); bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::LIST_PREINTS; @@ -154,7 +154,7 @@ TEST_CASE("coupon list: check corrupt stream data", "[coupon_list]") { ss.put(0); ss.seekg(0); REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument); - REQUIRE_THROWS_AS(CouponList<>::newList(ss), std::invalid_argument); + REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(ss, std::allocator<uint8_t>()), std::invalid_argument); ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE); ss.put(HllUtil<>::LIST_PREINTS); diff --git a/hll/test/HllArrayTest.cpp b/hll/test/HllArrayTest.cpp index 8c9efa2..f067dbb 100644 --- a/hll/test/HllArrayTest.cpp +++ b/hll/test/HllArrayTest.cpp @@ -111,7 +111,7 @@ TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") { bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0; REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument); - REQUIRE_THROWS_AS(HllArray<>::newHll(bytes, size), std::invalid_argument); + REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(bytes, size, std::allocator<uint8_t>()), std::invalid_argument); bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HLL_PREINTS; bytes[HllUtil<>::SER_VER_BYTE] = 0; @@ -150,7 +150,7 @@ TEST_CASE("hll array: check corrupt stream", "[hll_array]") { ss.put(0); ss.seekg(0); REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument); - REQUIRE_THROWS_AS(HllArray<>::newHll(ss), std::invalid_argument); + REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(ss, std::allocator<uint8_t>()), std::invalid_argument); ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE); ss.put(HllUtil<>::HLL_PREINTS); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
