This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch hll_merge_speed
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 067f645f8f8b7471ad5e651131a89c546909e1e3
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Mar 22 22:37:38 2023 -0700

    HLL merge speed improvement
---
 hll/include/Hll4Array-internal.hpp |  5 ++--
 hll/include/Hll4Array.hpp          |  5 +---
 hll/include/Hll8Array-internal.hpp | 53 ++++++++++++++++++--------------------
 hll/include/Hll8Array.hpp          |  1 +
 hll/include/HllArray-internal.hpp  |  5 ++++
 hll/include/HllArray.hpp           |  2 ++
 6 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/hll/include/Hll4Array-internal.hpp 
b/hll/include/Hll4Array-internal.hpp
index 8e385ec..1f3d693 100644
--- a/hll/include/Hll4Array-internal.hpp
+++ b/hll/include/Hll4Array-internal.hpp
@@ -114,10 +114,9 @@ uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
 }
 
 template<typename A>
-uint8_t Hll4Array<A>::get_value(uint32_t index) const {
-  const uint8_t value = getSlot(index);
+uint8_t Hll4Array<A>::adjustRawValue(uint32_t slot, uint8_t value) const {
   if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
-  return auxHashMap_->mustFindValueFor(index);
+  return auxHashMap_->mustFindValueFor(slot);
 }
 
 template<typename A>
diff --git a/hll/include/Hll4Array.hpp b/hll/include/Hll4Array.hpp
index 4542dec..b310baa 100644
--- a/hll/include/Hll4Array.hpp
+++ b/hll/include/Hll4Array.hpp
@@ -25,9 +25,6 @@
 
 namespace datasketches {
 
-template<typename A>
-class Hll4Iterator;
-
 template<typename A>
 class Hll4Array final : public HllArray<A> {
   public:
@@ -41,7 +38,7 @@ class Hll4Array final : public HllArray<A> {
 
     inline uint8_t getSlot(uint32_t slotNo) const;
     inline void putSlot(uint32_t slotNo, uint8_t value);
-    inline uint8_t get_value(uint32_t index) const;
+    inline uint8_t adjustRawValue(uint32_t index, uint8_t value) const;
 
     virtual uint32_t getUpdatableSerializationBytes() const;
     virtual uint32_t getHllByteArrBytes() const;
diff --git a/hll/include/Hll8Array-internal.hpp 
b/hll/include/Hll8Array-internal.hpp
index b797359..5340a28 100644
--- a/hll/include/Hll8Array-internal.hpp
+++ b/hll/include/Hll8Array-internal.hpp
@@ -95,45 +95,42 @@ void Hll8Array<A>::mergeList(const CouponList<A>& src) {
 template<typename A>
 void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
   // at this point src_k >= dst_k
-  const uint32_t src_k = 1 << src.getLgConfigK();
   const uint32_t dst_mask = (1 << this->getLgConfigK()) - 1;
-  // duplication below is to avoid a virtual method call in a loop
+  // special treatment below to optimize performance
+  // in particular to avoid a virtual method call in a loop
   if (src.getTgtHllType() == target_hll_type::HLL_8) {
-    for (uint32_t i = 0; i < src_k; i++) {
-      const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        this->numAtCurMin_ -= old_v == 0;
-      }
+    uint32_t i = 0;
+    for (const auto value: src.getHllArray()) {
+      processValue(i++, dst_mask, value);
     }
   } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
-    for (uint32_t i = 0; i < src_k; i++) {
+    const uint32_t src_k = 1 << src.getLgConfigK();
+    for (uint32_t i = 0; i < src_k; ++i) {
       const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        this->numAtCurMin_ -= old_v == 0;
-      }
+      processValue(i, dst_mask, new_v);
     }
   } else { // HLL_4
-    for (uint32_t i = 0; i < src_k; i++) {
-      const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        this->numAtCurMin_ -= old_v == 0;
-      }
+    const auto& src4 = static_cast<const Hll4Array<A>&>(src);
+    uint32_t i = 0;
+    for (const auto byte: src.getHllArray()) {
+      processValue(i, dst_mask, src4.adjustRawValue(i, byte & 
hll_constants::loNibbleMask));
+      ++i;
+      processValue(i, dst_mask, src4.adjustRawValue(i, byte >> 4));
+      ++i;
     }
   }
 }
 
+template<typename A>
+void Hll8Array<A>::processValue(uint32_t slot, uint32_t mask, uint8_t new_val) 
{
+  const uint8_t old_val = this->hllByteArr_[slot & mask];
+  if (new_val > old_val) {
+    this->hllByteArr_[slot & mask] = new_val;
+    this->hipAndKxQIncrementalUpdate(old_val, new_val);
+    this->numAtCurMin_ -= old_val == 0;
+  }
+}
+
 }
 
 #endif // _HLL8ARRAY_INTERNAL_HPP_
diff --git a/hll/include/Hll8Array.hpp b/hll/include/Hll8Array.hpp
index 54db472..d64cdba 100644
--- a/hll/include/Hll8Array.hpp
+++ b/hll/include/Hll8Array.hpp
@@ -48,6 +48,7 @@ class Hll8Array final : public HllArray<A> {
 
   private:
     inline void internalCouponUpdate(uint32_t coupon);
+    inline void processValue(uint32_t slot, uint32_t mask, uint8_t new_val);
 };
 
 }
diff --git a/hll/include/HllArray-internal.hpp 
b/hll/include/HllArray-internal.hpp
index e4d25ac..4476356 100644
--- a/hll/include/HllArray-internal.hpp
+++ b/hll/include/HllArray-internal.hpp
@@ -512,6 +512,11 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
   return nullptr;
 }
 
+template<typename A>
+const vector_u8<A>& HllArray<A>::getHllArray() const {
+  return hllByteArr_;
+}
+
 template<typename A>
 void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t 
newValue) {
   const uint32_t configK = 1 << this->getLgConfigK();
diff --git a/hll/include/HllArray.hpp b/hll/include/HllArray.hpp
index f3acea7..e428753 100644
--- a/hll/include/HllArray.hpp
+++ b/hll/include/HllArray.hpp
@@ -92,6 +92,8 @@ class HllArray : public HllSketchImpl<A> {
 
     virtual A getAllocator() const;
 
+    const vector_u8<A>& getHllArray() const;
+
   protected:
     void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
     double getHllBitMapEstimate() const;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to