morningman commented on a change in pull request #1845: Reduce size of 
HyperLogLog struct
URL: https://github.com/apache/incubator-doris/pull/1845#discussion_r326670122
 
 

 ##########
 File path: be/src/olap/hll.cpp
 ##########
 @@ -20,172 +20,236 @@
 #include <algorithm>
 #include <map>
 
+#include "common/logging.h"
+#include "util/coding.h"
+
 using std::map;
 using std::nothrow;
 using std::string;
 using std::stringstream;
 
 namespace doris {
 
-HyperLogLog::HyperLogLog(char* src) {
-    _type = (HllDataType)src[0];
+// TODO(zc): we should check if src is valid, it maybe corrupted
+HyperLogLog::HyperLogLog(const uint8_t* src) {
+    deserialize(src);
+}
+
+// Convert explicit values to register format, and clear explicit values.
+// NOTE: this function won't modify _type.
+void HyperLogLog::_convert_explicit_to_register() {
+    DCHECK(_type == HLL_DATA_EXPLICIT) << "_type(" << _type << ") should be 
explicit("
+        << HLL_DATA_EXPLICIT << ")";
+    _registers = new uint8_t[HLL_REGISTERS_COUNT];
     memset(_registers, 0, HLL_REGISTERS_COUNT);
-    char* sparse_data = nullptr;
-    switch (_type) {
-        case HLL_DATA_EXPLICIT:
-            // first byte : type
-            // second~five byte : hash values's number
-            // five byte later : hash value
-        {
-            auto _explicit_num = (uint8_t) (src[sizeof(SetTypeValueType)]);
-            auto *_explicit_value = (uint64_t *) (src + 
sizeof(SetTypeValueType) + sizeof(uint8_t));
-            for (int i = 0; i < _explicit_num; ++i) {
-                _hash_set.insert(_explicit_value[i]);
-            }
-        }
-            break;
-        case HLL_DATA_SPRASE:
-            // first byte : type
-            // second ~(2^HLL_COLUMN_PRECISION)/8 byte : bitmap mark which is 
not zero
-            // 2^HLL_COLUMN_PRECISION)/8 + 1以后value
-        {
-            auto* _sparse_count = (SparseLengthValueType*)(src + sizeof 
(SetTypeValueType));
-            sparse_data = src + sizeof(SetTypeValueType) + 
sizeof(SparseLengthValueType);
-            std::map<SparseIndexType, SparseValueType> _sparse_map;
-            for (int i = 0; i < *_sparse_count; i++) {
-                auto* index = (SparseIndexType*)sparse_data;
-                sparse_data += sizeof(SparseIndexType);
-                auto* value = (SparseValueType*)sparse_data;
-                _sparse_map[*index] = *value;
-                sparse_data += sizeof(SetTypeValueType);
-            }
+    for (auto value : _hash_set) {
+        _update_registers(value);
+    }
+    // clear _hash_set
+    std::set<uint64_t>().swap(_hash_set);
+}
 
-            for (auto iter: _sparse_map) {
-                _registers[iter.first] = (uint8_t)iter.second;
-            }
-        }
+// Change HLL_DATA_EXPLICIT to HLL_DATA_FULL directly because they are
+// implemented in the same way in memory.
 
 Review comment:
   What does this comment mean?
   It seems that HLL_DATA_EXPLICIT and HLL_DATA_FULL are not same in memory?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to