tutububug commented on code in PR #2142:
URL: https://github.com/apache/kvrocks/pull/2142#discussion_r1516008830


##########
src/types/redis_hyperloglog.cc:
##########
@@ -0,0 +1,359 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include "redis_hyperloglog.h"
+#include "db_util.h"
+
+#define HLL_P 14 /* The greater is P, the smaller the error. */
+#define HLL_Q (64-HLL_P) /* The number of bits of the hash value used for 
determining the number of leading zeros. */
+#define HLL_REGISTERS (1<<HLL_P) /* With P=14, 16384 registers. */
+#define HLL_P_MASK (HLL_REGISTERS-1) /* Mask to index register. */
+#define HLL_BITS 8
+#define HLL_REGISTER_MAX ((1<<HLL_BITS)-1)
+#define HLL_ALPHA_INF 0.721347520444481703680 /* constant for 0.5/ln(2) */
+
+namespace redis {
+
+rocksdb::Status Hyperloglog::GetMetadata(const Slice &ns_key, 
HyperloglogMetadata *metadata) {
+  return Database::GetMetadata({kRedisHyperloglog}, ns_key, metadata);
+}
+
+/* the max 0 pattern counter of the subset the element belongs to is 
incremented if needed */
+rocksdb::Status Hyperloglog::Add(const Slice &user_key, const 
std::vector<Slice>& elements, int *ret) {
+  *ret = 0;
+  std::string ns_key = AppendNamespacePrefix(user_key);
+
+  LockGuard guard(storage_->GetLockManager(), ns_key);
+  HyperloglogMetadata metadata;
+  rocksdb::Status s = GetMetadata(ns_key, &metadata);
+  if (!s.ok() && !s.IsNotFound()) return s;
+
+  auto batch = storage_->GetWriteBatchBase();
+  WriteBatchLogData log_data(kRedisHyperloglog);
+  batch->PutLogData(log_data.Encode());
+  if (s.IsNotFound()) {
+    std::string bytes;
+    metadata.size = HLL_REGISTERS; // 'size' must non-zone, or 'GetMetadata' 
will failed as 'expired'.
+    metadata.Encode(&bytes);
+    batch->Put(metadata_cf_handle_, ns_key, bytes);
+  }
+  for (const auto &element: elements) {
+    long index = 0;
+    uint8_t count = hllPatLen((unsigned char *)element.data(), element.size(), 
&index);
+
+    std::string sub_key = InternalKey(ns_key, std::to_string(index), 
metadata.version, storage_->IsSlotIdEncoded()).Encode();
+    std::string old_count = "0";
+    auto s = storage_->Get(rocksdb::ReadOptions(), sub_key, &old_count);
+    if (!s.ok() && !s.IsNotFound()) return s;
+    if (static_cast<unsigned long>(count) > std::stoul(old_count)) {
+      batch->Put(sub_key, std::to_string(count));
+      *ret = 1;
+    }
+  }
+  return storage_->Write(storage_->DefaultWriteOptions(), 
batch->GetWriteBatch());
+}
+
+rocksdb::Status Hyperloglog::Count(const Slice &user_key, int *ret) {
+  *ret = 0;
+  std::vector<uint8_t> counts(HLL_REGISTERS);
+  auto s = getRegisters(user_key, &counts);
+  if (!s.ok()) return s;
+  *ret = hllCount(counts);
+  return rocksdb::Status::OK();
+}
+
+rocksdb::Status Hyperloglog::Merge(const std::vector<Slice> &user_keys) {
+  std::vector<uint8_t> max(HLL_REGISTERS);
+  for (const auto &user_key : user_keys) {
+    std::vector<uint8_t> counts(HLL_REGISTERS);
+    auto s = getRegisters(user_key, &counts);
+    if (!s.ok()) return s;
+    hllMerge(&max[0], counts);
+  }
+
+  std::string ns_key = AppendNamespacePrefix(user_keys[0]);
+
+  LockGuard guard(storage_->GetLockManager(), ns_key);
+  HyperloglogMetadata metadata;
+  rocksdb::Status s = GetMetadata(ns_key, &metadata);
+  if (!s.ok() && !s.IsNotFound()) return s;
+
+  auto batch = storage_->GetWriteBatchBase();
+  WriteBatchLogData log_data(kRedisHyperloglog);
+  batch->PutLogData(log_data.Encode());
+  if (s.IsNotFound()) {
+    std::string bytes;
+    metadata.size = HLL_REGISTERS;
+    metadata.Encode(&bytes);
+    batch->Put(metadata_cf_handle_, ns_key, bytes);
+  }
+
+  for (auto i = 0; i < HLL_REGISTERS; i++) {
+    std::string sub_key = InternalKey(ns_key, std::to_string(i), 
metadata.version, storage_->IsSlotIdEncoded()).Encode();
+
+    batch->Put(sub_key, std::to_string(max[i]));
+  }
+  return storage_->Write(storage_->DefaultWriteOptions(), 
batch->GetWriteBatch());
+}
+
+/* Store the value of the register at position 'regnum' into variable 'target'.
+ * 'p' is an array of unsigned bytes. */
+#define HLL_DENSE_GET_REGISTER(target,p,regnum) do { \
+    uint8_t *_p = (uint8_t*) p; \
+    unsigned long _byte = regnum*HLL_BITS/8; \
+    unsigned long _fb = regnum*HLL_BITS&7; \
+    unsigned long _fb8 = 8 - _fb; \
+    unsigned long b0 = _p[_byte]; \
+    unsigned long b1 = _p[_byte+1]; \
+    target = ((b0 >> _fb) | (b1 << _fb8)) & HLL_REGISTER_MAX; \
+} while(0)
+
+/* ========================= HyperLogLog algorithm  ========================= 
*/
+
+/* Our hash function is MurmurHash2, 64 bit version.
+ * It was modified for Redis in order to provide the same result in
+ * big and little endian archs (endian neutral). */
+uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) {
+    const uint64_t m = 0xc6a4a7935bd1e995;
+    const int r = 47;
+    uint64_t h = seed ^ (len * m);
+    const uint8_t *data = (const uint8_t *)key;
+    const uint8_t *end = data + (len-(len&7));
+
+    while(data != end) {
+        uint64_t k;
+
+#if (BYTE_ORDER == LITTLE_ENDIAN)
+    #ifdef USE_ALIGNED_ACCESS

Review Comment:
   ok, got it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to