This is an automated email from the ASF dual-hosted git repository.

twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 7571034f Initialize the unified index updater for HASH and JSON data type (#2111)
7571034f is described below

commit 7571034f3516594dd1169fdfcdea6cabd085fe5e
Author: Twice <[email protected]>
AuthorDate: Sat Feb 24 09:46:23 2024 +0900

    Initialize the unified index updater for HASH and JSON data type (#2111)
---
 CMakeLists.txt                |   2 +
 NOTICE                        |   1 +
 cmake/trie.cmake              |  27 ++++++++++
 licenses/LICENSE-hat-trie.txt |  21 ++++++++
 src/common/status.h           |   3 ++
 src/search/indexer.cc         | 118 ++++++++++++++++++++++++++++++++++++++++++
 src/search/indexer.h          |  92 ++++++++++++++++++++++++++++++++
 src/search/search_encoding.h  |   8 +--
 src/types/redis_hash.h        |   2 +
 src/types/redis_json.h        |   2 +
 10 files changed, 272 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea4d1bec..3e384e03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,6 +138,7 @@ include(cmake/fmt.cmake)
 include(cmake/jsoncons.cmake)
 include(cmake/xxhash.cmake)
 include(cmake/span.cmake)
+include(cmake/trie.cmake)
 
 if (ENABLE_LUAJIT)
     include(cmake/luajit.cmake)
@@ -169,6 +170,7 @@ list(APPEND EXTERNAL_LIBS Threads::Threads)
 list(APPEND EXTERNAL_LIBS ${Backtrace_LIBRARY})
 list(APPEND EXTERNAL_LIBS xxhash)
 list(APPEND EXTERNAL_LIBS span-lite)
+list(APPEND EXTERNAL_LIBS tsl_hat_trie)
 
 # Add git sha to version.h
 find_package(Git REQUIRED)
diff --git a/NOTICE b/NOTICE
index c3ab6e56..a25a60d1 100644
--- a/NOTICE
+++ b/NOTICE
@@ -65,6 +65,7 @@ The text of each license is also included in licenses/LICENSE-[project].txt
 * fmt(https://github.com/fmtlib/fmt)
 * LuaJIT(https://github.com/KvrocksLabs/LuaJIT)
 * lua(https://github.com/KvrocksLabs/lua, alternative to LuaJIT)
+* hat-trie(https://github.com/Tessil/hat-trie)
 
 ================================================================
 Boost Software License Version 1.0
diff --git a/cmake/trie.cmake b/cmake/trie.cmake
new file mode 100644
index 00000000..30d63429
--- /dev/null
+++ b/cmake/trie.cmake
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include_guard()
+
+include(cmake/utils.cmake)
+
+FetchContent_DeclareGitHubWithMirror(trie
+  Tessil/hat-trie 906e6abd1e7063f1dacd3a6b270aa654b525eb0a
+  MD5=a930364e9f6b60371319664bddf78000
+)
+
+FetchContent_MakeAvailableWithArgs(trie)
diff --git a/licenses/LICENSE-hat-trie.txt b/licenses/LICENSE-hat-trie.txt
new file mode 100644
index 00000000..e9c5ae95
--- /dev/null
+++ b/licenses/LICENSE-hat-trie.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Thibaut Goetghebuer-Planchon <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/common/status.h b/src/common/status.h
index 37eae9d8..ade19f86 100644
--- a/src/common/status.h
+++ b/src/common/status.h
@@ -66,6 +66,9 @@ class [[nodiscard]] Status {
 
     // Blocking
     BlockingCmd,
+
+    // Search
+    NoPrefixMatched,
   };
 
   Status() : impl_{nullptr} {}
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
new file mode 100644
index 00000000..f608d3df
--- /dev/null
+++ b/src/search/indexer.cc
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "indexer.h"
+
+#include <variant>
+
+#include "storage/redis_metadata.h"
+#include "types/redis_hash.h"
+
+namespace redis {
+
+StatusOr<FieldValueRetriever> FieldValueRetriever::Create(SearchOnDataType type, std::string_view key,
+                                                          engine::Storage *storage, const std::string &ns) {
+  if (type == SearchOnDataType::HASH) {
+    Hash db(storage, ns);
+    std::string ns_key = db.AppendNamespacePrefix(key);
+    HashMetadata metadata(false);
+    auto s = db.GetMetadata(ns_key, &metadata);
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+    return FieldValueRetriever(db, metadata, key);
+  } else if (type == SearchOnDataType::JSON) {
+    Json db(storage, ns);
+    std::string ns_key = db.AppendNamespacePrefix(key);
+    JsonMetadata metadata(false);
+    JsonValue value;
+    auto s = db.read(ns_key, &metadata, &value);
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+    return FieldValueRetriever(value);
+  } else {
+    assert(false && "unreachable code: unexpected SearchOnDataType");
+    __builtin_unreachable();
+  }
+}
+
+rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) {
+  if (std::holds_alternative<HashData>(db)) {
+    auto &[hash, metadata, key] = std::get<HashData>(db);
+    std::string ns_key = hash.AppendNamespacePrefix(key);
+    LatestSnapShot ss(hash.storage_);
+    rocksdb::ReadOptions read_options;
+    read_options.snapshot = ss.GetSnapShot();
+    std::string sub_key = InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode();
+    return hash.storage_->Get(read_options, sub_key, output);
+  } else if (std::holds_alternative<JsonData>(db)) {
+    auto &value = std::get<JsonData>(db);
+    auto s = value.Get(field);
+    if (!s.IsOK()) return rocksdb::Status::Corruption(s.Msg());
+    if (s->value.size() != 1)
+      return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique");
+    *output = s->value[0].as_string();
+    return rocksdb::Status::OK();
+  } else {
+    __builtin_unreachable();
+  }
+}
+
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
+  Database db(indexer->storage, ns);
+
+  RedisType type = kRedisNone;
+  auto s = db.Type(key, &type);
+  if (!s.ok()) return {Status::NotOK, s.ToString()};
+
+  if (type != static_cast<RedisType>(on_data_type)) {
+    // not the expected type, stop record
+    return {Status::NotOK, "this data type cannot be indexed"};
+  }
+
+  auto retriever = GET_OR_RET(FieldValueRetriever::Create(on_data_type, key, indexer->storage, ns));
+
+  FieldValues values;
+  for (const auto &[field, info] : fields) {
+    std::string value;
+    auto s = retriever.Retrieve(field, &value);
+    if (s.IsNotFound()) continue;
+    if (!s.ok()) return {Status::NotOK, s.ToString()};
+
+    values.emplace(field, value);
+  }
+
+  return values;
+}
+
+void GlobalIndexer::Add(IndexUpdater updater) {
+  auto &up = updaters.emplace_back(std::move(updater));
+  for (const auto &prefix : up.prefixes) {
+    prefix_map.emplace(prefix, &up);
+  }
+}
+
+StatusOr<IndexUpdater::FieldValues> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
+  auto iter = prefix_map.longest_prefix(key);
+  if (iter != prefix_map.end()) {
+    return iter.value()->Record(key, ns);
+  }
+
+  return {Status::NoPrefixMatched};
+}
+
+}  // namespace redis
diff --git a/src/search/indexer.h b/src/search/indexer.h
new file mode 100644
index 00000000..e153d555
--- /dev/null
+++ b/src/search/indexer.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <tsl/htrie_map.h>
+
+#include <deque>
+#include <map>
+#include <utility>
+#include <variant>
+
+#include "commands/commander.h"
+#include "config/config.h"
+#include "indexer.h"
+#include "search/search_encoding.h"
+#include "server/server.h"
+#include "storage/redis_metadata.h"
+#include "storage/storage.h"
+#include "types/redis_hash.h"
+#include "types/redis_json.h"
+
+namespace redis {
+
+struct GlobalIndexer;
+
+struct FieldValueRetriever {
+  struct HashData {
+    Hash hash;
+    HashMetadata metadata;
+    std::string_view key;
+
+    HashData(Hash hash, HashMetadata metadata, std::string_view key)
+        : hash(std::move(hash)), metadata(std::move(metadata)), key(key) {}
+  };
+  using JsonData = JsonValue;
+
+  using Variant = std::variant<HashData, JsonData>;
+  Variant db;
+
+  static StatusOr<FieldValueRetriever> Create(SearchOnDataType type, std::string_view key, engine::Storage *storage,
+                                              const std::string &ns);
+
+  explicit FieldValueRetriever(Hash hash, HashMetadata metadata, std::string_view key)
+      : db(std::in_place_type<HashData>, std::move(hash), std::move(metadata), key) {}
+
+  explicit FieldValueRetriever(JsonValue json) : db(std::in_place_type<JsonData>, std::move(json)) {}
+
+  rocksdb::Status Retrieve(std::string_view field, std::string *output);
+};
+
+struct IndexUpdater {
+  using FieldValues = std::map<std::string, std::string>;
+
+  SearchOnDataType on_data_type;
+  std::vector<std::string> prefixes;
+  std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
+  GlobalIndexer *indexer = nullptr;
+
+  StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
+};
+
+struct GlobalIndexer {
+  std::deque<IndexUpdater> updaters;
+  tsl::htrie_map<char, IndexUpdater *> prefix_map;
+
+  engine::Storage *storage = nullptr;
+
+  explicit GlobalIndexer(engine::Storage *storage) : storage(storage) {}
+
+  void Add(IndexUpdater updater);
+  StatusOr<IndexUpdater::FieldValues> Record(std::string_view key, const std::string &ns);
+};
+
+}  // namespace redis
diff --git a/src/search/search_encoding.h b/src/search/search_encoding.h
index deea106b..2acec050 100644
--- a/src/search/search_encoding.h
+++ b/src/search/search_encoding.h
@@ -73,9 +73,9 @@ struct SearchFieldMetadata {
 
   void DecodeFlag(uint8_t flag) { noindex = flag & 1; }
 
-  void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
+  virtual void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
 
-  rocksdb::Status Decode(Slice *input) {
+  virtual rocksdb::Status Decode(Slice *input) {
     uint8_t flag = 0;
     if (!GetFixed8(input, &flag)) {
       return rocksdb::Status::Corruption(kErrorInsufficientLength);
@@ -96,13 +96,13 @@ struct SearchTagFieldMetadata : SearchFieldMetadata {
   char separator = ',';
   bool case_sensitive = false;
 
-  void Encode(std::string *dst) const {
+  void Encode(std::string *dst) const override {
     SearchFieldMetadata::Encode(dst);
     PutFixed8(dst, separator);
     PutFixed8(dst, case_sensitive);
   }
 
-  rocksdb::Status Decode(Slice *input) {
+  rocksdb::Status Decode(Slice *input) override {
     if (auto s = SearchFieldMetadata::Decode(input); !s.ok()) {
       return s;
     }
diff --git a/src/types/redis_hash.h b/src/types/redis_hash.h
index fc004ed8..8ae0a066 100644
--- a/src/types/redis_hash.h
+++ b/src/types/redis_hash.h
@@ -66,6 +66,8 @@ class Hash : public SubKeyScanner {
 
  private:
   rocksdb::Status GetMetadata(const Slice &ns_key, HashMetadata *metadata);
+
+  friend struct FieldValueRetriever;
 };
 
 }  // namespace redis
diff --git a/src/types/redis_json.h b/src/types/redis_json.h
index a2135b75..8d0f15cb 100644
--- a/src/types/redis_json.h
+++ b/src/types/redis_json.h
@@ -76,6 +76,8 @@ class Json : public Database {
   rocksdb::Status numop(JsonValue::NumOpEnum op, const std::string &user_key, const std::string &path,
                         const std::string &value, JsonValue *result);
   std::vector<rocksdb::Status> readMulti(const std::vector<Slice> &ns_keys, std::vector<JsonValue> &values);
+
+  friend struct FieldValueRetriever;
 };
 
 }  // namespace redis

Reply via email to