This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 7571034f Initialize the unified index updater for HASH and JSON data
type (#2111)
7571034f is described below
commit 7571034f3516594dd1169fdfcdea6cabd085fe5e
Author: Twice <[email protected]>
AuthorDate: Sat Feb 24 09:46:23 2024 +0900
Initialize the unified index updater for HASH and JSON data type (#2111)
---
CMakeLists.txt | 2 +
NOTICE | 1 +
cmake/trie.cmake | 27 ++++++++++
licenses/LICENSE-hat-trie.txt | 21 ++++++++
src/common/status.h | 3 ++
src/search/indexer.cc | 118 ++++++++++++++++++++++++++++++++++++++++++
src/search/indexer.h | 92 ++++++++++++++++++++++++++++++++
src/search/search_encoding.h | 8 +--
src/types/redis_hash.h | 2 +
src/types/redis_json.h | 2 +
10 files changed, 272 insertions(+), 4 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ea4d1bec..3e384e03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -138,6 +138,7 @@ include(cmake/fmt.cmake)
include(cmake/jsoncons.cmake)
include(cmake/xxhash.cmake)
include(cmake/span.cmake)
+include(cmake/trie.cmake)
if (ENABLE_LUAJIT)
include(cmake/luajit.cmake)
@@ -169,6 +170,7 @@ list(APPEND EXTERNAL_LIBS Threads::Threads)
list(APPEND EXTERNAL_LIBS ${Backtrace_LIBRARY})
list(APPEND EXTERNAL_LIBS xxhash)
list(APPEND EXTERNAL_LIBS span-lite)
+list(APPEND EXTERNAL_LIBS tsl_hat_trie)
# Add git sha to version.h
find_package(Git REQUIRED)
diff --git a/NOTICE b/NOTICE
index c3ab6e56..a25a60d1 100644
--- a/NOTICE
+++ b/NOTICE
@@ -65,6 +65,7 @@ The text of each license is also included in
licenses/LICENSE-[project].txt
* fmt(https://github.com/fmtlib/fmt)
* LuaJIT(https://github.com/KvrocksLabs/LuaJIT)
* lua(https://github.com/KvrocksLabs/lua, alternative to LuaJIT)
+* hat-trie(https://github.com/Tessil/hat-trie)
================================================================
Boost Software License Version 1.0
diff --git a/cmake/trie.cmake b/cmake/trie.cmake
new file mode 100644
index 00000000..30d63429
--- /dev/null
+++ b/cmake/trie.cmake
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+include_guard()  # Process this file at most once, even if it is included again.
+
+include(cmake/utils.cmake)  # Presumably provides the FetchContent_* helper commands used below -- confirm.
+
+FetchContent_DeclareGitHubWithMirror(trie
+ Tessil/hat-trie 906e6abd1e7063f1dacd3a6b270aa654b525eb0a  # GitHub repository, then presumably the pinned commit -- confirm.
+ MD5=a930364e9f6b60371319664bddf78000  # Expected MD5; presumably of the fetched archive -- confirm.
+)
+
+FetchContent_MakeAvailableWithArgs(trie)  # Makes the declared `trie` content available to the build.
diff --git a/licenses/LICENSE-hat-trie.txt b/licenses/LICENSE-hat-trie.txt
new file mode 100644
index 00000000..e9c5ae95
--- /dev/null
+++ b/licenses/LICENSE-hat-trie.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2017 Thibaut Goetghebuer-Planchon <[email protected]>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/common/status.h b/src/common/status.h
index 37eae9d8..ade19f86 100644
--- a/src/common/status.h
+++ b/src/common/status.h
@@ -66,6 +66,9 @@ class [[nodiscard]] Status {
// Blocking
BlockingCmd,
+
+ // Search
+ NoPrefixMatched,
};
Status() : impl_{nullptr} {}
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
new file mode 100644
index 00000000..f608d3df
--- /dev/null
+++ b/src/search/indexer.cc
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "indexer.h"
+
+#include <variant>
+
+#include "storage/redis_metadata.h"
+#include "types/redis_hash.h"
+
+namespace redis {
+
+// Builds a retriever for `key` in namespace `ns`, according to the data type the index is on.
+//
+// HASH: fetches the hash metadata and keeps the Hash handle, that metadata and `key`.
+// JSON: reads the document through Json::read and keeps the resulting JsonValue.
+// A failed storage call is reported as Status::NotOK carrying the rocksdb status text.
+StatusOr<FieldValueRetriever> FieldValueRetriever::Create(SearchOnDataType type, std::string_view key,
+                                                          engine::Storage *storage, const std::string &ns) {
+  switch (type) {
+    case SearchOnDataType::HASH: {
+      Hash hash_db(storage, ns);
+      const std::string full_key = hash_db.AppendNamespacePrefix(key);
+      HashMetadata hash_meta(false);
+      if (auto status = hash_db.GetMetadata(full_key, &hash_meta); !status.ok()) {
+        return {Status::NotOK, status.ToString()};
+      }
+      return FieldValueRetriever(hash_db, hash_meta, key);
+    }
+    case SearchOnDataType::JSON: {
+      Json json_db(storage, ns);
+      const std::string full_key = json_db.AppendNamespacePrefix(key);
+      JsonMetadata json_meta(false);
+      JsonValue document;
+      if (auto status = json_db.read(full_key, &json_meta, &document); !status.ok()) {
+        return {Status::NotOK, status.ToString()};
+      }
+      return FieldValueRetriever(document);
+    }
+    default:
+      // Only HASH and JSON are handled; any other value is a programming error.
+      assert(false && "unreachable code: unexpected SearchOnDataType");
+      __builtin_unreachable();
+  }
+}
+
+// Looks up the value of `field` for the key this retriever was created for and stores it in
+// `*output`.
+//
+// Hash case: `field` is a hash field name; the sub-key is read from storage under a fresh
+// snapshot and the storage status is returned as-is.
+// JSON case: `field` is a JSON path evaluated against the held document. A path evaluation
+// error yields Corruption; a path that does not select exactly one value yields NotFound.
+rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::string *output) {
+  if (auto *hash_data = std::get_if<HashData>(&db)) {
+    auto &[hash, metadata, key] = *hash_data;
+    std::string ns_key = hash.AppendNamespacePrefix(key);
+    LatestSnapShot ss(hash.storage_);
+    rocksdb::ReadOptions read_options;
+    read_options.snapshot = ss.GetSnapShot();
+    std::string sub_key = InternalKey(ns_key, field, metadata.version, hash.storage_->IsSlotIdEncoded()).Encode();
+    return hash.storage_->Get(read_options, sub_key, output);
+  }
+
+  if (auto *json = std::get_if<JsonData>(&db)) {
+    auto s = json->Get(field);
+    if (!s.IsOK()) return rocksdb::Status::Corruption(s.Msg());
+    // The message must be a single literal; a line break inside it is ill-formed.
+    if (s->value.size() != 1)
+      return rocksdb::Status::NotFound("json value specified by the field (json path) should exist and be unique");
+    *output = s->value[0].as_string();
+    return rocksdb::Status::OK();
+  }
+
+  // The variant has exactly the two alternatives handled above.
+  __builtin_unreachable();
+}
+
+// Collects the current values of all indexed fields of `key` in namespace `ns`.
+//
+// Returns Status::NotOK when this updater has no indexer attached, when the key's type cannot
+// be read or differs from `on_data_type`, or when a field lookup fails for a reason other than
+// NotFound. Fields reported as NotFound are skipped, so the result may contain fewer entries
+// than `fields`.
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) {
+  // `indexer` defaults to nullptr; fail cleanly instead of dereferencing it.
+  if (indexer == nullptr) {
+    return {Status::NotOK, "the index updater is not attached to an indexer"};
+  }
+
+  Database db(indexer->storage, ns);
+
+  RedisType type = kRedisNone;
+  auto s = db.Type(key, &type);
+  if (!s.ok()) return {Status::NotOK, s.ToString()};
+
+  if (type != static_cast<RedisType>(on_data_type)) {
+    // not the expected type, stop record
+    return {Status::NotOK, "this data type cannot be indexed"};
+  }
+
+  auto retriever = GET_OR_RET(FieldValueRetriever::Create(on_data_type, key, indexer->storage, ns));
+
+  FieldValues values;
+  for (const auto &[field, info] : fields) {
+    std::string value;
+    // Distinct name so the outer `s` is not shadowed.
+    auto field_status = retriever.Retrieve(field, &value);
+    if (field_status.IsNotFound()) continue;
+    if (!field_status.ok()) return {Status::NotOK, field_status.ToString()};
+
+    // `value` is not used again in this iteration, so hand its buffer to the map.
+    values.emplace(field, std::move(value));
+  }
+
+  return values;
+}
+
+// Takes ownership of `updater` and registers each of its prefixes in `prefix_map`.
+//
+// The updater is stored in the `updaters` deque and the map holds a pointer to that stored
+// element; appending to a std::deque does not invalidate references to existing elements.
+// A prefix that is already registered keeps its existing updater (emplace does not overwrite).
+void GlobalIndexer::Add(IndexUpdater updater) {
+  auto &up = updaters.emplace_back(std::move(updater));
+  // IndexUpdater::Record reaches the storage through `indexer`, which defaults to nullptr.
+  // Attach the updater to this indexer unless the caller already attached it.
+  if (up.indexer == nullptr) {
+    up.indexer = this;
+  }
+  for (const auto &prefix : up.prefixes) {
+    prefix_map.emplace(prefix, &up);
+  }
+}
+
+// Routes `key` to the updater registered under the longest prefix of `key` and returns what
+// that updater records. Yields Status::NoPrefixMatched when no registered prefix matches.
+StatusOr<IndexUpdater::FieldValues> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
+  auto matched = prefix_map.longest_prefix(key);
+  if (matched == prefix_map.end()) {
+    return {Status::NoPrefixMatched};
+  }
+
+  IndexUpdater *target = matched.value();
+  return target->Record(key, ns);
+}
+
+} // namespace redis
diff --git a/src/search/indexer.h b/src/search/indexer.h
new file mode 100644
index 00000000..e153d555
--- /dev/null
+++ b/src/search/indexer.h
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <tsl/htrie_map.h>
+
+#include <deque>
+#include <map>
+#include <utility>
+#include <variant>
+
+#include "commands/commander.h"
+#include "config/config.h"
+#include "indexer.h"
+#include "search/search_encoding.h"
+#include "server/server.h"
+#include "storage/redis_metadata.h"
+#include "storage/storage.h"
+#include "types/redis_hash.h"
+#include "types/redis_json.h"
+
+namespace redis {
+
+struct GlobalIndexer;
+
+struct FieldValueRetriever {  // Retrieves single field values of one key, held as either hash state or a JSON value.
+ struct HashData {  // State kept for the hash case.
+ Hash hash;
+ HashMetadata metadata;
+ std::string_view key;  // Non-owning view: the referenced key buffer must outlive this object.
+
+ HashData(Hash hash, HashMetadata metadata, std::string_view key)
+ : hash(std::move(hash)), metadata(std::move(metadata)), key(key) {}
+ };
+ using JsonData = JsonValue;  // State kept for the JSON case.
+
+ using Variant = std::variant<HashData, JsonData>;
+ Variant db;  // Holds HashData or JsonData, depending on which constructor was used.
+
+ static StatusOr<FieldValueRetriever> Create(SearchOnDataType type,
std::string_view key, engine::Storage *storage,
+ const std::string &ns);  // Factory: picks the hash or JSON state from `type`.
+
+ explicit FieldValueRetriever(Hash hash, HashMetadata metadata,
std::string_view key)
+ : db(std::in_place_type<HashData>, std::move(hash), std::move(metadata),
key) {}
+
+ explicit FieldValueRetriever(JsonValue json) :
db(std::in_place_type<JsonData>, std::move(json)) {}
+
+ rocksdb::Status Retrieve(std::string_view field, std::string *output);  // Stores the value of `field` (hash field name or JSON path) in *output.
+};
+
+struct IndexUpdater {  // Per-index state used to extract the indexed field values of a key.
+ using FieldValues = std::map<std::string, std::string>;  // field name -> retrieved value
+
+ SearchOnDataType on_data_type;  // Data type the index applies to; has no default initializer.
+ std::vector<std::string> prefixes;  // Key prefixes that GlobalIndexer::Add registers for this updater.
+ std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;  // Fields to retrieve, keyed by field name.
+ GlobalIndexer *indexer = nullptr;  // Non-owning; Record() reaches the storage through it.
+
+ StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);  // Values of `fields` found for `key`; NotFound fields are skipped.
+};
+
+struct GlobalIndexer {  // Registry of index updaters, looked up by key prefix.
+ std::deque<IndexUpdater> updaters;  // Owns the updaters; prefix_map points at these elements.
+ tsl::htrie_map<char, IndexUpdater *> prefix_map;  // key prefix -> updater (non-owning pointer)
+
+ engine::Storage *storage = nullptr;  // Non-owning storage handle read by IndexUpdater::Record.
+
+ explicit GlobalIndexer(engine::Storage *storage) : storage(storage) {}
+
+ void Add(IndexUpdater updater);  // Stores `updater` and registers each of its prefixes.
+ StatusOr<IndexUpdater::FieldValues> Record(std::string_view key, const
std::string &ns);  // Dispatches to the updater with the longest matching prefix; Status::NoPrefixMatched if none.
+};
+
+} // namespace redis
diff --git a/src/search/search_encoding.h b/src/search/search_encoding.h
index deea106b..2acec050 100644
--- a/src/search/search_encoding.h
+++ b/src/search/search_encoding.h
@@ -73,9 +73,9 @@ struct SearchFieldMetadata {
void DecodeFlag(uint8_t flag) { noindex = flag & 1; }
- void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
+ virtual void Encode(std::string *dst) const { PutFixed8(dst, MakeFlag()); }
- rocksdb::Status Decode(Slice *input) {
+ virtual rocksdb::Status Decode(Slice *input) {
uint8_t flag = 0;
if (!GetFixed8(input, &flag)) {
return rocksdb::Status::Corruption(kErrorInsufficientLength);
@@ -96,13 +96,13 @@ struct SearchTagFieldMetadata : SearchFieldMetadata {
char separator = ',';
bool case_sensitive = false;
- void Encode(std::string *dst) const {
+ void Encode(std::string *dst) const override {
SearchFieldMetadata::Encode(dst);
PutFixed8(dst, separator);
PutFixed8(dst, case_sensitive);
}
- rocksdb::Status Decode(Slice *input) {
+ rocksdb::Status Decode(Slice *input) override {
if (auto s = SearchFieldMetadata::Decode(input); !s.ok()) {
return s;
}
diff --git a/src/types/redis_hash.h b/src/types/redis_hash.h
index fc004ed8..8ae0a066 100644
--- a/src/types/redis_hash.h
+++ b/src/types/redis_hash.h
@@ -66,6 +66,8 @@ class Hash : public SubKeyScanner {
private:
rocksdb::Status GetMetadata(const Slice &ns_key, HashMetadata *metadata);
+
+ friend struct FieldValueRetriever;
};
} // namespace redis
diff --git a/src/types/redis_json.h b/src/types/redis_json.h
index a2135b75..8d0f15cb 100644
--- a/src/types/redis_json.h
+++ b/src/types/redis_json.h
@@ -76,6 +76,8 @@ class Json : public Database {
rocksdb::Status numop(JsonValue::NumOpEnum op, const std::string &user_key,
const std::string &path,
const std::string &value, JsonValue *result);
std::vector<rocksdb::Status> readMulti(const std::vector<Slice> &ns_keys,
std::vector<JsonValue> &values);
+
+ friend struct FieldValueRetriever;
};
} // namespace redis