This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new d2e0feb9 feat(search): add index building method to IndexUpdater (#2346)
d2e0feb9 is described below
commit d2e0feb989bd5855480deee61ebf932327e5ca93
Author: Twice <[email protected]>
AuthorDate: Sat Jun 1 16:32:06 2024 +0900
feat(search): add index building method to IndexUpdater (#2346)
---
src/common/db_util.h | 2 ++
src/search/index_info.h | 4 ++-
src/search/indexer.cc | 51 +++++++++++++++++++++++++++--------
src/search/indexer.h | 11 +++++---
tests/cppunit/indexer_test.cc | 49 ++++++++++++++++++++++++++++-----
tests/cppunit/ir_dot_dumper_test.cc | 2 +-
tests/cppunit/ir_pass_test.cc | 2 +-
tests/cppunit/ir_sema_checker_test.cc | 2 +-
tests/cppunit/plan_executor_test.cc | 5 ++--
9 files changed, 99 insertions(+), 29 deletions(-)
diff --git a/src/common/db_util.h b/src/common/db_util.h
index 8df34daa..d29262f2 100644
--- a/src/common/db_util.h
+++ b/src/common/db_util.h
@@ -37,6 +37,8 @@ struct UniqueIterator : std::unique_ptr<rocksdb::Iterator> {
UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions& options,
rocksdb::ColumnFamilyHandle* column_family)
: BaseType(storage->NewIterator(options, column_family)) {}
+ UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions&
options, ColumnFamilyID cf)
+ : BaseType(storage->NewIterator(options, storage->GetCFHandle(cf))) {}
UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions& options)
: BaseType(storage->NewIterator(options)) {}
};
diff --git a/src/search/index_info.h b/src/search/index_info.h
index 59abc694..ba5e6af3 100644
--- a/src/search/index_info.h
+++ b/src/search/index_info.h
@@ -23,6 +23,7 @@
#include <map>
#include <memory>
#include <string>
+#include <utility>
#include "search_encoding.h"
@@ -56,7 +57,8 @@ struct IndexInfo {
redis::IndexPrefixes prefixes;
std::string ns;
- IndexInfo(std::string name, redis::IndexMetadata metadata) :
name(std::move(name)), metadata(std::move(metadata)) {}
+ IndexInfo(std::string name, redis::IndexMetadata metadata, std::string ns)
+ : name(std::move(name)), metadata(std::move(metadata)),
ns(std::move(ns)) {}
void Add(FieldInfo &&field) {
const auto &name = field.name;
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
index 752d42bd..8a90c8f0 100644
--- a/src/search/indexer.cc
+++ b/src/search/indexer.cc
@@ -23,6 +23,7 @@
#include <algorithm>
#include <variant>
+#include "db_util.h"
#include "parse_util.h"
#include "search/search_encoding.h"
#include "storage/redis_metadata.h"
@@ -77,7 +78,8 @@ rocksdb::Status
FieldValueRetriever::Retrieve(std::string_view field, std::strin
}
}
-StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key,
const std::string &ns) const {
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key)
const {
+ const auto &ns = info->ns;
Database db(indexer->storage, ns);
RedisType type = kRedisNone;
@@ -191,7 +193,7 @@ Status IndexUpdater::UpdateNumericIndex(std::string_view
key, std::string_view o
}
Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view
key, std::string_view original,
- std::string_view current, const std::string
&ns) const {
+ std::string_view current) const {
if (original == current) {
// the value of this field is unchanged, no need to update
return Status::OK();
@@ -203,7 +205,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field,
std::string_view key,
}
auto *metadata = iter->second.metadata.get();
- SearchKey search_key(ns, info->name, field);
+ SearchKey search_key(info->ns, info->name, field);
if (auto tag = dynamic_cast<TagFieldMetadata *>(metadata)) {
GET_OR_RET(UpdateTagIndex(key, original, current, search_key, tag));
} else if (auto numeric [[maybe_unused]] = dynamic_cast<NumericFieldMetadata
*>(metadata)) {
@@ -215,8 +217,8 @@ Status IndexUpdater::UpdateIndex(const std::string &field,
std::string_view key,
return Status::OK();
}
-Status IndexUpdater::Update(const FieldValues &original, std::string_view key,
const std::string &ns) const {
- auto current = GET_OR_RET(Record(key, ns));
+Status IndexUpdater::Update(const FieldValues &original, std::string_view key)
const {
+ auto current = GET_OR_RET(Record(key));
for (const auto &[field, i] : info->fields) {
if (i.metadata->noindex) {
@@ -232,7 +234,29 @@ Status IndexUpdater::Update(const FieldValues &original,
std::string_view key, c
current_val = it->second;
}
- GET_OR_RET(UpdateIndex(field, key, original_val, current_val, ns));
+ GET_OR_RET(UpdateIndex(field, key, original_val, current_val));
+ }
+
+ return Status::OK();
+}
+
+Status IndexUpdater::Build() const {
+ auto storage = indexer->storage;
+ util::UniqueIterator iter(storage, storage->DefaultScanOptions(),
ColumnFamilyID::Metadata);
+
+ for (const auto &prefix : info->prefixes) {
+ auto ns_key = ComposeNamespaceKey(info->ns, prefix,
storage->IsSlotIdEncoded());
+ for (iter->Seek(ns_key); iter->Valid(); iter->Next()) {
+ if (!iter->key().starts_with(ns_key)) {
+ break;
+ }
+
+ auto [_, key] = ExtractNamespaceKey(iter->key(),
storage->IsSlotIdEncoded());
+
+ auto s = Update({}, key.ToStringView());
+ if (s.Is<Status::TypeMismatched>()) continue;
+ if (!s.OK()) return s;
+ }
}
return Status::OK();
@@ -241,22 +265,27 @@ Status IndexUpdater::Update(const FieldValues &original,
std::string_view key, c
void GlobalIndexer::Add(IndexUpdater updater) {
updater.indexer = this;
for (const auto &prefix : updater.info->prefixes) {
- prefix_map.insert(prefix, updater);
+ prefix_map.insert(ComposeNamespaceKey(updater.info->ns, prefix,
storage->IsSlotIdEncoded()), updater);
}
+ updater_list.push_back(updater);
}
StatusOr<GlobalIndexer::RecordResult> GlobalIndexer::Record(std::string_view
key, const std::string &ns) {
- auto iter = prefix_map.longest_prefix(key);
+ if (updater_list.empty()) {
+ return Status::NoPrefixMatched;
+ }
+
+ auto iter = prefix_map.longest_prefix(ComposeNamespaceKey(ns, key,
storage->IsSlotIdEncoded()));
if (iter != prefix_map.end()) {
auto updater = iter.value();
- return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns)));
+ return std::make_pair(updater, GET_OR_RET(updater.Record(key)));
}
return {Status::NoPrefixMatched};
}
-Status GlobalIndexer::Update(const RecordResult &original, std::string_view
key, const std::string &ns) {
- return original.first.Update(original.second, key, ns);
+Status GlobalIndexer::Update(const RecordResult &original, std::string_view
key) {
+ return original.first.Update(original.second, key);
}
} // namespace redis
diff --git a/src/search/indexer.h b/src/search/indexer.h
index d6bf37de..34a4cf93 100644
--- a/src/search/indexer.h
+++ b/src/search/indexer.h
@@ -75,10 +75,12 @@ struct IndexUpdater {
explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {}
- StatusOr<FieldValues> Record(std::string_view key, const std::string &ns)
const;
+ StatusOr<FieldValues> Record(std::string_view key) const;
Status UpdateIndex(const std::string &field, std::string_view key,
std::string_view original,
- std::string_view current, const std::string &ns) const;
- Status Update(const FieldValues &original, std::string_view key, const
std::string &ns) const;
+ std::string_view current) const;
+ Status Update(const FieldValues &original, std::string_view key) const;
+
+ Status Build() const;
Status UpdateTagIndex(std::string_view key, std::string_view original,
std::string_view current,
const SearchKey &search_key, const TagFieldMetadata
*tag) const;
@@ -91,6 +93,7 @@ struct GlobalIndexer {
using RecordResult = std::pair<IndexUpdater, FieldValues>;
tsl::htrie_map<char, IndexUpdater> prefix_map;
+ std::vector<IndexUpdater> updater_list;
engine::Storage *storage = nullptr;
@@ -98,7 +101,7 @@ struct GlobalIndexer {
void Add(IndexUpdater updater);
StatusOr<RecordResult> Record(std::string_view key, const std::string &ns);
- static Status Update(const RecordResult &original, std::string_view key,
const std::string &ns);
+ static Status Update(const RecordResult &original, std::string_view key);
};
} // namespace redis
diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc
index 13779892..b5d7c3f9 100644
--- a/tests/cppunit/indexer_test.cc
+++ b/tests/cppunit/indexer_test.cc
@@ -39,7 +39,7 @@ struct IndexerTest : TestBase {
redis::IndexMetadata hash_field_meta;
hash_field_meta.on_data_type = redis::IndexOnDataType::HASH;
- auto hash_info = std::make_unique<kqir::IndexInfo>("hashtest",
hash_field_meta);
+ auto hash_info = std::make_unique<kqir::IndexInfo>("hashtest",
hash_field_meta, ns);
hash_info->Add(kqir::FieldInfo("x",
std::make_unique<redis::TagFieldMetadata>()));
hash_info->Add(kqir::FieldInfo("y",
std::make_unique<redis::NumericFieldMetadata>()));
hash_info->prefixes.prefixes.emplace_back("idxtesthash");
@@ -51,7 +51,7 @@ struct IndexerTest : TestBase {
redis::IndexMetadata json_field_meta;
json_field_meta.on_data_type = redis::IndexOnDataType::JSON;
- auto json_info = std::make_unique<kqir::IndexInfo>("jsontest",
json_field_meta);
+ auto json_info = std::make_unique<kqir::IndexInfo>("jsontest",
json_field_meta, ns);
json_info->Add(kqir::FieldInfo("$.x",
std::make_unique<redis::TagFieldMetadata>()));
json_info->Add(kqir::FieldInfo("$.y",
std::make_unique<redis::NumericFieldMetadata>()));
json_info->prefixes.prefixes.emplace_back("idxtestjson");
@@ -79,7 +79,7 @@ TEST_F(IndexerTest, HashTag) {
{
auto s = indexer.Record(key1, ns);
- ASSERT_TRUE(s);
+ ASSERT_EQ(s.Msg(), Status::ok_msg);
ASSERT_EQ(s->first.info->name, idxname);
ASSERT_TRUE(s->second.empty());
@@ -87,7 +87,7 @@ TEST_F(IndexerTest, HashTag) {
db.Set(key1, "x", "food,kitChen,Beauty", &cnt);
ASSERT_EQ(cnt, 1);
- auto s2 = indexer.Update(*s, key1, ns);
+ auto s2 = indexer.Update(*s, key1);
ASSERT_TRUE(s2);
auto key = redis::SearchKey(ns, idxname,
"x").ConstructTagFieldData("food", key1);
@@ -122,7 +122,7 @@ TEST_F(IndexerTest, HashTag) {
ASSERT_EQ(cnt, 0);
ASSERT_TRUE(s_set.ok());
- auto s2 = indexer.Update(*s, key1, ns);
+ auto s2 = indexer.Update(*s, key1);
ASSERT_TRUE(s2);
auto key = redis::SearchKey(ns, idxname,
"x").ConstructTagFieldData("food", key1);
@@ -177,7 +177,7 @@ TEST_F(IndexerTest, JsonTag) {
auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})");
ASSERT_TRUE(s_set.ok());
- auto s2 = indexer.Update(*s, key1, ns);
+ auto s2 = indexer.Update(*s, key1);
ASSERT_TRUE(s2);
auto key = redis::SearchKey(ns, idxname,
"$.x").ConstructTagFieldData("food", key1);
@@ -210,7 +210,7 @@ TEST_F(IndexerTest, JsonTag) {
auto s_set = db.Set(key1, "$.x", "\"Clothing,FOOD,sport\"");
ASSERT_TRUE(s_set.ok());
- auto s2 = indexer.Update(*s, key1, ns);
+ auto s2 = indexer.Update(*s, key1);
ASSERT_TRUE(s2);
auto key = redis::SearchKey(ns, idxname,
"$.x").ConstructTagFieldData("food", key1);
@@ -243,3 +243,38 @@ TEST_F(IndexerTest, JsonTag) {
ASSERT_TRUE(s3.IsNotFound());
}
}
+
+TEST_F(IndexerTest, JsonTagBuildIndex) {
+ redis::Json db(storage_.get(), ns);
+ auto cfhandler = storage_->GetCFHandle(ColumnFamilyID::Search);
+
+ auto key1 = "idxtestjson:k2";
+ auto idxname = "jsontest";
+
+ {
+ auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})");
+ ASSERT_TRUE(s_set.ok());
+
+ auto s2 = indexer.updater_list[1].Build();
+ ASSERT_EQ(s2.Msg(), Status::ok_msg);
+
+ auto key = redis::SearchKey(ns, idxname,
"$.x").ConstructTagFieldData("food", key1);
+
+ std::string val;
+ auto s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler,
key, &val);
+ ASSERT_TRUE(s3.ok());
+ ASSERT_EQ(val, "");
+
+ key = redis::SearchKey(ns, idxname,
"$.x").ConstructTagFieldData("kitchen", key1);
+
+ s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler, key,
&val);
+ ASSERT_TRUE(s3.ok());
+ ASSERT_EQ(val, "");
+
+ key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("beauty",
key1);
+
+ s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler, key,
&val);
+ ASSERT_TRUE(s3.ok());
+ ASSERT_EQ(val, "");
+ }
+}
diff --git a/tests/cppunit/ir_dot_dumper_test.cc b/tests/cppunit/ir_dot_dumper_test.cc
index d616f290..bc50c7a9 100644
--- a/tests/cppunit/ir_dot_dumper_test.cc
+++ b/tests/cppunit/ir_dot_dumper_test.cc
@@ -72,7 +72,7 @@ static IndexMap MakeIndexMap() {
auto f4 = FieldInfo("n2", std::make_unique<redis::NumericFieldMetadata>());
auto f5 = FieldInfo("n3", std::make_unique<redis::NumericFieldMetadata>());
f5.metadata->noindex = true;
- auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+ auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
ia->Add(std::move(f1));
ia->Add(std::move(f2));
ia->Add(std::move(f3));
diff --git a/tests/cppunit/ir_pass_test.cc b/tests/cppunit/ir_pass_test.cc
index 76318d79..70811f81 100644
--- a/tests/cppunit/ir_pass_test.cc
+++ b/tests/cppunit/ir_pass_test.cc
@@ -176,7 +176,7 @@ static IndexMap MakeIndexMap() {
auto f4 = FieldInfo("n2", std::make_unique<redis::NumericFieldMetadata>());
auto f5 = FieldInfo("n3", std::make_unique<redis::NumericFieldMetadata>());
f5.metadata->noindex = true;
- auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+ auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
ia->Add(std::move(f1));
ia->Add(std::move(f2));
ia->Add(std::move(f3));
diff --git a/tests/cppunit/ir_sema_checker_test.cc b/tests/cppunit/ir_sema_checker_test.cc
index a12beea7..8223e5ed 100644
--- a/tests/cppunit/ir_sema_checker_test.cc
+++ b/tests/cppunit/ir_sema_checker_test.cc
@@ -38,7 +38,7 @@ static IndexMap MakeIndexMap() {
auto f1 = FieldInfo("f1", std::make_unique<redis::TagFieldMetadata>());
auto f2 = FieldInfo("f2", std::make_unique<redis::NumericFieldMetadata>());
auto f3 = FieldInfo("f3", std::make_unique<redis::NumericFieldMetadata>());
- auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+ auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
ia->Add(std::move(f1));
ia->Add(std::move(f2));
ia->Add(std::move(f3));
diff --git a/tests/cppunit/plan_executor_test.cc b/tests/cppunit/plan_executor_test.cc
index bad978d0..e41ea94b 100644
--- a/tests/cppunit/plan_executor_test.cc
+++ b/tests/cppunit/plan_executor_test.cc
@@ -40,8 +40,7 @@ static IndexMap MakeIndexMap() {
auto f1 = FieldInfo("f1", std::make_unique<redis::TagFieldMetadata>());
auto f2 = FieldInfo("f2", std::make_unique<redis::NumericFieldMetadata>());
auto f3 = FieldInfo("f3", std::make_unique<redis::NumericFieldMetadata>());
- auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
- ia->ns = "search_ns";
+ auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(),
"search_ns");
ia->metadata.on_data_type = redis::IndexOnDataType::JSON;
ia->prefixes.prefixes.emplace_back("test2:");
ia->prefixes.prefixes.emplace_back("test4:");
@@ -318,7 +317,7 @@ struct ScopedUpdate {
ScopedUpdate& operator=(ScopedUpdate&&) = delete;
~ScopedUpdate() {
- auto s = redis::GlobalIndexer::Update(rr, key, ns);
+ auto s = redis::GlobalIndexer::Update(rr, key);
EXPECT_EQ(s.Msg(), Status::ok_msg);
}
};