This is an automated email from the ASF dual-hosted git repository.

twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new d2e0feb9 feat(search): add index building method to IndexUpdater (#2346)
d2e0feb9 is described below

commit d2e0feb989bd5855480deee61ebf932327e5ca93
Author: Twice <[email protected]>
AuthorDate: Sat Jun 1 16:32:06 2024 +0900

    feat(search): add index building method to IndexUpdater (#2346)
---
 src/common/db_util.h                  |  2 ++
 src/search/index_info.h               |  4 ++-
 src/search/indexer.cc                 | 51 +++++++++++++++++++++++++++--------
 src/search/indexer.h                  | 11 +++++---
 tests/cppunit/indexer_test.cc         | 49 ++++++++++++++++++++++++++++-----
 tests/cppunit/ir_dot_dumper_test.cc   |  2 +-
 tests/cppunit/ir_pass_test.cc         |  2 +-
 tests/cppunit/ir_sema_checker_test.cc |  2 +-
 tests/cppunit/plan_executor_test.cc   |  5 ++--
 9 files changed, 99 insertions(+), 29 deletions(-)

diff --git a/src/common/db_util.h b/src/common/db_util.h
index 8df34daa..d29262f2 100644
--- a/src/common/db_util.h
+++ b/src/common/db_util.h
@@ -37,6 +37,8 @@ struct UniqueIterator : std::unique_ptr<rocksdb::Iterator> {
   UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions& options,
                  rocksdb::ColumnFamilyHandle* column_family)
       : BaseType(storage->NewIterator(options, column_family)) {}
+  UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions& options, ColumnFamilyID cf)
+      : BaseType(storage->NewIterator(options, storage->GetCFHandle(cf))) {}
   UniqueIterator(engine::Storage* storage, const rocksdb::ReadOptions& options)
       : BaseType(storage->NewIterator(options)) {}
 };
diff --git a/src/search/index_info.h b/src/search/index_info.h
index 59abc694..ba5e6af3 100644
--- a/src/search/index_info.h
+++ b/src/search/index_info.h
@@ -23,6 +23,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <utility>
 
 #include "search_encoding.h"
 
@@ -56,7 +57,8 @@ struct IndexInfo {
   redis::IndexPrefixes prefixes;
   std::string ns;
 
-  IndexInfo(std::string name, redis::IndexMetadata metadata) : name(std::move(name)), metadata(std::move(metadata)) {}
+  IndexInfo(std::string name, redis::IndexMetadata metadata, std::string ns)
+      : name(std::move(name)), metadata(std::move(metadata)), ns(std::move(ns)) {}
 
   void Add(FieldInfo &&field) {
     const auto &name = field.name;
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
index 752d42bd..8a90c8f0 100644
--- a/src/search/indexer.cc
+++ b/src/search/indexer.cc
@@ -23,6 +23,7 @@
 #include <algorithm>
 #include <variant>
 
+#include "db_util.h"
 #include "parse_util.h"
 #include "search/search_encoding.h"
 #include "storage/redis_metadata.h"
@@ -77,7 +78,8 @@ rocksdb::Status FieldValueRetriever::Retrieve(std::string_view field, std::strin
   }
 }
 
-StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key, const std::string &ns) const {
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key) const {
+  const auto &ns = info->ns;
   Database db(indexer->storage, ns);
 
   RedisType type = kRedisNone;
@@ -191,7 +193,7 @@ Status IndexUpdater::UpdateNumericIndex(std::string_view key, std::string_view o
 }
 
 Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
-                                 std::string_view current, const std::string &ns) const {
+                                 std::string_view current) const {
   if (original == current) {
     // the value of this field is unchanged, no need to update
     return Status::OK();
@@ -203,7 +205,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
   }
 
   auto *metadata = iter->second.metadata.get();
-  SearchKey search_key(ns, info->name, field);
+  SearchKey search_key(info->ns, info->name, field);
   if (auto tag = dynamic_cast<TagFieldMetadata *>(metadata)) {
     GET_OR_RET(UpdateTagIndex(key, original, current, search_key, tag));
   } else if (auto numeric [[maybe_unused]] = dynamic_cast<NumericFieldMetadata *>(metadata)) {
@@ -215,8 +217,8 @@ Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view key,
   return Status::OK();
 }
 
-Status IndexUpdater::Update(const FieldValues &original, std::string_view key, const std::string &ns) const {
-  auto current = GET_OR_RET(Record(key, ns));
+Status IndexUpdater::Update(const FieldValues &original, std::string_view key) const {
+  auto current = GET_OR_RET(Record(key));
 
   for (const auto &[field, i] : info->fields) {
     if (i.metadata->noindex) {
@@ -232,7 +234,29 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c
       current_val = it->second;
     }
 
-    GET_OR_RET(UpdateIndex(field, key, original_val, current_val, ns));
+    GET_OR_RET(UpdateIndex(field, key, original_val, current_val));
+  }
+
+  return Status::OK();
+}
+
+Status IndexUpdater::Build() const {
+  auto storage = indexer->storage;
+  util::UniqueIterator iter(storage, storage->DefaultScanOptions(), ColumnFamilyID::Metadata);
+
+  for (const auto &prefix : info->prefixes) {
+    auto ns_key = ComposeNamespaceKey(info->ns, prefix, storage->IsSlotIdEncoded());
+    for (iter->Seek(ns_key); iter->Valid(); iter->Next()) {
+      if (!iter->key().starts_with(ns_key)) {
+        break;
+      }
+
+      auto [_, key] = ExtractNamespaceKey(iter->key(), storage->IsSlotIdEncoded());
+
+      auto s = Update({}, key.ToStringView());
+      if (s.Is<Status::TypeMismatched>()) continue;
+      if (!s.OK()) return s;
+    }
   }
 
   return Status::OK();
@@ -241,22 +265,27 @@ Status IndexUpdater::Update(const FieldValues &original, std::string_view key, c
 void GlobalIndexer::Add(IndexUpdater updater) {
   updater.indexer = this;
   for (const auto &prefix : updater.info->prefixes) {
-    prefix_map.insert(prefix, updater);
+    prefix_map.insert(ComposeNamespaceKey(updater.info->ns, prefix, storage->IsSlotIdEncoded()), updater);
   }
+  updater_list.push_back(updater);
 }
 
 StatusOr<GlobalIndexer::RecordResult> GlobalIndexer::Record(std::string_view key, const std::string &ns) {
-  auto iter = prefix_map.longest_prefix(key);
+  if (updater_list.empty()) {
+    return Status::NoPrefixMatched;
+  }
+
+  auto iter = prefix_map.longest_prefix(ComposeNamespaceKey(ns, key, storage->IsSlotIdEncoded()));
   if (iter != prefix_map.end()) {
     auto updater = iter.value();
-    return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns)));
+    return std::make_pair(updater, GET_OR_RET(updater.Record(key)));
   }
 
   return {Status::NoPrefixMatched};
 }
 
-Status GlobalIndexer::Update(const RecordResult &original, std::string_view key, const std::string &ns) {
-  return original.first.Update(original.second, key, ns);
+Status GlobalIndexer::Update(const RecordResult &original, std::string_view key) {
+  return original.first.Update(original.second, key);
 }
 
 }  // namespace redis
diff --git a/src/search/indexer.h b/src/search/indexer.h
index d6bf37de..34a4cf93 100644
--- a/src/search/indexer.h
+++ b/src/search/indexer.h
@@ -75,10 +75,12 @@ struct IndexUpdater {
 
   explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {}
 
-  StatusOr<FieldValues> Record(std::string_view key, const std::string &ns) const;
+  StatusOr<FieldValues> Record(std::string_view key) const;
   Status UpdateIndex(const std::string &field, std::string_view key, std::string_view original,
-                     std::string_view current, const std::string &ns) const;
-  Status Update(const FieldValues &original, std::string_view key, const std::string &ns) const;
+                     std::string_view current) const;
+  Status Update(const FieldValues &original, std::string_view key) const;
+
+  Status Build() const;
 
   Status UpdateTagIndex(std::string_view key, std::string_view original, std::string_view current,
                         const SearchKey &search_key, const TagFieldMetadata *tag) const;
@@ -91,6 +93,7 @@ struct GlobalIndexer {
   using RecordResult = std::pair<IndexUpdater, FieldValues>;
 
   tsl::htrie_map<char, IndexUpdater> prefix_map;
+  std::vector<IndexUpdater> updater_list;
 
   engine::Storage *storage = nullptr;
 
@@ -98,7 +101,7 @@ struct GlobalIndexer {
 
   void Add(IndexUpdater updater);
   StatusOr<RecordResult> Record(std::string_view key, const std::string &ns);
-  static Status Update(const RecordResult &original, std::string_view key, const std::string &ns);
+  static Status Update(const RecordResult &original, std::string_view key);
 };
 
 }  // namespace redis
diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc
index 13779892..b5d7c3f9 100644
--- a/tests/cppunit/indexer_test.cc
+++ b/tests/cppunit/indexer_test.cc
@@ -39,7 +39,7 @@ struct IndexerTest : TestBase {
     redis::IndexMetadata hash_field_meta;
     hash_field_meta.on_data_type = redis::IndexOnDataType::HASH;
 
-    auto hash_info = std::make_unique<kqir::IndexInfo>("hashtest", hash_field_meta);
+    auto hash_info = std::make_unique<kqir::IndexInfo>("hashtest", hash_field_meta, ns);
     hash_info->Add(kqir::FieldInfo("x", std::make_unique<redis::TagFieldMetadata>()));
     hash_info->Add(kqir::FieldInfo("y", std::make_unique<redis::NumericFieldMetadata>()));
     hash_info->prefixes.prefixes.emplace_back("idxtesthash");
@@ -51,7 +51,7 @@ struct IndexerTest : TestBase {
     redis::IndexMetadata json_field_meta;
     json_field_meta.on_data_type = redis::IndexOnDataType::JSON;
 
-    auto json_info = std::make_unique<kqir::IndexInfo>("jsontest", json_field_meta);
+    auto json_info = std::make_unique<kqir::IndexInfo>("jsontest", json_field_meta, ns);
     json_info->Add(kqir::FieldInfo("$.x", std::make_unique<redis::TagFieldMetadata>()));
     json_info->Add(kqir::FieldInfo("$.y", std::make_unique<redis::NumericFieldMetadata>()));
     json_info->prefixes.prefixes.emplace_back("idxtestjson");
@@ -79,7 +79,7 @@ TEST_F(IndexerTest, HashTag) {
 
   {
     auto s = indexer.Record(key1, ns);
-    ASSERT_TRUE(s);
+    ASSERT_EQ(s.Msg(), Status::ok_msg);
     ASSERT_EQ(s->first.info->name, idxname);
     ASSERT_TRUE(s->second.empty());
 
@@ -87,7 +87,7 @@ TEST_F(IndexerTest, HashTag) {
     db.Set(key1, "x", "food,kitChen,Beauty", &cnt);
     ASSERT_EQ(cnt, 1);
 
-    auto s2 = indexer.Update(*s, key1, ns);
+    auto s2 = indexer.Update(*s, key1);
     ASSERT_TRUE(s2);
 
     auto key = redis::SearchKey(ns, idxname, "x").ConstructTagFieldData("food", key1);
@@ -122,7 +122,7 @@ TEST_F(IndexerTest, HashTag) {
     ASSERT_EQ(cnt, 0);
     ASSERT_TRUE(s_set.ok());
 
-    auto s2 = indexer.Update(*s, key1, ns);
+    auto s2 = indexer.Update(*s, key1);
     ASSERT_TRUE(s2);
 
     auto key = redis::SearchKey(ns, idxname, "x").ConstructTagFieldData("food", key1);
@@ -177,7 +177,7 @@ TEST_F(IndexerTest, JsonTag) {
     auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})");
     ASSERT_TRUE(s_set.ok());
 
-    auto s2 = indexer.Update(*s, key1, ns);
+    auto s2 = indexer.Update(*s, key1);
     ASSERT_TRUE(s2);
 
     auto key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("food", key1);
@@ -210,7 +210,7 @@ TEST_F(IndexerTest, JsonTag) {
     auto s_set = db.Set(key1, "$.x", "\"Clothing,FOOD,sport\"");
     ASSERT_TRUE(s_set.ok());
 
-    auto s2 = indexer.Update(*s, key1, ns);
+    auto s2 = indexer.Update(*s, key1);
     ASSERT_TRUE(s2);
 
     auto key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("food", key1);
@@ -243,3 +243,38 @@ TEST_F(IndexerTest, JsonTag) {
     ASSERT_TRUE(s3.IsNotFound());
   }
 }
+
+TEST_F(IndexerTest, JsonTagBuildIndex) {
+  redis::Json db(storage_.get(), ns);
+  auto cfhandler = storage_->GetCFHandle(ColumnFamilyID::Search);
+
+  auto key1 = "idxtestjson:k2";
+  auto idxname = "jsontest";
+
+  {
+    auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})");
+    ASSERT_TRUE(s_set.ok());
+
+    auto s2 = indexer.updater_list[1].Build();
+    ASSERT_EQ(s2.Msg(), Status::ok_msg);
+
+    auto key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("food", key1);
+
+    std::string val;
+    auto s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler, key, &val);
+    ASSERT_TRUE(s3.ok());
+    ASSERT_EQ(val, "");
+
+    key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("kitchen", key1);
+
+    s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler, key, &val);
+    ASSERT_TRUE(s3.ok());
+    ASSERT_EQ(val, "");
+
+    key = redis::SearchKey(ns, idxname, "$.x").ConstructTagFieldData("beauty", key1);
+
+    s3 = storage_->Get(storage_->DefaultMultiGetOptions(), cfhandler, key, &val);
+    ASSERT_TRUE(s3.ok());
+    ASSERT_EQ(val, "");
+  }
+}
diff --git a/tests/cppunit/ir_dot_dumper_test.cc b/tests/cppunit/ir_dot_dumper_test.cc
index d616f290..bc50c7a9 100644
--- a/tests/cppunit/ir_dot_dumper_test.cc
+++ b/tests/cppunit/ir_dot_dumper_test.cc
@@ -72,7 +72,7 @@ static IndexMap MakeIndexMap() {
   auto f4 = FieldInfo("n2", std::make_unique<redis::NumericFieldMetadata>());
   auto f5 = FieldInfo("n3", std::make_unique<redis::NumericFieldMetadata>());
   f5.metadata->noindex = true;
-  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
   ia->Add(std::move(f1));
   ia->Add(std::move(f2));
   ia->Add(std::move(f3));
diff --git a/tests/cppunit/ir_pass_test.cc b/tests/cppunit/ir_pass_test.cc
index 76318d79..70811f81 100644
--- a/tests/cppunit/ir_pass_test.cc
+++ b/tests/cppunit/ir_pass_test.cc
@@ -176,7 +176,7 @@ static IndexMap MakeIndexMap() {
   auto f4 = FieldInfo("n2", std::make_unique<redis::NumericFieldMetadata>());
   auto f5 = FieldInfo("n3", std::make_unique<redis::NumericFieldMetadata>());
   f5.metadata->noindex = true;
-  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
   ia->Add(std::move(f1));
   ia->Add(std::move(f2));
   ia->Add(std::move(f3));
diff --git a/tests/cppunit/ir_sema_checker_test.cc b/tests/cppunit/ir_sema_checker_test.cc
index a12beea7..8223e5ed 100644
--- a/tests/cppunit/ir_sema_checker_test.cc
+++ b/tests/cppunit/ir_sema_checker_test.cc
@@ -38,7 +38,7 @@ static IndexMap MakeIndexMap() {
   auto f1 = FieldInfo("f1", std::make_unique<redis::TagFieldMetadata>());
   auto f2 = FieldInfo("f2", std::make_unique<redis::NumericFieldMetadata>());
   auto f3 = FieldInfo("f3", std::make_unique<redis::NumericFieldMetadata>());
-  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
+  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "");
   ia->Add(std::move(f1));
   ia->Add(std::move(f2));
   ia->Add(std::move(f3));
diff --git a/tests/cppunit/plan_executor_test.cc b/tests/cppunit/plan_executor_test.cc
index bad978d0..e41ea94b 100644
--- a/tests/cppunit/plan_executor_test.cc
+++ b/tests/cppunit/plan_executor_test.cc
@@ -40,8 +40,7 @@ static IndexMap MakeIndexMap() {
   auto f1 = FieldInfo("f1", std::make_unique<redis::TagFieldMetadata>());
   auto f2 = FieldInfo("f2", std::make_unique<redis::NumericFieldMetadata>());
   auto f3 = FieldInfo("f3", std::make_unique<redis::NumericFieldMetadata>());
-  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata());
-  ia->ns = "search_ns";
+  auto ia = std::make_unique<IndexInfo>("ia", redis::IndexMetadata(), "search_ns");
   ia->metadata.on_data_type = redis::IndexOnDataType::JSON;
   ia->prefixes.prefixes.emplace_back("test2:");
   ia->prefixes.prefixes.emplace_back("test4:");
@@ -318,7 +317,7 @@ struct ScopedUpdate {
   ScopedUpdate& operator=(ScopedUpdate&&) = delete;
 
   ~ScopedUpdate() {
-    auto s = redis::GlobalIndexer::Update(rr, key, ns);
+    auto s = redis::GlobalIndexer::Update(rr, key);
     EXPECT_EQ(s.Msg(), Status::ok_msg);
   }
 };

Reply via email to