This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 8fae9033 Unify index info structure between indexer and sema checker
(#2240)
8fae9033 is described below
commit 8fae903350c00b60bc780e5217eecbad649c2d6c
Author: Twice <[email protected]>
AuthorDate: Sat Apr 13 14:42:47 2024 +0900
Unify index info structure between indexer and sema checker (#2240)
---
src/search/index_info.h | 61 +++++++++++++++++++++++++++++++++++
src/search/indexer.cc | 44 ++++++++++++-------------
src/search/indexer.h | 25 +++++---------
src/search/ir.h | 3 ++
src/search/ir_sema_checker.h | 43 ++++--------------------
tests/cppunit/indexer_test.cc | 32 +++++++++++-------
tests/cppunit/ir_sema_checker_test.cc | 2 --
7 files changed, 120 insertions(+), 90 deletions(-)
diff --git a/src/search/index_info.h b/src/search/index_info.h
new file mode 100644
index 00000000..e6e95b72
--- /dev/null
+++ b/src/search/index_info.h
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+
+#include "search_encoding.h"
+
+namespace kqir {
+
+struct IndexInfo;
+
+struct FieldInfo {
+ std::string name;
+ IndexInfo *index = nullptr;
+ std::unique_ptr<redis::SearchFieldMetadata> metadata;
+
+ FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata>
&&metadata)
+ : name(std::move(name)), metadata(std::move(metadata)) {}
+};
+
+struct IndexInfo {
+ using FieldMap = std::map<std::string, FieldInfo>;
+
+ std::string name;
+ SearchMetadata metadata;
+ FieldMap fields;
+ redis::SearchPrefixesMetadata prefixes;
+
+ IndexInfo(std::string name, SearchMetadata metadata) :
name(std::move(name)), metadata(std::move(metadata)) {}
+
+ void Add(FieldInfo &&field) {
+ const auto &name = field.name;
+ field.index = this;
+ fields.emplace(name, std::move(field));
+ }
+};
+
+using IndexMap = std::map<std::string, IndexInfo>;
+
+} // namespace kqir
diff --git a/src/search/indexer.cc b/src/search/indexer.cc
index df8f8eb3..3e7bbf1f 100644
--- a/src/search/indexer.cc
+++ b/src/search/indexer.cc
@@ -77,7 +77,7 @@ rocksdb::Status
FieldValueRetriever::Retrieve(std::string_view field, std::strin
}
}
-StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key,
const std::string &ns) {
+StatusOr<IndexUpdater::FieldValues> IndexUpdater::Record(std::string_view key,
const std::string &ns) const {
Database db(indexer->storage, ns);
RedisType type = kRedisNone;
@@ -87,16 +87,16 @@ StatusOr<IndexUpdater::FieldValues>
IndexUpdater::Record(std::string_view key, c
// key not exist
if (type == kRedisNone) return FieldValues();
- if (type != static_cast<RedisType>(metadata.on_data_type)) {
+ if (type != static_cast<RedisType>(info->metadata.on_data_type)) {
// not the expected type, stop record
return {Status::TypeMismatched};
}
- auto retriever =
GET_OR_RET(FieldValueRetriever::Create(metadata.on_data_type, key,
indexer->storage, ns));
+ auto retriever =
GET_OR_RET(FieldValueRetriever::Create(info->metadata.on_data_type, key,
indexer->storage, ns));
FieldValues values;
- for (const auto &[field, info] : fields) {
- if (info->noindex) {
+ for (const auto &[field, i] : info->fields) {
+ if (i.metadata->noindex) {
continue;
}
@@ -112,20 +112,20 @@ StatusOr<IndexUpdater::FieldValues>
IndexUpdater::Record(std::string_view key, c
}
Status IndexUpdater::UpdateIndex(const std::string &field, std::string_view
key, std::string_view original,
- std::string_view current, const std::string
&ns) {
+ std::string_view current, const std::string
&ns) const {
if (original == current) {
// the value of this field is unchanged, no need to update
return Status::OK();
}
- auto iter = fields.find(field);
- if (iter == fields.end()) {
+ auto iter = info->fields.find(field);
+ if (iter == info->fields.end()) {
return {Status::NotOK, "No such field to do index updating"};
}
- auto *metadata = iter->second.get();
+ auto *metadata = iter->second.metadata.get();
auto *storage = indexer->storage;
- auto ns_key = ComposeNamespaceKey(ns, name, storage->IsSlotIdEncoded());
+ auto ns_key = ComposeNamespaceKey(ns, info->name,
storage->IsSlotIdEncoded());
if (auto tag = dynamic_cast<SearchTagFieldMetadata *>(metadata)) {
const char delim[] = {tag->separator, '\0'};
auto original_tags = util::Split(original, delim);
@@ -163,14 +163,14 @@ Status IndexUpdater::UpdateIndex(const std::string
&field, std::string_view key,
for (const auto &tag : tags_to_delete) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
- auto index_key = InternalKey(ns_key, sub_key, this->metadata.version,
storage->IsSlotIdEncoded());
+ auto index_key = InternalKey(ns_key, sub_key, info->metadata.version,
storage->IsSlotIdEncoded());
batch->Delete(cf_handle, index_key.Encode());
}
for (const auto &tag : tags_to_add) {
auto sub_key = ConstructTagFieldSubkey(field, tag, key);
- auto index_key = InternalKey(ns_key, sub_key, this->metadata.version,
storage->IsSlotIdEncoded());
+ auto index_key = InternalKey(ns_key, sub_key, info->metadata.version,
storage->IsSlotIdEncoded());
batch->Put(cf_handle, index_key.Encode(), Slice());
}
@@ -184,7 +184,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field,
std::string_view key,
if (!original.empty()) {
auto original_num = GET_OR_RET(ParseFloat(std::string(original.begin(),
original.end())));
auto sub_key = ConstructNumericFieldSubkey(field, original_num, key);
- auto index_key = InternalKey(ns_key, sub_key, this->metadata.version,
storage->IsSlotIdEncoded());
+ auto index_key = InternalKey(ns_key, sub_key, info->metadata.version,
storage->IsSlotIdEncoded());
batch->Delete(cf_handle, index_key.Encode());
}
@@ -192,7 +192,7 @@ Status IndexUpdater::UpdateIndex(const std::string &field,
std::string_view key,
if (!current.empty()) {
auto current_num = GET_OR_RET(ParseFloat(std::string(current.begin(),
current.end())));
auto sub_key = ConstructNumericFieldSubkey(field, current_num, key);
- auto index_key = InternalKey(ns_key, sub_key, this->metadata.version,
storage->IsSlotIdEncoded());
+ auto index_key = InternalKey(ns_key, sub_key, info->metadata.version,
storage->IsSlotIdEncoded());
batch->Put(cf_handle, index_key.Encode(), Slice());
}
@@ -206,11 +206,11 @@ Status IndexUpdater::UpdateIndex(const std::string
&field, std::string_view key,
return Status::OK();
}
-Status IndexUpdater::Update(const FieldValues &original, std::string_view key,
const std::string &ns) {
+Status IndexUpdater::Update(const FieldValues &original, std::string_view key,
const std::string &ns) const {
auto current = GET_OR_RET(Record(key, ns));
- for (const auto &[field, info] : fields) {
- if (info->noindex) {
+ for (const auto &[field, i] : info->fields) {
+ if (i.metadata->noindex) {
continue;
}
@@ -230,9 +230,9 @@ Status IndexUpdater::Update(const FieldValues &original,
std::string_view key, c
}
void GlobalIndexer::Add(IndexUpdater updater) {
- auto &up = updaters.emplace_back(std::move(updater));
- for (const auto &prefix : up.prefixes) {
- prefix_map.insert(prefix, &up);
+ updater.indexer = this;
+ for (const auto &prefix : updater.info->prefixes.prefixes) {
+ prefix_map.insert(prefix, updater);
}
}
@@ -240,14 +240,14 @@ StatusOr<GlobalIndexer::RecordResult>
GlobalIndexer::Record(std::string_view key
auto iter = prefix_map.longest_prefix(key);
if (iter != prefix_map.end()) {
auto updater = iter.value();
- return std::make_pair(updater, GET_OR_RET(updater->Record(key, ns)));
+ return std::make_pair(updater, GET_OR_RET(updater.Record(key, ns)));
}
return {Status::NoPrefixMatched};
}
Status GlobalIndexer::Update(const RecordResult &original, std::string_view
key, const std::string &ns) {
- return original.first->Update(original.second, key, ns);
+ return original.first.Update(original.second, key, ns);
}
} // namespace redis
diff --git a/src/search/indexer.h b/src/search/indexer.h
index c404ecbb..dc1f03e0 100644
--- a/src/search/indexer.h
+++ b/src/search/indexer.h
@@ -29,6 +29,7 @@
#include "commands/commander.h"
#include "config/config.h"
+#include "index_info.h"
#include "indexer.h"
#include "search/search_encoding.h"
#include "server/server.h"
@@ -69,32 +70,22 @@ struct FieldValueRetriever {
struct IndexUpdater {
using FieldValues = std::map<std::string, std::string>;
- std::string name;
- SearchMetadata metadata;
- std::vector<std::string> prefixes;
- std::map<std::string, std::unique_ptr<SearchFieldMetadata>> fields;
+ const kqir::IndexInfo *info = nullptr;
GlobalIndexer *indexer = nullptr;
- IndexUpdater(const IndexUpdater &) = delete;
- IndexUpdater(IndexUpdater &&) = default;
+ explicit IndexUpdater(const kqir::IndexInfo *info) : info(info) {}
- IndexUpdater &operator=(IndexUpdater &&) = default;
- IndexUpdater &operator=(const IndexUpdater &) = delete;
-
- ~IndexUpdater() = default;
-
- StatusOr<FieldValues> Record(std::string_view key, const std::string &ns);
+ StatusOr<FieldValues> Record(std::string_view key, const std::string &ns)
const;
Status UpdateIndex(const std::string &field, std::string_view key,
std::string_view original,
- std::string_view current, const std::string &ns);
- Status Update(const FieldValues &original, std::string_view key, const
std::string &ns);
+ std::string_view current, const std::string &ns) const;
+ Status Update(const FieldValues &original, std::string_view key, const
std::string &ns) const;
};
struct GlobalIndexer {
using FieldValues = IndexUpdater::FieldValues;
- using RecordResult = std::pair<IndexUpdater *, FieldValues>;
+ using RecordResult = std::pair<IndexUpdater, FieldValues>;
- std::deque<IndexUpdater> updaters;
- tsl::htrie_map<char, IndexUpdater *> prefix_map;
+ tsl::htrie_map<char, IndexUpdater> prefix_map;
engine::Storage *storage = nullptr;
diff --git a/src/search/ir.h b/src/search/ir.h
index 6bcabff7..2b85b458 100644
--- a/src/search/ir.h
+++ b/src/search/ir.h
@@ -34,6 +34,7 @@
#include "fmt/core.h"
#include "ir_iterator.h"
+#include "search/index_info.h"
#include "string_util.h"
#include "type_util.h"
@@ -76,6 +77,7 @@ struct Ref : Node {};
struct FieldRef : Ref {
std::string name;
+ const FieldInfo *info = nullptr;
explicit FieldRef(std::string name) : name(std::move(name)) {}
@@ -348,6 +350,7 @@ struct SelectClause : Node {
struct IndexRef : Ref {
std::string name;
+ const IndexInfo *info = nullptr;
explicit IndexRef(std::string name) : name(std::move(name)) {}
diff --git a/src/search/ir_sema_checker.h b/src/search/ir_sema_checker.h
index 21c0ea0f..471c3a3f 100644
--- a/src/search/ir_sema_checker.h
+++ b/src/search/ir_sema_checker.h
@@ -23,49 +23,18 @@
#include <map>
#include <memory>
+#include "index_info.h"
#include "ir.h"
#include "search_encoding.h"
#include "storage/redis_metadata.h"
namespace kqir {
-struct IndexInfo;
-
-struct FieldInfo {
- std::string name;
- IndexInfo *index = nullptr;
- std::unique_ptr<redis::SearchFieldMetadata> metadata;
-
- FieldInfo(std::string name, std::unique_ptr<redis::SearchFieldMetadata>
&&metadata)
- : name(std::move(name)), metadata(std::move(metadata)) {}
-};
-
-struct IndexInfo {
- using FieldMap = std::map<std::string, FieldInfo>;
-
- std::string name;
- SearchMetadata metadata;
- FieldMap fields;
-
- IndexInfo(std::string name, SearchMetadata metadata) :
name(std::move(name)), metadata(std::move(metadata)) {}
-
- void Add(FieldInfo &&field) {
- const auto &name = field.name;
- field.index = this;
- fields.emplace(name, std::move(field));
- }
-};
-
-using IndexMap = std::map<std::string, IndexInfo>;
-
struct SemaChecker {
const IndexMap &index_map;
const IndexInfo *current_index = nullptr;
- using Result = std::map<const Node *, std::variant<const FieldInfo *, const
IndexInfo *>>;
- Result result;
-
explicit SemaChecker(const IndexMap &index_map) : index_map(index_map) {}
Status Check(Node *node) {
@@ -73,7 +42,7 @@ struct SemaChecker {
auto index_name = v->index->name;
if (auto iter = index_map.find(index_name); iter != index_map.end()) {
current_index = &iter->second;
- result.emplace(v->index.get(), current_index);
+ v->index->info = current_index;
GET_OR_RET(Check(v->select.get()));
GET_OR_RET(Check(v->query_expr.get()));
@@ -88,7 +57,7 @@ struct SemaChecker {
if (auto iter = current_index->fields.find(v->field->name); iter ==
current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index
`{}`", v->field->name, current_index->name)};
} else {
- result.emplace(v->field.get(), &iter->second);
+ v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<AndExpr *>(node)) {
for (const auto &n : v->inners) {
@@ -106,7 +75,7 @@ struct SemaChecker {
} else if (auto meta = dynamic_cast<redis::SearchTagFieldMetadata
*>(iter->second.metadata.get()); !meta) {
return {Status::NotOK, fmt::format("field `{}` is not a tag field",
v->field->name)};
} else {
- result.emplace(v->field.get(), &iter->second);
+ v->field->info = &iter->second;
if (v->tag->val.empty()) {
return {Status::NotOK, "tag cannot be an empty string"};
@@ -122,14 +91,14 @@ struct SemaChecker {
} else if (!dynamic_cast<redis::SearchNumericFieldMetadata
*>(iter->second.metadata.get())) {
return {Status::NotOK, fmt::format("field `{}` is not a numeric
field", v->field->name)};
} else {
- result.emplace(v->field.get(), &iter->second);
+ v->field->info = &iter->second;
}
} else if (auto v = dynamic_cast<SelectClause *>(node)) {
for (const auto &n : v->fields) {
if (auto iter = current_index->fields.find(n->name); iter ==
current_index->fields.end()) {
return {Status::NotOK, fmt::format("field `{}` not found in index
`{}`", n->name, current_index->name)};
} else {
- result.emplace(n.get(), &iter->second);
+ n->info = &iter->second;
}
}
} else if (auto v [[maybe_unused]] = dynamic_cast<BoolLiteral *>(node)) {
diff --git a/tests/cppunit/indexer_test.cc b/tests/cppunit/indexer_test.cc
index f30b45ca..ae5a045e 100644
--- a/tests/cppunit/indexer_test.cc
+++ b/tests/cppunit/indexer_test.cc
@@ -25,32 +25,40 @@
#include <memory>
+#include "search/index_info.h"
#include "search/search_encoding.h"
#include "storage/redis_metadata.h"
#include "types/redis_hash.h"
struct IndexerTest : TestBase {
redis::GlobalIndexer indexer;
+ kqir::IndexMap map;
std::string ns = "index_test";
IndexerTest() : indexer(storage_.get()) {
SearchMetadata hash_field_meta(false);
hash_field_meta.on_data_type = SearchOnDataType::HASH;
- std::map<std::string, std::unique_ptr<redis::SearchFieldMetadata>>
hash_fields;
- hash_fields.emplace("x",
std::make_unique<redis::SearchTagFieldMetadata>());
- hash_fields.emplace("y",
std::make_unique<redis::SearchNumericFieldMetadata>());
+ kqir::IndexInfo hash_info("hashtest", hash_field_meta);
+ hash_info.Add(kqir::FieldInfo("x",
std::make_unique<redis::SearchTagFieldMetadata>()));
+ hash_info.Add(kqir::FieldInfo("y",
std::make_unique<redis::SearchNumericFieldMetadata>()));
+ hash_info.prefixes.prefixes.emplace_back("idxtesthash");
- redis::IndexUpdater hash_updater{"hashtest", hash_field_meta,
{"idxtesthash"}, std::move(hash_fields), &indexer};
+ map.emplace("hashtest", std::move(hash_info));
+
+ redis::IndexUpdater hash_updater{&map.at("hashtest")};
SearchMetadata json_field_meta(false);
json_field_meta.on_data_type = SearchOnDataType::JSON;
- std::map<std::string, std::unique_ptr<redis::SearchFieldMetadata>>
json_fields;
- json_fields.emplace("$.x",
std::make_unique<redis::SearchTagFieldMetadata>());
- json_fields.emplace("$.y",
std::make_unique<redis::SearchNumericFieldMetadata>());
+ kqir::IndexInfo json_info("jsontest", json_field_meta);
+ json_info.Add(kqir::FieldInfo("$.x",
std::make_unique<redis::SearchTagFieldMetadata>()));
+ json_info.Add(kqir::FieldInfo("$.y",
std::make_unique<redis::SearchNumericFieldMetadata>()));
+ json_info.prefixes.prefixes.emplace_back("idxtestjson");
+
+ map.emplace("jsontest", std::move(json_info));
- redis::IndexUpdater json_updater{"jsontest", json_field_meta,
{"idxtestjson"}, std::move(json_fields), &indexer};
+ redis::IndexUpdater json_updater{&map.at("jsontest")};
indexer.Add(std::move(hash_updater));
indexer.Add(std::move(json_updater));
@@ -72,7 +80,7 @@ TEST_F(IndexerTest, HashTag) {
{
auto s = indexer.Record(key1, ns);
ASSERT_TRUE(s);
- ASSERT_EQ(s->first->name, idxname);
+ ASSERT_EQ(s->first.info->name, idxname);
ASSERT_TRUE(s->second.empty());
uint64_t cnt = 0;
@@ -111,7 +119,7 @@ TEST_F(IndexerTest, HashTag) {
{
auto s = indexer.Record(key1, ns);
ASSERT_TRUE(s);
- ASSERT_EQ(s->first->name, idxname);
+ ASSERT_EQ(s->first.info->name, idxname);
ASSERT_EQ(s->second.size(), 1);
ASSERT_EQ(s->second["x"], "food,kitChen,Beauty");
@@ -179,7 +187,7 @@ TEST_F(IndexerTest, JsonTag) {
{
auto s = indexer.Record(key1, ns);
ASSERT_TRUE(s);
- ASSERT_EQ(s->first->name, idxname);
+ ASSERT_EQ(s->first.info->name, idxname);
ASSERT_TRUE(s->second.empty());
auto s_set = db.Set(key1, "$", R"({"x": "food,kitChen,Beauty"})");
@@ -217,7 +225,7 @@ TEST_F(IndexerTest, JsonTag) {
{
auto s = indexer.Record(key1, ns);
ASSERT_TRUE(s);
- ASSERT_EQ(s->first->name, idxname);
+ ASSERT_EQ(s->first.info->name, idxname);
ASSERT_EQ(s->second.size(), 1);
ASSERT_EQ(s->second["$.x"], "food,kitChen,Beauty");
diff --git a/tests/cppunit/ir_sema_checker_test.cc
b/tests/cppunit/ir_sema_checker_test.cc
index db222f6c..75beb686 100644
--- a/tests/cppunit/ir_sema_checker_test.cc
+++ b/tests/cppunit/ir_sema_checker_test.cc
@@ -76,7 +76,5 @@ TEST(SemaCheckerTest, Simple) {
auto root = *Parse("select f1 from ia where f1 hastag \"a\" and f2 = 1
order by f3");
ASSERT_EQ(checker.Check(root.get()).Msg(), "ok");
-
- ASSERT_EQ(checker.result.size(), 5);
}
}