This is an automated email from the ASF dual-hosted git repository.
twice pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/kvrocks.git
The following commit(s) were added to refs/heads/unstable by this push:
new 0a43bade feat(search): support the FT.TAGVALS command (#2564)
0a43bade is described below
commit 0a43bade1a4c3c885947fdd42eacefac934bf1bd
Author: Jonathan Chen <[email protected]>
AuthorDate: Fri Oct 4 05:38:32 2024 -0400
feat(search): support the FT.TAGVALS command (#2564)
Co-authored-by: Twice <[email protected]>
Co-authored-by: Twice <[email protected]>
---
src/commands/cmd_search.cc | 19 +++++++++-
src/search/index_manager.h | 63 ++++++++++++++++++++++++++++++++-
tests/gocase/unit/search/search_test.go | 38 ++++++++++++++++++++
3 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/src/commands/cmd_search.cc b/src/commands/cmd_search.cc
index 11423678..b697e10c 100644
--- a/src/commands/cmd_search.cc
+++ b/src/commands/cmd_search.cc
@@ -34,6 +34,7 @@
#include "search/sql_transformer.h"
#include "server/redis_reply.h"
#include "server/server.h"
+#include "status.h"
#include "string_util.h"
#include "tao/pegtl/string_input.hpp"
@@ -493,6 +494,21 @@ class CommandFTDrop : public Commander {
};
};
+class CommandFTTagVals : public Commander {  // FT.TAGVALS handler: replies with the set of values FieldValues returns for a field of an index
+ Status Execute(Server *srv, Connection *conn, std::string *output) override {
+ const auto &index_name = args_[1];  // first argument after the command name
+ const auto &tag_field_name = args_[2];  // second argument after the command name
+ engine::Context ctx(srv->storage);
+ auto field_values = GET_OR_RET(srv->index_mgr.FieldValues(ctx, index_name,
tag_field_name, conn->GetNamespace()));  // lookup uses the namespace of the calling connection; NOTE(review): GET_OR_RET presumably returns early on a failed status, confirm
+
+ std::vector<std::string> result_vec(field_values.begin(),
field_values.end());  // copy the unordered_set into a vector; element order is unspecified
+
+ *output = conn->SetOfBulkStrings(result_vec);
+
+ return Status::OK();
+ };
+};
+
REDIS_REGISTER_COMMANDS(Search,
MakeCmdAttr<CommandFTCreate>("ft.create", -2, "write
exclusive no-multi no-script slow", 0, 0,
0),
@@ -502,6 +518,7 @@ REDIS_REGISTER_COMMANDS(Search,
MakeCmdAttr<CommandFTExplain>("ft.explain", -3,
"read-only", 0, 0, 0),
MakeCmdAttr<CommandFTInfo>("ft.info", 2, "read-only",
0, 0, 0),
MakeCmdAttr<CommandFTList>("ft._list", 1, "read-only",
0, 0, 0),
- MakeCmdAttr<CommandFTDrop>("ft.dropindex", 2, "write
exclusive no-multi no-script", 0, 0, 0));
+ MakeCmdAttr<CommandFTDrop>("ft.dropindex", 2, "write
exclusive no-multi no-script", 0, 0, 0),
+ MakeCmdAttr<CommandFTTagVals>("ft.tagvals", 3,
"read-only slow", 0, 0, 0));
} // namespace redis
diff --git a/src/search/index_manager.h b/src/search/index_manager.h
index 6ab88397..aae3489b 100644
--- a/src/search/index_manager.h
+++ b/src/search/index_manager.h
@@ -29,6 +29,7 @@
#include "search/passes/manager.h"
#include "search/plan_executor.h"
#include "search/search_encoding.h"
+#include "search/value.h"
#include "status.h"
#include "storage/storage.h"
#include "string_util.h"
@@ -272,6 +273,66 @@ struct IndexManager {
return Status::OK();
}
-};
+ StatusOr<std::unordered_set<std::string>> FieldValues(engine::Context &ctx,  // Collects the distinct tag strings found under the key prefix of one TAG field.
std::string_view index_name,
+ std::string_view
tag_field_name, const std::string &ns) {  // Returns NotOK for an unknown index or an iterator failure, otherwise the (possibly empty) set.
+ auto iter = index_map.Find(index_name, ns);
+ if (iter == index_map.end()) {  // the index does not exist in this namespace: report an error
+ return {Status::NotOK, fmt::format("Index '{}' not found in namespace
'{}'", index_name, ns)};
+ }
+ const auto &info = iter->second;
+
+ std::string tag_field_name_str(tag_field_name);  // owning copy of the string_view used as the lookup key
+ auto field_it = info->fields.find(tag_field_name_str);
+ if (field_it == info->fields.end()) {  // unknown field is not an error: the result is an empty set
+ return std::unordered_set<std::string>{};
+ }
+ const auto &[field_name, field_info] = *field_it;
+
+ if (!field_info.metadata || field_info.metadata->type !=
IndexFieldType::TAG) {  // missing metadata or a non-TAG field also yields an empty set
+ return std::unordered_set<std::string>{};
+ }
+
+ std::unordered_set<std::string> matching_values;
+ util::UniqueIterator index_iter(ctx, ctx.DefaultScanOptions(),
ColumnFamilyID::Search);
+
+ auto index_key = SearchKey(ns, index_name, field_name);
+ std::string field_prefix;  // built below as: namespace, FIELD subkey type, index name, sized field name
+ index_key.PutNamespace(&field_prefix);
+ SearchKey::PutType(&field_prefix, SearchSubkeyType::FIELD);
+ index_key.PutIndex(&field_prefix);
+ PutSizedString(&field_prefix, field_name);
+
+ std::string last_tag;  // tag decoded from the previously accepted key
+
+ for (index_iter->Seek(field_prefix); index_iter->Valid();
index_iter->Next()) {
+ auto key = index_iter->key();
+
+ if (!key.starts_with(field_prefix)) {  // first key without the prefix ends the scan
+ break;
+ }
+
+ Slice key_slice = key;
+ key_slice.remove_prefix(field_prefix.size());  // what remains starts with the sized tag string
+
+ Slice tag_slice;
+ if (!GetSizedString(&key_slice, &tag_slice)) continue;  // skip keys whose remainder does not decode as a sized string
+
+ std::string current_tag = tag_slice.ToString();
+
+ if (current_tag == last_tag) {  // same tag as the previous key, skip the redundant insert; presumably equal tags are adjacent in iteration order
+ continue;
+ }
+
+ last_tag = current_tag;  // copy first, because current_tag is moved from on the next line
+ matching_values.insert(std::move(current_tag));
+ }
+
+ if (auto s = index_iter->status(); !s.ok()) {  // surface an iterator error instead of returning a partial result
+ return {Status::NotOK, fmt::format("Failed to iterate over index data:
{}", s.ToString())};
+ }
+
+ return matching_values;
+ }
+};
} // namespace redis
diff --git a/tests/gocase/unit/search/search_test.go
b/tests/gocase/unit/search/search_test.go
index dc4d1c09..b67d1d7f 100644
--- a/tests/gocase/unit/search/search_test.go
+++ b/tests/gocase/unit/search/search_test.go
@@ -154,6 +154,44 @@ func TestSearch(t *testing.T) {
verify(t, res)
})
+ t.Run("FT.TAGVALS with updated index", func(t *testing.T) {  // writes four JSON documents, then checks FT.TAGVALS on fields a, b and c of testidx1
+ require.NoError(t, rdb.Do(ctx, "JSON.SET", "test1:k1", "$",
`{"a": "x,y", "b": 11, "c": [1, 2, 3]}`).Err())
+ require.NoError(t, rdb.Do(ctx, "JSON.SET", "test1:k2", "$",
`{"a": "x,z", "b": 22, "c": [4, 5, 6]}`).Err())
+ require.NoError(t, rdb.Do(ctx, "JSON.SET", "test1:k3", "$",
`{"a": "y,z", "b": 33, "c": [7, 8, 9]}`).Err())
+ require.NoError(t, rdb.Do(ctx, "JSON.SET", "test1:k4", "$",
`{"a": "a,b,c", "b": 44, "c": [10, 11, 12]}`).Err())
+
+ // Helper function to verify tag values
+ verifyTagVals := func(t *testing.T, res *redis.Cmd, expected
map[string]struct{}) {
+ require.NoError(t, res.Err())
+ values, err := res.Result()
+ require.NoError(t, err)
+ for _, val := range values.([]interface{}) {  // the reply is asserted to be a list whose elements are strings
+ tag := val.(string)
+ _, exists := expected[tag]
+ require.True(t, exists, "Unexpected tag value:
%s", tag)
+ delete(expected, tag)  // consume the match: a repeated tag fails the check above, and leftovers fail the check below
+ }
+ require.Empty(t, expected, "Missing expected tag
values")
+ }
+
+ // Query tag values for the field 'a', which is indexed as a
tag field
+ res := rdb.Do(ctx, "FT.TAGVALS", "testidx1", "a")
+ expectedA := map[string]struct{}{  // the individual comma-separated items of field a across the four documents
+ "a": {}, "b": {}, "c": {}, "x": {}, "y": {}, "z": {},
+ }
+ verifyTagVals(t, res, expectedA)
+
+ // Querying tag values for the field 'b', which is a numeric
field, should return an empty result.
+ res = rdb.Do(ctx, "FT.TAGVALS", "testidx1", "b")
+ expectedB := map[string]struct{}{}
+ verifyTagVals(t, res, expectedB)
+
+ // Querying tag values for the field 'c', which is a vector
field, should return an empty result.
+ res = rdb.Do(ctx, "FT.TAGVALS", "testidx1", "c")
+ expectedC := map[string]struct{}{}
+ verifyTagVals(t, res, expectedC)
+ })
+
t.Run("FT.DROPINDEX", func(t *testing.T) {
require.NoError(t, rdb.Do(ctx, "FT.DROPINDEX",
"testidx1").Err())