This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new a89097f0efe branch-4.0: [feature](search) add variant subcolumn
support for search function #56718 (#57049)
a89097f0efe is described below
commit a89097f0efe4f2a0fd1872c93388d84b06ffcbc4
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 17 15:01:01 2025 +0800
branch-4.0: [feature](search) add variant subcolumn support for search
function #56718 (#57049)
Cherry-picked from #56718
Co-authored-by: Jack <[email protected]>
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 10 +-
be/src/vec/exprs/vsearch.cpp | 87 ++++++------
be/src/vec/exprs/vsearch.h | 2 +
be/src/vec/functions/function_search.cpp | 61 +++++++--
be/src/vec/functions/function_search.h | 20 ++-
.../org/apache/doris/nereids/search/SearchLexer.g4 | 1 +
.../apache/doris/nereids/search/SearchParser.g4 | 7 +-
.../org/apache/doris/analysis/SearchPredicate.java | 24 +++-
.../doris/nereids/jobs/executor/Rewriter.java | 7 +-
.../rules/rewrite/RewriteSearchToSlots.java | 61 +++++++--
.../trees/expressions/SearchExpression.java | 8 +-
.../functions/scalar/SearchDslParser.java | 28 +++-
gensrc/thrift/Exprs.thrift | 5 +-
.../variant_p0/test_variant_search_subcolumn.out | 32 +++++
.../test_variant_search_subcolumn.groovy | 152 +++++++++++++++++++++
15 files changed, 429 insertions(+), 76 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e189156d68e..b60cfc5e1f6 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -605,10 +605,16 @@ Status
SegmentIterator::_get_row_ranges_by_column_conditions() {
{
if (_opts.runtime_state &&
_opts.runtime_state->query_options().enable_inverted_index_query &&
- has_index_in_iterators()) {
+ (has_index_in_iterators() ||
!_common_expr_ctxs_push_down.empty())) {
SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
size_t input_rows = _row_bitmap.cardinality();
- RETURN_IF_ERROR(_apply_inverted_index());
+ // Only apply column-level inverted index if we have iterators
+ if (has_index_in_iterators()) {
+ RETURN_IF_ERROR(_apply_inverted_index());
+ }
+ // Always apply expr-level index (e.g., search expressions) if we
have common_expr_pushdown
+ // This allows search expressions with variant subcolumns to be
evaluated even when
+ // the segment doesn't have all subcolumns
RETURN_IF_ERROR(_apply_index_expr());
for (auto it = _common_expr_ctxs_push_down.begin();
it != _common_expr_ctxs_push_down.end();) {
diff --git a/be/src/vec/exprs/vsearch.cpp b/be/src/vec/exprs/vsearch.cpp
index fefe61ddb5f..e32209d614b 100644
--- a/be/src/vec/exprs/vsearch.cpp
+++ b/be/src/vec/exprs/vsearch.cpp
@@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr,
VExprContext* context,
auto index_context = context->get_inverted_index_context();
if (index_context == nullptr) {
- return Status::OK();
+ LOG(WARNING) << "collect_search_inputs: No inverted index context
available";
+ return Status::InternalError("No inverted index context available");
}
+ // Get field bindings for variant subcolumn support
+ const auto& search_param = expr.get_search_param();
+ const auto& field_bindings = search_param.field_bindings;
+
+ int child_index = 0; // Index for iterating through children
for (const auto& child : expr.children()) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
int column_id = column_slot_ref->column_id();
auto* iterator =
index_context->get_inverted_index_iterator_by_column_id(column_id);
- if (iterator == nullptr) {
- continue;
+
+ // Determine the field_name from field_bindings (for variant
subcolumns)
+ // field_bindings and children should have the same order
+ std::string field_name;
+ if (child_index < field_bindings.size()) {
+ // Use field_name from binding (may include "parent.subcolumn"
for variant)
+ field_name = field_bindings[child_index].field_name;
+ } else {
+ // Fallback to column_name if binding not found
+ field_name = column_slot_ref->column_name();
}
- const auto* storage_name_type =
-
index_context->get_storage_name_and_type_by_column_id(column_id);
- if (storage_name_type == nullptr) {
- auto err_msg = fmt::format(
- "storage_name_type cannot be found for column {} while
in {} evaluate",
- column_id, expr.expr_name());
- LOG(ERROR) << err_msg;
- return Status::InternalError(err_msg);
+ // Only collect fields that have iterators (materialized columns
with indexes)
+ if (iterator != nullptr) {
+ const auto* storage_name_type =
+
index_context->get_storage_name_and_type_by_column_id(column_id);
+ if (storage_name_type == nullptr) {
+ return Status::InternalError("storage_name_type not found
for column {} in {}",
+ column_id, expr.expr_name());
+ }
+
+ bundle->iterators.emplace(field_name, iterator);
+ bundle->field_types.emplace(field_name, *storage_name_type);
+ bundle->column_ids.emplace_back(column_id);
}
- auto column_name = column_slot_ref->column_name();
- bundle->iterators.emplace(column_name, iterator);
- bundle->field_types.emplace(column_name, *storage_name_type);
- bundle->column_ids.emplace_back(column_id);
+ child_index++;
} else if (child->is_literal()) {
auto* literal = assert_cast<VLiteral*>(child.get());
bundle->literal_args.emplace_back(literal->get_column_ptr(),
literal->get_data_type(),
literal->expr_name());
} else {
- LOG(WARNING) << "VSearchExpr: Unsupported child node type
encountered";
- return Status::InvalidArgument("search expression child type
unsupported");
+ // Check if this is ElementAt expression (for variant subcolumn
access)
+ if (child->expr_name() == "element_at" && child_index <
field_bindings.size() &&
+ field_bindings[child_index].__isset.is_variant_subcolumn &&
+ field_bindings[child_index].is_variant_subcolumn) {
+ // Variant subcolumn not materialized - skip, will create
empty BitmapQuery in function_search
+ child_index++;
+ continue;
+ }
+
+ // Not a supported child type
+ return Status::InvalidArgument("Unsupported child node type: {}",
child->expr_name());
}
}
@@ -94,16 +118,6 @@ VSearchExpr::VSearchExpr(const TExprNode& node) :
VExpr(node) {
_search_param = node.search_param;
_original_dsl = _search_param.original_dsl;
}
-
- LOG(INFO) << "VSearchExpr constructor: dsl='" << _original_dsl
- << "', num_children=" << node.num_children
- << ", has_search_param=" << node.__isset.search_param
- << ", children_size=" << _children.size();
-
- for (size_t i = 0; i < _children.size(); i++) {
- LOG(INFO) << "VSearchExpr constructor: child[" << i
- << "] expr_name=" << _children[i]->expr_name();
- }
}
const std::string& VSearchExpr::expr_name() const {
@@ -120,7 +134,7 @@ Status VSearchExpr::execute(VExprContext* context, Block*
block, int* result_col
}
Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t
segment_num_rows) {
- LOG(INFO) << "VSearchExpr::evaluate_inverted_index called with DSL: " <<
_original_dsl;
+ LOG(INFO) << "VSearchExpr::evaluate_inverted_index called, DSL: " <<
_search_param.original_dsl;
if (_search_param.original_dsl.empty()) {
return Status::InvalidArgument("search DSL is empty");
@@ -135,8 +149,14 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext*
context, uint32_t segm
SearchInputBundle bundle;
RETURN_IF_ERROR(collect_search_inputs(*this, context, &bundle));
+ VLOG_DEBUG << "VSearchExpr: bundle.iterators.size()=" <<
bundle.iterators.size();
+
if (bundle.iterators.empty()) {
- LOG(WARNING) << "VSearchExpr: No indexed columns available for
evaluation";
+ LOG(WARNING) << "VSearchExpr: No indexed columns available for
evaluation, DSL: "
+ << _original_dsl;
+ auto empty_bitmap =
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+
std::make_shared<roaring::Roaring>());
+ index_context->set_inverted_index_result_for_expr(this,
std::move(empty_bitmap));
return Status::OK();
}
@@ -155,15 +175,6 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext*
context, uint32_t segm
index_context->set_true_for_inverted_index_status(this, column_id);
}
- const auto& data_bitmap = result_bitmap.get_data_bitmap();
- const uint64_t match_count = data_bitmap ? data_bitmap->cardinality() : 0;
- if (match_count > 0) {
- LOG(INFO) << "VSearchExpr: Found " << match_count
- << " matching rows for DSL: " << _search_param.original_dsl;
- } else {
- LOG(INFO) << "VSearchExpr: No matches found for DSL: " <<
_search_param.original_dsl;
- }
-
return Status::OK();
}
diff --git a/be/src/vec/exprs/vsearch.h b/be/src/vec/exprs/vsearch.h
index 3fa8399bc77..d9b5e40985c 100644
--- a/be/src/vec/exprs/vsearch.h
+++ b/be/src/vec/exprs/vsearch.h
@@ -39,6 +39,8 @@ public:
bool can_push_down_to_index() const override { return true; }
+ const TSearchParam& get_search_param() const { return _search_param; }
+
private:
TSearchParam _search_param;
std::string _original_dsl;
diff --git a/be/src/vec/functions/function_search.cpp
b/be/src/vec/functions/function_search.cpp
index 45a6f4c1acf..d53024a0dde 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -35,6 +35,7 @@
#include "olap/rowset/segment_v2/index_file_reader.h"
#include "olap/rowset/segment_v2/index_query_context.h"
#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
+#include
"olap/rowset/segment_v2/inverted_index/query_v2/bitmap_query/bitmap_query.h"
#include
"olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h"
#include
"olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h"
@@ -52,8 +53,21 @@ Status FieldReaderResolver::resolve(const std::string&
field_name,
InvertedIndexQueryType query_type,
FieldReaderBinding* binding) {
DCHECK(binding != nullptr);
+
+ // Check if this is a variant subcolumn
+ bool is_variant_sub = is_variant_subcolumn(field_name);
+
auto data_it = _data_type_with_names.find(field_name);
if (data_it == _data_type_with_names.end()) {
+ // For variant subcolumns, not finding the index is normal (the
subcolumn may not exist in this segment)
+ // Return OK but with null binding to signal "no match"
+ if (is_variant_sub) {
+ VLOG_DEBUG << "Variant subcolumn '" << field_name
+ << "' not found in this segment, treating as no match";
+ *binding = FieldReaderBinding();
+ return Status::OK();
+ }
+ // For normal fields, this is an error
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"field '{}' not found in inverted index metadata", field_name);
}
@@ -69,6 +83,13 @@ Status FieldReaderResolver::resolve(const std::string&
field_name,
auto iterator_it = _iterators.find(field_name);
if (iterator_it == _iterators.end() || iterator_it->second == nullptr) {
+ // For variant subcolumns, not finding the iterator is normal
+ if (is_variant_sub) {
+ VLOG_DEBUG << "Variant subcolumn '" << field_name
+ << "' iterator not found in this segment, treating as
no match";
+ *binding = FieldReaderBinding();
+ return Status::OK();
+ }
return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
"iterator not found for field '{}'", field_name);
}
@@ -171,12 +192,11 @@ Status
FunctionSearch::evaluate_inverted_index_with_search_param(
data_type_with_names,
std::unordered_map<std::string, IndexIterator*> iterators, uint32_t
num_rows,
InvertedIndexResultBitmap& bitmap_result) const {
- VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl <<
"' with "
- << data_type_with_names.size() << " indexed columns and " <<
iterators.size()
- << " iterators";
-
if (iterators.empty() || data_type_with_names.empty()) {
- LOG(INFO) << "No indexed columns or iterators available, returning
empty result";
+ LOG(INFO) << "No indexed columns or iterators available, returning
empty result, dsl:"
+ << search_param.original_dsl;
+ bitmap_result =
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+
std::make_shared<roaring::Roaring>());
return Status::OK();
}
@@ -184,14 +204,19 @@ Status
FunctionSearch::evaluate_inverted_index_with_search_param(
context->collection_statistics = std::make_shared<CollectionStatistics>();
context->collection_similarity = std::make_shared<CollectionSimilarity>();
- FieldReaderResolver resolver(data_type_with_names, iterators, context);
+ // Pass field_bindings to resolver for variant subcolumn detection
+ FieldReaderResolver resolver(data_type_with_names, iterators, context,
+ search_param.field_bindings);
query_v2::QueryPtr root_query;
std::string root_binding_key;
RETURN_IF_ERROR(build_query_recursive(*this, search_param.root, context,
resolver, &root_query,
&root_binding_key));
if (root_query == nullptr) {
- LOG(INFO) << "search: Query tree resolved to empty query";
+ LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
+ << search_param.original_dsl;
+ bitmap_result =
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+
std::make_shared<roaring::Roaring>());
return Status::OK();
}
@@ -393,9 +418,12 @@ Status FunctionSearch::build_query_recursive(const
FunctionSearch& function,
std::string child_binding_key;
RETURN_IF_ERROR(build_query_recursive(function, child_clause,
context, resolver,
&child_query,
&child_binding_key));
- if (child_query != nullptr) {
- builder.add(child_query, std::move(child_binding_key));
- }
+ // Add all children including empty BitmapQuery
+ // BooleanQuery will handle the logic:
+ // - AND with empty bitmap → result is empty
+ // - OR with empty bitmap → empty bitmap is ignored by OR logic
+ // - NOT with empty bitmap → NOT(empty) = all rows (handled by
BooleanQuery)
+ builder.add(child_query, std::move(child_binding_key));
}
}
@@ -429,6 +457,19 @@ Status FunctionSearch::build_leaf_query(const
FunctionSearch& function, const TS
FieldReaderBinding binding;
RETURN_IF_ERROR(resolver.resolve(field_name, query_type, &binding));
+
+ // Check if binding is empty (variant subcolumn not found in this segment)
+ if (binding.lucene_reader == nullptr) {
+ VLOG_DEBUG << "build_leaf_query: Variant subcolumn '" << field_name
+ << "' has no index in this segment, creating empty
BitmapQuery (no matches)";
+ // Variant subcolumn doesn't exist - create empty BitmapQuery (no
matches)
+ *out = std::make_shared<query_v2::BitmapQuery>(roaring::Roaring());
+ if (binding_key) {
+ binding_key->clear();
+ }
+ return Status::OK();
+ }
+
if (binding_key) {
*binding_key = binding.binding_key;
}
diff --git a/be/src/vec/functions/function_search.h
b/be/src/vec/functions/function_search.h
index 75977aa2440..45cc398915c 100644
--- a/be/src/vec/functions/function_search.h
+++ b/be/src/vec/functions/function_search.h
@@ -57,14 +57,28 @@ public:
const std::unordered_map<std::string,
vectorized::IndexFieldNameAndTypePair>&
data_type_with_names,
const std::unordered_map<std::string, IndexIterator*>& iterators,
- std::shared_ptr<IndexQueryContext> context)
+ std::shared_ptr<IndexQueryContext> context,
+ const std::vector<TSearchFieldBinding>& field_bindings = {})
: _data_type_with_names(data_type_with_names),
_iterators(iterators),
- _context(std::move(context)) {}
+ _context(std::move(context)),
+ _field_bindings(field_bindings) {
+ // Build a lookup map for quick variant subcolumn checks
+ for (const auto& binding : _field_bindings) {
+ if (binding.__isset.is_variant_subcolumn &&
binding.is_variant_subcolumn) {
+ _variant_subcolumn_fields.insert(binding.field_name);
+ }
+ }
+ }
Status resolve(const std::string& field_name, InvertedIndexQueryType
query_type,
FieldReaderBinding* binding);
+ // Check if a field is a variant subcolumn
+ bool is_variant_subcolumn(const std::string& field_name) const {
+ return _variant_subcolumn_fields.count(field_name) > 0;
+ }
+
const std::vector<std::shared_ptr<lucene::index::IndexReader>>& readers()
const {
return _readers;
}
@@ -94,6 +108,8 @@ private:
_data_type_with_names;
const std::unordered_map<std::string, IndexIterator*>& _iterators;
std::shared_ptr<IndexQueryContext> _context;
+ std::vector<TSearchFieldBinding> _field_bindings;
+ std::unordered_set<std::string> _variant_subcolumn_fields;
std::unordered_map<std::string, FieldReaderBinding> _cache;
std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
std::unordered_map<std::string,
std::shared_ptr<lucene::index::IndexReader>> _binding_readers;
diff --git
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
index 812aaf4a7ef..4dab0af2ed2 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
@@ -48,6 +48,7 @@ NOT : 'NOT' | 'not' | '!' ;
LPAREN : '(' ;
RPAREN : ')' ;
COLON : ':' ;
+DOT : '.' ; // Support for variant subcolumn access (e.g.,
field.subcolumn)
QUOTED : '"' QUOTED_CHAR* '"' ;
TERM : TERM_START_CHAR TERM_CHAR* ;
diff --git
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
index a2a357450bf..0b3b9734883 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
@@ -25,8 +25,11 @@ orClause : andClause (OR andClause)* ;
andClause : notClause (AND notClause)* ;
notClause : NOT atomClause | atomClause ;
atomClause : LPAREN clause RPAREN | fieldQuery ;
-fieldQuery : fieldName COLON searchValue ;
-fieldName : TERM | QUOTED ;
+
+// Support for variant subcolumn paths (e.g., field.subcolumn, field.sub1.sub2)
+fieldQuery : fieldPath COLON searchValue ;
+fieldPath : fieldSegment (DOT fieldSegment)* ;
+fieldSegment : TERM | QUOTED ;
searchValue
: TERM
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
index e8213157991..659e3acee3a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
@@ -146,7 +146,29 @@ public class SearchPredicate extends Predicate {
for (int i = 0; i < qsPlan.fieldBindings.size(); i++) {
SearchDslParser.QsFieldBinding binding =
qsPlan.fieldBindings.get(i);
TSearchFieldBinding thriftBinding = new TSearchFieldBinding();
- thriftBinding.setFieldName(binding.fieldName);
+
+ String fieldPath = binding.fieldName;
+ thriftBinding.setFieldName(fieldPath);
+
+ // Check if this is a variant subcolumn (contains dot)
+ if (fieldPath.contains(".")) {
+ // Parse variant subcolumn path
+ int firstDotPos = fieldPath.indexOf('.');
+ String parentField = fieldPath.substring(0, firstDotPos);
+ String subcolumnPath = fieldPath.substring(firstDotPos + 1);
+
+ thriftBinding.setIsVariantSubcolumn(true);
+ thriftBinding.setParentFieldName(parentField);
+ thriftBinding.setSubcolumnPath(subcolumnPath);
+
+ LOG.info("buildThriftParam: variant subcolumn field='{}',
parent='{}', subcolumn='{}'",
+ fieldPath, parentField, subcolumnPath);
+ } else {
+ thriftBinding.setIsVariantSubcolumn(false);
+ }
+
+ // Set slot index - this is the index in the children array, not
the slotId
+ thriftBinding.setSlotIndex(i);
if (i < this.children.size() && this.children.get(i) instanceof
SlotRef) {
SlotRef slotRef = (SlotRef) this.children.get(i);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index f3f998bce89..7eed68241f5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -799,7 +799,6 @@ public class Rewriter extends AbstractBatchJobExecutor {
custom(RuleType.ADJUST_CONJUNCTS_RETURN_TYPE,
AdjustConjunctsReturnType::new),
bottomUp(
new
ExpressionRewrite(CheckLegalityAfterRewrite.INSTANCE),
- new RewriteSearchToSlots(),
new CheckMatchExpression(),
new CheckMultiDistinct(),
new CheckRestorePartition(),
@@ -899,6 +898,12 @@ public class Rewriter extends AbstractBatchJobExecutor {
rewriteJobs.addAll(jobs(topic("split multi distinct",
custom(RuleType.DISTINCT_AGG_STRATEGY_SELECTOR, () ->
DistinctAggStrategySelector.INSTANCE))));
+ // Rewrite search function before VariantSubPathPruning
+ // so that ElementAt expressions from search can be processed
+ rewriteJobs.addAll(jobs(
+ bottomUp(new RewriteSearchToSlots())
+ ));
+
if (needSubPathPushDown) {
rewriteJobs.addAll(jobs(
topic("variant element_at push down",
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
index d8a7d47522d..3114fff0593 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
@@ -23,8 +23,10 @@ import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SearchExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Search;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
+import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
@@ -101,16 +103,56 @@ public class RewriteSearchToSlots extends
OneRewriteRuleFactory {
// Create slot reference children from field bindings
List<Expression> slotChildren = new ArrayList<>();
for (SearchDslParser.QsFieldBinding binding :
qsPlan.fieldBindings) {
- Slot slot = findSlotByName(binding.fieldName, scan);
- if (slot == null) {
- throw new AnalysisException(String.format(
- "Field '%s' not found in table for search: %s",
- binding.fieldName, search.getDslString()));
+ String originalFieldName = binding.fieldName;
+ Expression childExpr;
+ String normalizedFieldName;
+
+ // Check if this is a variant subcolumn (contains dot)
+ if (originalFieldName.contains(".")) {
+ int firstDotPos = originalFieldName.indexOf('.');
+ String parentFieldName = originalFieldName.substring(0,
firstDotPos);
+ String subcolumnPath =
originalFieldName.substring(firstDotPos + 1);
+
+ // Find parent slot
+ Slot parentSlot = findSlotByName(parentFieldName, scan);
+ if (parentSlot == null) {
+ throw new AnalysisException(String.format(
+ "Parent field '%s' not found in table for
search: %s",
+ parentFieldName, search.getDslString()));
+ }
+
+ // Verify it's a variant type
+ if (!parentSlot.getDataType().isVariantType()) {
+ throw new AnalysisException(String.format(
+ "Field '%s' is not VARIANT type for subcolumn
access: %s",
+ parentFieldName, search.getDslString()));
+ }
+
+ // Create ElementAt expression for variant subcolumn
+ // This will be converted to an extracted column slot by
VariantSubPathPruning rule
+ // If the subcolumn doesn't exist, ElementAt will remain
and BE will handle it gracefully
+ childExpr = new ElementAt(parentSlot, new
StringLiteral(subcolumnPath));
+ normalizedFieldName = originalFieldName; // Keep full path
for field binding
+
+ LOG.info(
+ "Created ElementAt expression for variant
subcolumn: parent='{}', "
+ + "subcolumn='{}', field_name='{}'",
+ parentFieldName, subcolumnPath,
normalizedFieldName);
+ } else {
+ // Normal field - find slot directly
+ Slot slot = findSlotByName(originalFieldName, scan);
+ if (slot == null) {
+ throw new AnalysisException(String.format(
+ "Field '%s' not found in table for search: %s",
+ originalFieldName, search.getDslString()));
+ }
+ childExpr = slot;
+ normalizedFieldName = slot.getName();
}
- String normalized = slot.getName();
- normalizedFields.put(binding.fieldName, normalized);
- binding.fieldName = normalized;
- slotChildren.add(slot);
+
+ normalizedFields.put(originalFieldName, normalizedFieldName);
+ binding.fieldName = normalizedFieldName;
+ slotChildren.add(childExpr);
}
LOG.info("Rewriting search function: dsl='{}' with {} slot
children",
@@ -127,6 +169,7 @@ public class RewriteSearchToSlots extends
OneRewriteRuleFactory {
}
private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
+ // Direct match only - variant subcolumns are handled by caller
for (Slot slot : scan.getOutput()) {
if (slot.getName().equalsIgnoreCase(fieldName)) {
return slot;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
index 6caca420c9f..a811d39552b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
@@ -18,6 +18,7 @@
package org.apache.doris.nereids.trees.expressions;
import org.apache.doris.nereids.exceptions.UnboundException;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BooleanType;
@@ -71,10 +72,11 @@ public class SearchExpression extends Expression {
@Override
public SearchExpression withChildren(List<Expression> children) {
- // Validate that all children are SlotReference
+ // Validate that all children are SlotReference or ElementAt (for
variant subcolumns)
for (Expression child : children) {
- if (!(child instanceof SlotReference)) {
- throw new IllegalArgumentException("SearchExpression children
must be SlotReference instances");
+ if (!(child instanceof SlotReference || child instanceof
ElementAt)) {
+ throw new IllegalArgumentException(
+ "SearchExpression children must be SlotReference or
ElementAt instances");
}
}
return new SearchExpression(dslString, qsPlan, children);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index a913c88f3ad..e9c79acf9dc 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -234,18 +234,32 @@ public class SearchDslParser {
@Override
public QsNode visitFieldQuery(SearchParser.FieldQueryContext ctx) {
- if (ctx.fieldName() == null) {
- throw new RuntimeException("Invalid field query: missing field
name");
+ if (ctx.fieldPath() == null) {
+ throw new RuntimeException("Invalid field query: missing field
path");
}
- String fieldName = ctx.fieldName().getText();
- if (fieldName.startsWith("\"") && fieldName.endsWith("\"")) {
- fieldName = fieldName.substring(1, fieldName.length() - 1);
+
+ // Build complete field path from segments (support
field.subcolumn syntax)
+ StringBuilder fullPath = new StringBuilder();
+ List<SearchParser.FieldSegmentContext> segments =
ctx.fieldPath().fieldSegment();
+
+ for (int i = 0; i < segments.size(); i++) {
+ if (i > 0) {
+ fullPath.append('.');
+ }
+ String segment = segments.get(i).getText();
+ // Remove quotes if present
+ if (segment.startsWith("\"") && segment.endsWith("\"")) {
+ segment = segment.substring(1, segment.length() - 1);
+ }
+ fullPath.append(segment);
}
- fieldNames.add(fieldName);
+
+ String fieldPath = fullPath.toString();
+ fieldNames.add(fieldPath);
// Set current field context before visiting search value
String previousFieldName = currentFieldName;
- currentFieldName = fieldName;
+ currentFieldName = fieldPath;
try {
if (ctx.searchValue() == null) {
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index b741a123f43..3c69243d537 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -246,8 +246,11 @@ struct TSearchClause {
}
struct TSearchFieldBinding {
- 1: required string field_name // Field name from DSL
+ 1: required string field_name // Field name from DSL (may include path
like "field.subcolumn")
2: required i32 slot_index // Index in the slot reference arguments
+ 3: optional string parent_field_name // Parent field name for variant
subcolumns
+ 4: optional string subcolumn_path // Subcolumn path for variant fields
(e.g., "subcolumn" or "sub1.sub2")
+ 5: optional bool is_variant_subcolumn // True if this is a variant
subcolumn access
}
struct TSearchParam {
diff --git a/regression-test/data/variant_p0/test_variant_search_subcolumn.out
b/regression-test/data/variant_p0/test_variant_search_subcolumn.out
new file mode 100644
index 00000000000..f2e9d8eec79
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_search_subcolumn.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test1 --
+1
+3
+
+-- !test2 --
+1
+
+-- !test3 --
+1
+
+-- !test4 --
+2
+4
+
+-- !test5 --
+0
+
+-- !test6 --
+6
+
+-- !test7 --
+1
+3
+5
+
+-- !test8 --
+7
+
+-- !test10 --
+7
+
diff --git
a/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy
b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy
new file mode 100644
index 00000000000..0571dcc2f0b
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_search_subcolumn") {
+ def table_name = "test_variant_search_subcolumn"
+
+ sql "DROP TABLE IF EXISTS ${table_name}"
+
+ // Create table with variant column and inverted index
+ sql """
+ CREATE TABLE ${table_name} (
+ id BIGINT,
+ overflowpropertiesfulltext
VARIANT<PROPERTIES("variant_max_subcolumns_count"="0")>,
+ INDEX idx_overflow (overflowpropertiesfulltext) USING INVERTED
PROPERTIES (
+ "parser" = "unicode",
+ "lower_case" = "true",
+ "support_phrase" = "true"
+ )
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 4
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "inverted_index_storage_format" = "V2"
+ )
+ """
+
+ // Insert test data
+ sql """
+ INSERT INTO ${table_name} VALUES
+ (1, '{"string4": "0ff dpr test"}'),
+ (2, '{"string4": "hello world"}'),
+ (3, '{"string4": "0ff test"}'),
+ (4, '{"string5": "0ff dpr"}'),
+ (5, '{"string4": "dpr only"}'),
+ (6, '{"nested": {"field": "0ff dpr"}}')
+ """
+
+ // Wait for data to be flushed and index to be built
+ Thread.sleep(10000)
+
+ // Test 1: Single term search on variant subcolumn
+ logger.info("Test 1: Single term search on variant subcolumn")
+ qt_test1 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.string4:0ff')
+ ORDER BY id
+ """
+ // Expected: 1, 3
+
+ // Test 2: AND query on same variant subcolumn
+ logger.info("Test 2: AND query on same variant subcolumn")
+ qt_test2 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.string4:0ff AND
overflowpropertiesfulltext.string4:dpr')
+ ORDER BY id
+ """
+ // Expected: 1
+
+ // Test 3: ALL search on variant subcolumn
+ logger.info("Test 3: ALL search on variant subcolumn")
+ qt_test3 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.string4:ALL(0ff dpr)')
+ ORDER BY id
+ """
+ // Expected: 1
+
+ // Test 4: Search on different variant subcolumns (OR)
+ logger.info("Test 4: Search on different variant subcolumns")
+ qt_test4 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.string4:hello OR
overflowpropertiesfulltext.string5:dpr')
+ ORDER BY id
+ """
+ // Expected: 2, 4
+
+ // Test 5: Search on non-existent subcolumn
+ logger.info("Test 5: Search on non-existent subcolumn")
+ qt_test5 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ COUNT(*) FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.nonexistent:value')
+ """
+ // Expected: 0
+
+ // Test 6: Nested variant path
+ logger.info("Test 6: Nested variant path")
+ qt_test6 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.nested.field:0ff')
+ ORDER BY id
+ """
+ // Expected: 6
+
+ // Test 7: Complex query with variant subcolumns
+ logger.info("Test 7: Complex query with variant subcolumns")
+ qt_test7 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('(overflowpropertiesfulltext.string4:0ff OR
overflowpropertiesfulltext.string4:dpr) AND NOT
overflowpropertiesfulltext.string4:hello')
+ ORDER BY id
+ """
+ // Expected: 1, 3, 5
+
+ // Test 8: Quoted field names with special characters
+ logger.info("Test 8: Quoted field names")
+ sql """
+ INSERT INTO ${table_name} VALUES
+ (7, '{"field-name": "test value"}')
+ """
+ Thread.sleep(5000)
+
+ qt_test8 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ WHERE search('overflowpropertiesfulltext.field-name:test')
+ ORDER BY id
+ """
+ // Expected: 7
+
+ // Test 9: Wildcard search on variant subcolumn
+ //logger.info("Test 9: Wildcard search on variant subcolumn")
+ //qt_test9 """
+ // SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+ // WHERE search('overflowpropertiesfulltext.string4:0*')
+ // ORDER BY id
+ //"""
+ // Expected: 1, 3
+
+ // Test 10: Verify normal field search still works
+ logger.info("Test 10: Verify normal field search still works (if id has
index)")
+ // This test verifies we didn't break normal field search
+ qt_test10 """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true,
default_variant_max_subcolumns_count=0)*/ COUNT(*) FROM ${table_name}
+ WHERE id > 0
+ """
+ // Expected: 7
+
+ logger.info("Variant subcolumn search tests completed successfully!")
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]