This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new a89097f0efe branch-4.0: [feature](search) add variant subcolumn 
support for search function #56718 (#57049)
a89097f0efe is described below

commit a89097f0efe4f2a0fd1872c93388d84b06ffcbc4
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Oct 17 15:01:01 2025 +0800

    branch-4.0: [feature](search) add variant subcolumn support for search 
function #56718 (#57049)
    
    Cherry-picked from #56718
    
    Co-authored-by: Jack <[email protected]>
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  10 +-
 be/src/vec/exprs/vsearch.cpp                       |  87 ++++++------
 be/src/vec/exprs/vsearch.h                         |   2 +
 be/src/vec/functions/function_search.cpp           |  61 +++++++--
 be/src/vec/functions/function_search.h             |  20 ++-
 .../org/apache/doris/nereids/search/SearchLexer.g4 |   1 +
 .../apache/doris/nereids/search/SearchParser.g4    |   7 +-
 .../org/apache/doris/analysis/SearchPredicate.java |  24 +++-
 .../doris/nereids/jobs/executor/Rewriter.java      |   7 +-
 .../rules/rewrite/RewriteSearchToSlots.java        |  61 +++++++--
 .../trees/expressions/SearchExpression.java        |   8 +-
 .../functions/scalar/SearchDslParser.java          |  28 +++-
 gensrc/thrift/Exprs.thrift                         |   5 +-
 .../variant_p0/test_variant_search_subcolumn.out   |  32 +++++
 .../test_variant_search_subcolumn.groovy           | 152 +++++++++++++++++++++
 15 files changed, 429 insertions(+), 76 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e189156d68e..b60cfc5e1f6 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -605,10 +605,16 @@ Status 
SegmentIterator::_get_row_ranges_by_column_conditions() {
     {
         if (_opts.runtime_state &&
             _opts.runtime_state->query_options().enable_inverted_index_query &&
-            has_index_in_iterators()) {
+            (has_index_in_iterators() || 
!_common_expr_ctxs_push_down.empty())) {
             SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer);
             size_t input_rows = _row_bitmap.cardinality();
-            RETURN_IF_ERROR(_apply_inverted_index());
+            // Only apply column-level inverted index if we have iterators
+            if (has_index_in_iterators()) {
+                RETURN_IF_ERROR(_apply_inverted_index());
+            }
+            // Always apply expr-level index (e.g., search expressions) if we 
have common_expr_pushdown
+            // This allows search expressions with variant subcolumns to be 
evaluated even when
+            // the segment doesn't have all subcolumns
             RETURN_IF_ERROR(_apply_index_expr());
             for (auto it = _common_expr_ctxs_push_down.begin();
                  it != _common_expr_ctxs_push_down.end();) {
diff --git a/be/src/vec/exprs/vsearch.cpp b/be/src/vec/exprs/vsearch.cpp
index fefe61ddb5f..e32209d614b 100644
--- a/be/src/vec/exprs/vsearch.cpp
+++ b/be/src/vec/exprs/vsearch.cpp
@@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr, 
VExprContext* context,
 
     auto index_context = context->get_inverted_index_context();
     if (index_context == nullptr) {
-        return Status::OK();
+        LOG(WARNING) << "collect_search_inputs: No inverted index context 
available";
+        return Status::InternalError("No inverted index context available");
     }
 
+    // Get field bindings for variant subcolumn support
+    const auto& search_param = expr.get_search_param();
+    const auto& field_bindings = search_param.field_bindings;
+
+    int child_index = 0; // Index for iterating through children
     for (const auto& child : expr.children()) {
         if (child->is_slot_ref()) {
             auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
             int column_id = column_slot_ref->column_id();
             auto* iterator = 
index_context->get_inverted_index_iterator_by_column_id(column_id);
-            if (iterator == nullptr) {
-                continue;
+
+            // Determine the field_name from field_bindings (for variant 
subcolumns)
+            // field_bindings and children should have the same order
+            std::string field_name;
+            if (child_index < field_bindings.size()) {
+                // Use field_name from binding (may include "parent.subcolumn" 
for variant)
+                field_name = field_bindings[child_index].field_name;
+            } else {
+                // Fallback to column_name if binding not found
+                field_name = column_slot_ref->column_name();
             }
 
-            const auto* storage_name_type =
-                    
index_context->get_storage_name_and_type_by_column_id(column_id);
-            if (storage_name_type == nullptr) {
-                auto err_msg = fmt::format(
-                        "storage_name_type cannot be found for column {} while 
in {} evaluate",
-                        column_id, expr.expr_name());
-                LOG(ERROR) << err_msg;
-                return Status::InternalError(err_msg);
+            // Only collect fields that have iterators (materialized columns 
with indexes)
+            if (iterator != nullptr) {
+                const auto* storage_name_type =
+                        
index_context->get_storage_name_and_type_by_column_id(column_id);
+                if (storage_name_type == nullptr) {
+                    return Status::InternalError("storage_name_type not found 
for column {} in {}",
+                                                 column_id, expr.expr_name());
+                }
+
+                bundle->iterators.emplace(field_name, iterator);
+                bundle->field_types.emplace(field_name, *storage_name_type);
+                bundle->column_ids.emplace_back(column_id);
             }
 
-            auto column_name = column_slot_ref->column_name();
-            bundle->iterators.emplace(column_name, iterator);
-            bundle->field_types.emplace(column_name, *storage_name_type);
-            bundle->column_ids.emplace_back(column_id);
+            child_index++;
         } else if (child->is_literal()) {
             auto* literal = assert_cast<VLiteral*>(child.get());
             bundle->literal_args.emplace_back(literal->get_column_ptr(), 
literal->get_data_type(),
                                               literal->expr_name());
         } else {
-            LOG(WARNING) << "VSearchExpr: Unsupported child node type 
encountered";
-            return Status::InvalidArgument("search expression child type 
unsupported");
+            // Check if this is ElementAt expression (for variant subcolumn 
access)
+            if (child->expr_name() == "element_at" && child_index < 
field_bindings.size() &&
+                field_bindings[child_index].__isset.is_variant_subcolumn &&
+                field_bindings[child_index].is_variant_subcolumn) {
+                // Variant subcolumn not materialized - skip, will create 
empty BitmapQuery in function_search
+                child_index++;
+                continue;
+            }
+
+            // Not a supported child type
+            return Status::InvalidArgument("Unsupported child node type: {}", 
child->expr_name());
         }
     }
 
@@ -94,16 +118,6 @@ VSearchExpr::VSearchExpr(const TExprNode& node) : 
VExpr(node) {
         _search_param = node.search_param;
         _original_dsl = _search_param.original_dsl;
     }
-
-    LOG(INFO) << "VSearchExpr constructor: dsl='" << _original_dsl
-              << "', num_children=" << node.num_children
-              << ", has_search_param=" << node.__isset.search_param
-              << ", children_size=" << _children.size();
-
-    for (size_t i = 0; i < _children.size(); i++) {
-        LOG(INFO) << "VSearchExpr constructor: child[" << i
-                  << "] expr_name=" << _children[i]->expr_name();
-    }
 }
 
 const std::string& VSearchExpr::expr_name() const {
@@ -120,7 +134,7 @@ Status VSearchExpr::execute(VExprContext* context, Block* 
block, int* result_col
 }
 
 Status VSearchExpr::evaluate_inverted_index(VExprContext* context, uint32_t 
segment_num_rows) {
-    LOG(INFO) << "VSearchExpr::evaluate_inverted_index called with DSL: " << 
_original_dsl;
+    LOG(INFO) << "VSearchExpr::evaluate_inverted_index called, DSL: " << 
_search_param.original_dsl;
 
     if (_search_param.original_dsl.empty()) {
         return Status::InvalidArgument("search DSL is empty");
@@ -135,8 +149,14 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* 
context, uint32_t segm
     SearchInputBundle bundle;
     RETURN_IF_ERROR(collect_search_inputs(*this, context, &bundle));
 
+    VLOG_DEBUG << "VSearchExpr: bundle.iterators.size()=" << 
bundle.iterators.size();
+
     if (bundle.iterators.empty()) {
-        LOG(WARNING) << "VSearchExpr: No indexed columns available for 
evaluation";
+        LOG(WARNING) << "VSearchExpr: No indexed columns available for 
evaluation, DSL: "
+                     << _original_dsl;
+        auto empty_bitmap = 
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+                                                      
std::make_shared<roaring::Roaring>());
+        index_context->set_inverted_index_result_for_expr(this, 
std::move(empty_bitmap));
         return Status::OK();
     }
 
@@ -155,15 +175,6 @@ Status VSearchExpr::evaluate_inverted_index(VExprContext* 
context, uint32_t segm
         index_context->set_true_for_inverted_index_status(this, column_id);
     }
 
-    const auto& data_bitmap = result_bitmap.get_data_bitmap();
-    const uint64_t match_count = data_bitmap ? data_bitmap->cardinality() : 0;
-    if (match_count > 0) {
-        LOG(INFO) << "VSearchExpr: Found " << match_count
-                  << " matching rows for DSL: " << _search_param.original_dsl;
-    } else {
-        LOG(INFO) << "VSearchExpr: No matches found for DSL: " << 
_search_param.original_dsl;
-    }
-
     return Status::OK();
 }
 
diff --git a/be/src/vec/exprs/vsearch.h b/be/src/vec/exprs/vsearch.h
index 3fa8399bc77..d9b5e40985c 100644
--- a/be/src/vec/exprs/vsearch.h
+++ b/be/src/vec/exprs/vsearch.h
@@ -39,6 +39,8 @@ public:
 
     bool can_push_down_to_index() const override { return true; }
 
+    const TSearchParam& get_search_param() const { return _search_param; }
+
 private:
     TSearchParam _search_param;
     std::string _original_dsl;
diff --git a/be/src/vec/functions/function_search.cpp 
b/be/src/vec/functions/function_search.cpp
index 45a6f4c1acf..d53024a0dde 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -35,6 +35,7 @@
 #include "olap/rowset/segment_v2/index_file_reader.h"
 #include "olap/rowset/segment_v2/index_query_context.h"
 #include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
+#include 
"olap/rowset/segment_v2/inverted_index/query_v2/bitmap_query/bitmap_query.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query.h"
 #include "olap/rowset/segment_v2/inverted_index/query_v2/operator.h"
 #include 
"olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_query.h"
@@ -52,8 +53,21 @@ Status FieldReaderResolver::resolve(const std::string& 
field_name,
                                     InvertedIndexQueryType query_type,
                                     FieldReaderBinding* binding) {
     DCHECK(binding != nullptr);
+
+    // Check if this is a variant subcolumn
+    bool is_variant_sub = is_variant_subcolumn(field_name);
+
     auto data_it = _data_type_with_names.find(field_name);
     if (data_it == _data_type_with_names.end()) {
+        // For variant subcolumns, not finding the index is normal (the 
subcolumn may not exist in this segment)
+        // Return OK but with null binding to signal "no match"
+        if (is_variant_sub) {
+            VLOG_DEBUG << "Variant subcolumn '" << field_name
+                       << "' not found in this segment, treating as no match";
+            *binding = FieldReaderBinding();
+            return Status::OK();
+        }
+        // For normal fields, this is an error
         return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
                 "field '{}' not found in inverted index metadata", field_name);
     }
@@ -69,6 +83,13 @@ Status FieldReaderResolver::resolve(const std::string& 
field_name,
 
     auto iterator_it = _iterators.find(field_name);
     if (iterator_it == _iterators.end() || iterator_it->second == nullptr) {
+        // For variant subcolumns, not finding the iterator is normal
+        if (is_variant_sub) {
+            VLOG_DEBUG << "Variant subcolumn '" << field_name
+                       << "' iterator not found in this segment, treating as 
no match";
+            *binding = FieldReaderBinding();
+            return Status::OK();
+        }
         return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
                 "iterator not found for field '{}'", field_name);
     }
@@ -171,12 +192,11 @@ Status 
FunctionSearch::evaluate_inverted_index_with_search_param(
                 data_type_with_names,
         std::unordered_map<std::string, IndexIterator*> iterators, uint32_t 
num_rows,
         InvertedIndexResultBitmap& bitmap_result) const {
-    VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl << 
"' with "
-               << data_type_with_names.size() << " indexed columns and " << 
iterators.size()
-               << " iterators";
-
     if (iterators.empty() || data_type_with_names.empty()) {
-        LOG(INFO) << "No indexed columns or iterators available, returning 
empty result";
+        LOG(INFO) << "No indexed columns or iterators available, returning 
empty result, dsl:"
+                  << search_param.original_dsl;
+        bitmap_result = 
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+                                                  
std::make_shared<roaring::Roaring>());
         return Status::OK();
     }
 
@@ -184,14 +204,19 @@ Status 
FunctionSearch::evaluate_inverted_index_with_search_param(
     context->collection_statistics = std::make_shared<CollectionStatistics>();
     context->collection_similarity = std::make_shared<CollectionSimilarity>();
 
-    FieldReaderResolver resolver(data_type_with_names, iterators, context);
+    // Pass field_bindings to resolver for variant subcolumn detection
+    FieldReaderResolver resolver(data_type_with_names, iterators, context,
+                                 search_param.field_bindings);
 
     query_v2::QueryPtr root_query;
     std::string root_binding_key;
     RETURN_IF_ERROR(build_query_recursive(*this, search_param.root, context, 
resolver, &root_query,
                                           &root_binding_key));
     if (root_query == nullptr) {
-        LOG(INFO) << "search: Query tree resolved to empty query";
+        LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
+                  << search_param.original_dsl;
+        bitmap_result = 
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+                                                  
std::make_shared<roaring::Roaring>());
         return Status::OK();
     }
 
@@ -393,9 +418,12 @@ Status FunctionSearch::build_query_recursive(const 
FunctionSearch& function,
                 std::string child_binding_key;
                 RETURN_IF_ERROR(build_query_recursive(function, child_clause, 
context, resolver,
                                                       &child_query, 
&child_binding_key));
-                if (child_query != nullptr) {
-                    builder.add(child_query, std::move(child_binding_key));
-                }
+                // Add all children including empty BitmapQuery
+                // BooleanQuery will handle the logic:
+                // - AND with empty bitmap → result is empty
+                // - OR with empty bitmap → empty bitmap is ignored by OR logic
+                // - NOT with empty bitmap → NOT(empty) = all rows (handled by 
BooleanQuery)
+                builder.add(child_query, std::move(child_binding_key));
             }
         }
 
@@ -429,6 +457,19 @@ Status FunctionSearch::build_leaf_query(const 
FunctionSearch& function, const TS
 
     FieldReaderBinding binding;
     RETURN_IF_ERROR(resolver.resolve(field_name, query_type, &binding));
+
+    // Check if binding is empty (variant subcolumn not found in this segment)
+    if (binding.lucene_reader == nullptr) {
+        VLOG_DEBUG << "build_leaf_query: Variant subcolumn '" << field_name
+                   << "' has no index in this segment, creating empty 
BitmapQuery (no matches)";
+        // Variant subcolumn doesn't exist - create empty BitmapQuery (no 
matches)
+        *out = std::make_shared<query_v2::BitmapQuery>(roaring::Roaring());
+        if (binding_key) {
+            binding_key->clear();
+        }
+        return Status::OK();
+    }
+
     if (binding_key) {
         *binding_key = binding.binding_key;
     }
diff --git a/be/src/vec/functions/function_search.h 
b/be/src/vec/functions/function_search.h
index 75977aa2440..45cc398915c 100644
--- a/be/src/vec/functions/function_search.h
+++ b/be/src/vec/functions/function_search.h
@@ -57,14 +57,28 @@ public:
             const std::unordered_map<std::string, 
vectorized::IndexFieldNameAndTypePair>&
                     data_type_with_names,
             const std::unordered_map<std::string, IndexIterator*>& iterators,
-            std::shared_ptr<IndexQueryContext> context)
+            std::shared_ptr<IndexQueryContext> context,
+            const std::vector<TSearchFieldBinding>& field_bindings = {})
             : _data_type_with_names(data_type_with_names),
               _iterators(iterators),
-              _context(std::move(context)) {}
+              _context(std::move(context)),
+              _field_bindings(field_bindings) {
+        // Build a lookup map for quick variant subcolumn checks
+        for (const auto& binding : _field_bindings) {
+            if (binding.__isset.is_variant_subcolumn && 
binding.is_variant_subcolumn) {
+                _variant_subcolumn_fields.insert(binding.field_name);
+            }
+        }
+    }
 
     Status resolve(const std::string& field_name, InvertedIndexQueryType 
query_type,
                    FieldReaderBinding* binding);
 
+    // Check if a field is a variant subcolumn
+    bool is_variant_subcolumn(const std::string& field_name) const {
+        return _variant_subcolumn_fields.count(field_name) > 0;
+    }
+
     const std::vector<std::shared_ptr<lucene::index::IndexReader>>& readers() 
const {
         return _readers;
     }
@@ -94,6 +108,8 @@ private:
             _data_type_with_names;
     const std::unordered_map<std::string, IndexIterator*>& _iterators;
     std::shared_ptr<IndexQueryContext> _context;
+    std::vector<TSearchFieldBinding> _field_bindings;
+    std::unordered_set<std::string> _variant_subcolumn_fields;
     std::unordered_map<std::string, FieldReaderBinding> _cache;
     std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
     std::unordered_map<std::string, 
std::shared_ptr<lucene::index::IndexReader>> _binding_readers;
diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
index 812aaf4a7ef..4dab0af2ed2 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchLexer.g4
@@ -48,6 +48,7 @@ NOT : 'NOT' | 'not' | '!' ;
 LPAREN   : '(' ;
 RPAREN   : ')' ;
 COLON    : ':' ;
+DOT      : '.' ;  // Support for variant subcolumn access (e.g., 
field.subcolumn)
 
 QUOTED   : '"' QUOTED_CHAR* '"' ;
 TERM     : TERM_START_CHAR TERM_CHAR* ;
diff --git 
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
index a2a357450bf..0b3b9734883 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
@@ -25,8 +25,11 @@ orClause   : andClause (OR andClause)* ;
 andClause  : notClause (AND notClause)* ;
 notClause  : NOT atomClause | atomClause ;
 atomClause : LPAREN clause RPAREN | fieldQuery ;
-fieldQuery : fieldName COLON searchValue ;
-fieldName  : TERM | QUOTED ;
+
+// Support for variant subcolumn paths (e.g., field.subcolumn, field.sub1.sub2)
+fieldQuery : fieldPath COLON searchValue ;
+fieldPath  : fieldSegment (DOT fieldSegment)* ;
+fieldSegment : TERM | QUOTED ;
 
 searchValue
     : TERM
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
index e8213157991..659e3acee3a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
@@ -146,7 +146,29 @@ public class SearchPredicate extends Predicate {
         for (int i = 0; i < qsPlan.fieldBindings.size(); i++) {
             SearchDslParser.QsFieldBinding binding = 
qsPlan.fieldBindings.get(i);
             TSearchFieldBinding thriftBinding = new TSearchFieldBinding();
-            thriftBinding.setFieldName(binding.fieldName);
+
+            String fieldPath = binding.fieldName;
+            thriftBinding.setFieldName(fieldPath);
+
+            // Check if this is a variant subcolumn (contains dot)
+            if (fieldPath.contains(".")) {
+                // Parse variant subcolumn path
+                int firstDotPos = fieldPath.indexOf('.');
+                String parentField = fieldPath.substring(0, firstDotPos);
+                String subcolumnPath = fieldPath.substring(firstDotPos + 1);
+
+                thriftBinding.setIsVariantSubcolumn(true);
+                thriftBinding.setParentFieldName(parentField);
+                thriftBinding.setSubcolumnPath(subcolumnPath);
+
+                LOG.info("buildThriftParam: variant subcolumn field='{}', 
parent='{}', subcolumn='{}'",
+                        fieldPath, parentField, subcolumnPath);
+            } else {
+                thriftBinding.setIsVariantSubcolumn(false);
+            }
+
+            // Set slot index - this is the index in the children array, not 
the slotId
+            thriftBinding.setSlotIndex(i);
 
             if (i < this.children.size() && this.children.get(i) instanceof 
SlotRef) {
                 SlotRef slotRef = (SlotRef) this.children.get(i);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index f3f998bce89..7eed68241f5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -799,7 +799,6 @@ public class Rewriter extends AbstractBatchJobExecutor {
                         custom(RuleType.ADJUST_CONJUNCTS_RETURN_TYPE, 
AdjustConjunctsReturnType::new),
                         bottomUp(
                                 new 
ExpressionRewrite(CheckLegalityAfterRewrite.INSTANCE),
-                                new RewriteSearchToSlots(),
                                 new CheckMatchExpression(),
                                 new CheckMultiDistinct(),
                                 new CheckRestorePartition(),
@@ -899,6 +898,12 @@ public class Rewriter extends AbstractBatchJobExecutor {
                 rewriteJobs.addAll(jobs(topic("split multi distinct",
                         custom(RuleType.DISTINCT_AGG_STRATEGY_SELECTOR, () -> 
DistinctAggStrategySelector.INSTANCE))));
 
+                // Rewrite search function before VariantSubPathPruning
+                // so that ElementAt expressions from search can be processed
+                rewriteJobs.addAll(jobs(
+                        bottomUp(new RewriteSearchToSlots())
+                ));
+
                 if (needSubPathPushDown) {
                     rewriteJobs.addAll(jobs(
                             topic("variant element_at push down",
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
index d8a7d47522d..3114fff0593 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java
@@ -23,8 +23,10 @@ import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.SearchExpression;
 import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
 import org.apache.doris.nereids.trees.expressions.functions.scalar.Search;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
+import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
 import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
@@ -101,16 +103,56 @@ public class RewriteSearchToSlots extends 
OneRewriteRuleFactory {
             // Create slot reference children from field bindings
             List<Expression> slotChildren = new ArrayList<>();
             for (SearchDslParser.QsFieldBinding binding : 
qsPlan.fieldBindings) {
-                Slot slot = findSlotByName(binding.fieldName, scan);
-                if (slot == null) {
-                    throw new AnalysisException(String.format(
-                            "Field '%s' not found in table for search: %s",
-                            binding.fieldName, search.getDslString()));
+                String originalFieldName = binding.fieldName;
+                Expression childExpr;
+                String normalizedFieldName;
+
+                // Check if this is a variant subcolumn (contains dot)
+                if (originalFieldName.contains(".")) {
+                    int firstDotPos = originalFieldName.indexOf('.');
+                    String parentFieldName = originalFieldName.substring(0, 
firstDotPos);
+                    String subcolumnPath = 
originalFieldName.substring(firstDotPos + 1);
+
+                    // Find parent slot
+                    Slot parentSlot = findSlotByName(parentFieldName, scan);
+                    if (parentSlot == null) {
+                        throw new AnalysisException(String.format(
+                                "Parent field '%s' not found in table for 
search: %s",
+                                parentFieldName, search.getDslString()));
+                    }
+
+                    // Verify it's a variant type
+                    if (!parentSlot.getDataType().isVariantType()) {
+                        throw new AnalysisException(String.format(
+                                "Field '%s' is not VARIANT type for subcolumn 
access: %s",
+                                parentFieldName, search.getDslString()));
+                    }
+
+                    // Create ElementAt expression for variant subcolumn
+                    // This will be converted to an extracted column slot by 
VariantSubPathPruning rule
+                    // If the subcolumn doesn't exist, ElementAt will remain 
and BE will handle it gracefully
+                    childExpr = new ElementAt(parentSlot, new 
StringLiteral(subcolumnPath));
+                    normalizedFieldName = originalFieldName; // Keep full path 
for field binding
+
+                    LOG.info(
+                            "Created ElementAt expression for variant 
subcolumn: parent='{}', "
+                                    + "subcolumn='{}', field_name='{}'",
+                            parentFieldName, subcolumnPath, 
normalizedFieldName);
+                } else {
+                    // Normal field - find slot directly
+                    Slot slot = findSlotByName(originalFieldName, scan);
+                    if (slot == null) {
+                        throw new AnalysisException(String.format(
+                                "Field '%s' not found in table for search: %s",
+                                originalFieldName, search.getDslString()));
+                    }
+                    childExpr = slot;
+                    normalizedFieldName = slot.getName();
                 }
-                String normalized = slot.getName();
-                normalizedFields.put(binding.fieldName, normalized);
-                binding.fieldName = normalized;
-                slotChildren.add(slot);
+
+                normalizedFields.put(originalFieldName, normalizedFieldName);
+                binding.fieldName = normalizedFieldName;
+                slotChildren.add(childExpr);
             }
 
             LOG.info("Rewriting search function: dsl='{}' with {} slot 
children",
@@ -127,6 +169,7 @@ public class RewriteSearchToSlots extends 
OneRewriteRuleFactory {
     }
 
     private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
+        // Direct match only - variant subcolumns are handled by caller
         for (Slot slot : scan.getOutput()) {
             if (slot.getName().equalsIgnoreCase(fieldName)) {
                 return slot;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
index 6caca420c9f..a811d39552b 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/SearchExpression.java
@@ -18,6 +18,7 @@
 package org.apache.doris.nereids.trees.expressions;
 
 import org.apache.doris.nereids.exceptions.UnboundException;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
 import 
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
 import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
 import org.apache.doris.nereids.types.BooleanType;
@@ -71,10 +72,11 @@ public class SearchExpression extends Expression {
 
     @Override
     public SearchExpression withChildren(List<Expression> children) {
-        // Validate that all children are SlotReference
+        // Validate that all children are SlotReference or ElementAt (for 
variant subcolumns)
         for (Expression child : children) {
-            if (!(child instanceof SlotReference)) {
-                throw new IllegalArgumentException("SearchExpression children 
must be SlotReference instances");
+            if (!(child instanceof SlotReference || child instanceof 
ElementAt)) {
+                throw new IllegalArgumentException(
+                        "SearchExpression children must be SlotReference or 
ElementAt instances");
             }
         }
         return new SearchExpression(dslString, qsPlan, children);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index a913c88f3ad..e9c79acf9dc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -234,18 +234,32 @@ public class SearchDslParser {
 
         @Override
         public QsNode visitFieldQuery(SearchParser.FieldQueryContext ctx) {
-            if (ctx.fieldName() == null) {
-                throw new RuntimeException("Invalid field query: missing field 
name");
+            if (ctx.fieldPath() == null) {
+                throw new RuntimeException("Invalid field query: missing field 
path");
             }
-            String fieldName = ctx.fieldName().getText();
-            if (fieldName.startsWith("\"") && fieldName.endsWith("\"")) {
-                fieldName = fieldName.substring(1, fieldName.length() - 1);
+
+            // Build complete field path from segments (support 
field.subcolumn syntax)
+            StringBuilder fullPath = new StringBuilder();
+            List<SearchParser.FieldSegmentContext> segments = 
ctx.fieldPath().fieldSegment();
+
+            for (int i = 0; i < segments.size(); i++) {
+                if (i > 0) {
+                    fullPath.append('.');
+                }
+                String segment = segments.get(i).getText();
+                // Remove quotes if present
+                if (segment.startsWith("\"") && segment.endsWith("\"")) {
+                    segment = segment.substring(1, segment.length() - 1);
+                }
+                fullPath.append(segment);
             }
-            fieldNames.add(fieldName);
+
+            String fieldPath = fullPath.toString();
+            fieldNames.add(fieldPath);
 
             // Set current field context before visiting search value
             String previousFieldName = currentFieldName;
-            currentFieldName = fieldName;
+            currentFieldName = fieldPath;
 
             try {
                 if (ctx.searchValue() == null) {
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index b741a123f43..3c69243d537 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -246,8 +246,11 @@ struct TSearchClause {
 }
 
 struct TSearchFieldBinding {
-  1: required string field_name   // Field name from DSL
+  1: required string field_name   // Field name from DSL (may include path 
like "field.subcolumn")
   2: required i32 slot_index      // Index in the slot reference arguments
+  3: optional string parent_field_name    // Parent field name for variant 
subcolumns
+  4: optional string subcolumn_path       // Subcolumn path for variant fields 
(e.g., "subcolumn" or "sub1.sub2")
+  5: optional bool is_variant_subcolumn   // True if this is a variant 
subcolumn access
 }
 
 struct TSearchParam {
diff --git a/regression-test/data/variant_p0/test_variant_search_subcolumn.out 
b/regression-test/data/variant_p0/test_variant_search_subcolumn.out
new file mode 100644
index 00000000000..f2e9d8eec79
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_search_subcolumn.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test1 --
+1
+3
+
+-- !test2 --
+1
+
+-- !test3 --
+1
+
+-- !test4 --
+2
+4
+
+-- !test5 --
+0
+
+-- !test6 --
+6
+
+-- !test7 --
+1
+3
+5
+
+-- !test8 --
+7
+
+-- !test10 --
+7
+
diff --git 
a/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy 
b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy
new file mode 100644
index 00000000000..0571dcc2f0b
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_search_subcolumn.groovy
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_search_subcolumn") {
+    def table_name = "test_variant_search_subcolumn"
+
+    sql "DROP TABLE IF EXISTS ${table_name}"
+
+    // Create table with variant column and inverted index
+    sql """
+        CREATE TABLE ${table_name} (
+            id BIGINT,
+            overflowpropertiesfulltext 
VARIANT<PROPERTIES("variant_max_subcolumns_count"="0")>,
+            INDEX idx_overflow (overflowpropertiesfulltext) USING INVERTED 
PROPERTIES (
+                "parser" = "unicode",
+                "lower_case" = "true",
+                "support_phrase" = "true"
+            )
+        ) ENGINE=OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 4
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "inverted_index_storage_format" = "V2"
+        )
+    """
+
+    // Insert test data
+    sql """
+        INSERT INTO ${table_name} VALUES
+        (1, '{"string4": "0ff dpr test"}'),
+        (2, '{"string4": "hello world"}'),
+        (3, '{"string4": "0ff test"}'),
+        (4, '{"string5": "0ff dpr"}'),
+        (5, '{"string4": "dpr only"}'),
+        (6, '{"nested": {"field": "0ff dpr"}}')
+    """
+
+    // Wait for data to be flushed and index to be built
+    Thread.sleep(10000)
+
+    // Test 1: Single term search on variant subcolumn
+    logger.info("Test 1: Single term search on variant subcolumn")
+    qt_test1 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.string4:0ff')
+        ORDER BY id
+    """
+    // Expected: 1, 3
+
+    // Test 2: AND query on same variant subcolumn
+    logger.info("Test 2: AND query on same variant subcolumn")
+    qt_test2 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.string4:0ff AND 
overflowpropertiesfulltext.string4:dpr')
+        ORDER BY id
+    """
+    // Expected: 1
+
+    // Test 3: ALL search on variant subcolumn
+    logger.info("Test 3: ALL search on variant subcolumn")
+    qt_test3 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.string4:ALL(0ff dpr)')
+        ORDER BY id
+    """
+    // Expected: 1
+
+    // Test 4: Search on different variant subcolumns (OR)
+    logger.info("Test 4: Search on different variant subcolumns")
+    qt_test4 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.string4:hello OR 
overflowpropertiesfulltext.string5:dpr')
+        ORDER BY id
+    """
+    // Expected: 2, 4
+
+    // Test 5: Search on non-existent subcolumn
+    logger.info("Test 5: Search on non-existent subcolumn")
+    qt_test5 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ COUNT(*) FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.nonexistent:value')
+    """
+    // Expected: 0
+
+    // Test 6: Nested variant path
+    logger.info("Test 6: Nested variant path")
+    qt_test6 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.nested.field:0ff')
+        ORDER BY id
+    """
+    // Expected: 6
+
+    // Test 7: Complex query with variant subcolumns
+    logger.info("Test 7: Complex query with variant subcolumns")
+    qt_test7 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('(overflowpropertiesfulltext.string4:0ff OR 
overflowpropertiesfulltext.string4:dpr) AND NOT 
overflowpropertiesfulltext.string4:hello')
+        ORDER BY id
+    """
+    // Expected: 1, 3, 5
+
+    // Test 8: Subcolumn name containing a special character (unquoted hyphen)
+    logger.info("Test 8: Quoted field names")
+    sql """
+        INSERT INTO ${table_name} VALUES
+        (7, '{"field-name": "test value"}')
+    """
+    Thread.sleep(5000)
+
+    qt_test8 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+        WHERE search('overflowpropertiesfulltext.field-name:test')
+        ORDER BY id
+    """
+    // Expected: 7
+
+    // Test 9: Wildcard search on variant subcolumn
+    //logger.info("Test 9: Wildcard search on variant subcolumn")
+    //qt_test9 """
+    //    SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ id FROM ${table_name}
+    //    WHERE search('overflowpropertiesfulltext.string4:0*')
+    //    ORDER BY id
+    //"""
+    // Expected: 1, 3
+
+    // Test 10: Sanity check that a plain (non-search) predicate still works
+    logger.info("Test 10: Verify normal field search still works (if id has 
index)")
+    // Uses an ordinary comparison on id rather than search(); all 7 inserted rows should match
+    qt_test10 """
+        SELECT /*+SET_VAR(enable_common_expr_pushdown=true, 
default_variant_max_subcolumns_count=0)*/ COUNT(*) FROM ${table_name}
+        WHERE id > 0
+    """
+    // Expected: 7
+
+    logger.info("Variant subcolumn search tests completed successfully!")
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to