This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 807de1f6927 [feature](search) support MATCH projection as virtual 
column for inverted index evaluation (#61092)
807de1f6927 is described below

commit 807de1f6927c58692a876ff5761066a1d713668e
Author: Jack <[email protected]>
AuthorDate: Thu Mar 12 10:26:04 2026 +0800

    [feature](search) support MATCH projection as virtual column for inverted 
index evaluation (#61092)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Problem Summary:
    
    In FULL OUTER JOIN queries, MATCH expressions in the SELECT list cannot
    be pushed down as filters (this would violate join semantics by
    incorrectly filtering rows). This means the inverted index cannot be
    used for MATCH evaluation, resulting in slow-path expression evaluation.
    
    This PR enables MATCH expressions used as **projections** to be pushed
    down as virtual columns on OlapScan, allowing the BE to evaluate them
    via inverted index using the existing `fast_execute()` caching
    mechanism.
    
    **Example:**
    ```sql
    -- Before: MATCH evaluated via slow path (no index)
    SELECT A.k1, A.content MATCH_ANY 'hello' as match_result
    FROM A FULL OUTER JOIN B ON A.k1 = B.k1;
    
    -- After: same query, but MATCH is pushed down as a virtual column and
    -- evaluated via inverted index
    ```
    
    **FE changes:**
    - `Match.java`: Implement the `PreferPushDownProject` interface so that
    the `PushDownProject` rule moves MATCH from the join output into scan
    projections
    - `PushDownMatchProjectionAsVirtualColumn.java`: New rewrite rule
    converting MATCH projections to virtual columns on OlapScan
    - `RuleType.java` + `Rewriter.java`: Rule registration
    
    **BE changes (segment_iterator.cpp):**
    - `_construct_compound_expr_context()`: Clone each virtual column expr
    and set the shared `IndexExecContext` on the clone
    - `_apply_index_expr()`: Evaluate inverted index for virtual column
    MATCH (bitmap only, no row filtering)
    - `_output_index_result_column()` (renamed from
    `_output_index_result_column_for_expr()`): Convert bitmaps to UInt8
    columns for the given expr contexts (common exprs or virtual column
    exprs)
    
    The bitmap result is cached in `IndexExecContext` and converted to a
    result column before `_materialization_of_virtual_column()` runs. When
    that function calls `VirtualSlotRef::execute_column()` → MATCH's
    `fast_execute()`, the pre-computed column is returned directly.
---
 be/src/exprs/vectorized_fn_call.cpp                |   6 +-
 be/src/storage/segment/segment_iterator.cpp        |  68 +++++-
 be/src/storage/segment/segment_iterator.h          |   4 +-
 be/test/CMakeLists.txt                             |   1 +
 .../segment_iterator_apply_index_expr_test.cpp     | 244 +++++++++++++++++++
 .../doris/nereids/jobs/executor/Rewriter.java      |   2 +
 .../org/apache/doris/nereids/rules/RuleType.java   |   1 +
 .../PushDownMatchProjectionAsVirtualColumn.java    | 138 +++++++++++
 .../rewrite/PushDownScoreTopNIntoOlapScan.java     |   2 +-
 .../rewrite/PushDownVectorTopNIntoOlapScan.java    |   2 +-
 .../doris/nereids/trees/expressions/Match.java     |   2 +-
 .../trees/plans/logical/LogicalOlapScan.java       |  42 +++-
 ...PushDownMatchProjectionAsVirtualColumnTest.java | 267 +++++++++++++++++++++
 .../test_match_projection_virtual_column.out       |  66 +++++
 .../test_match_projection_virtual_column.groovy    | 204 ++++++++++++++++
 15 files changed, 1026 insertions(+), 23 deletions(-)

diff --git a/be/src/exprs/vectorized_fn_call.cpp 
b/be/src/exprs/vectorized_fn_call.cpp
index 684aba5e920..2194200601d 100644
--- a/be/src/exprs/vectorized_fn_call.cpp
+++ b/be/src/exprs/vectorized_fn_call.cpp
@@ -201,7 +201,11 @@ void VectorizedFnCall::close(VExprContext* context, 
FunctionContext::FunctionSta
 }
 
 Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context, 
uint32_t segment_num_rows) {
-    DCHECK_GE(get_num_children(), 1);
+    if (get_num_children() < 1) {
+        // score() and similar 0-children virtual column functions don't need
+        // inverted index evaluation; return OK to skip gracefully.
+        return Status::OK();
+    }
     return _evaluate_inverted_index(context, _function, segment_num_rows);
 }
 
diff --git a/be/src/storage/segment/segment_iterator.cpp 
b/be/src/storage/segment/segment_iterator.cpp
index 29a5881f6c9..9006c96af4d 100644
--- a/be/src/storage/segment/segment_iterator.cpp
+++ b/be/src/storage/segment/segment_iterator.cpp
@@ -1143,6 +1143,25 @@ Status SegmentIterator::_apply_index_expr() {
         }
     }
 
+    // Evaluate inverted index for virtual column MATCH expressions 
(projections).
+    // Unlike common exprs which filter rows, these only compute index result 
bitmaps
+    // for later materialization via fast_execute().
+    for (auto& [cid, expr_ctx] : _virtual_column_exprs) {
+        if (expr_ctx->get_index_context() == nullptr) {
+            continue;
+        }
+        if (Status st = expr_ctx->evaluate_inverted_index(num_rows()); 
!st.ok()) {
+            if (_downgrade_without_index(st) || st.code() == 
ErrorCode::NOT_IMPLEMENTED_ERROR) {
+                continue;
+            } else {
+                LOG(WARNING) << "failed to evaluate inverted index for virtual 
column expr: "
+                             << expr_ctx->root()->debug_string()
+                             << ", error msg: " << st.to_string();
+                return st;
+            }
+        }
+    }
+
     // Apply ann range search
     segment_v2::AnnIndexStats ann_index_stats;
     for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
@@ -2648,6 +2667,19 @@ Status SegmentIterator::_next_batch_internal(Block* 
block) {
 
     // step5: output columns
     RETURN_IF_ERROR(_output_non_pred_columns(block));
+    // Convert inverted index bitmaps to result columns for virtual column 
exprs
+    // (e.g., MATCH projections). This must run before 
_materialization_of_virtual_column
+    // so that fast_execute() can find the pre-computed result columns.
+    if (!_virtual_column_exprs.empty()) {
+        bool use_sel = _is_need_vec_eval || _is_need_short_eval || 
_is_need_expr_eval;
+        uint16_t* sel_rowid_idx = use_sel ? _sel_rowid_idx.data() : nullptr;
+        std::vector<VExprContext*> vir_ctxs;
+        vir_ctxs.reserve(_virtual_column_exprs.size());
+        for (auto& [cid, ctx] : _virtual_column_exprs) {
+            vir_ctxs.push_back(ctx.get());
+        }
+        _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size, 
block);
+    }
     RETURN_IF_ERROR(_materialization_of_virtual_column(block));
     // shrink char_type suffix zero data
     block->shrink_char_type_column_suffix_zero(_char_type_idx);
@@ -2754,7 +2786,12 @@ Status SegmentIterator::_process_common_expr(uint16_t* 
sel_rowid_idx, uint16_t&
                            _selected_size));
     }
 
-    _output_index_result_column_for_expr(_sel_rowid_idx.data(), 
_selected_size, block);
+    std::vector<VExprContext*> common_ctxs;
+    common_ctxs.reserve(_common_expr_ctxs_push_down.size());
+    for (auto& ctx : _common_expr_ctxs_push_down) {
+        common_ctxs.push_back(ctx.get());
+    }
+    _output_index_result_column(common_ctxs, _sel_rowid_idx.data(), 
_selected_size, block);
     block->shrink_char_type_column_suffix_zero(_char_type_idx);
     RETURN_IF_ERROR(_execute_common_expr(_sel_rowid_idx.data(), 
_selected_size, block));
 
@@ -2827,15 +2864,19 @@ uint16_t 
SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx,
     }
 }
 
-void SegmentIterator::_output_index_result_column_for_expr(uint16_t* 
sel_rowid_idx,
-                                                           uint16_t 
select_size, Block* block) {
+void SegmentIterator::_output_index_result_column(const 
std::vector<VExprContext*>& expr_ctxs,
+                                                  uint16_t* sel_rowid_idx, 
uint16_t select_size,
+                                                  Block* block) {
     SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer);
     if (block->rows() == 0) {
         return;
     }
-    for (auto& expr_ctx : _common_expr_ctxs_push_down) {
-        for (auto& inverted_index_result_bitmap_for_expr :
-             expr_ctx->get_index_context()->get_index_result_bitmap()) {
+    for (auto* expr_ctx_ptr : expr_ctxs) {
+        auto index_ctx = expr_ctx_ptr->get_index_context();
+        if (index_ctx == nullptr) {
+            continue;
+        }
+        for (auto& inverted_index_result_bitmap_for_expr : 
index_ctx->get_index_result_bitmap()) {
             const auto* expr = inverted_index_result_bitmap_for_expr.first;
             const auto& result_bitmap = 
inverted_index_result_bitmap_for_expr.second;
             const auto& index_result_bitmap = result_bitmap.get_data_bitmap();
@@ -2873,12 +2914,11 @@ void 
SegmentIterator::_output_index_result_column_for_expr(uint16_t* sel_rowid_i
             DCHECK(block->rows() == vec_match_pred.size());
 
             if (null_map_column) {
-                
expr_ctx->get_index_context()->set_index_result_column_for_expr(
+                index_ctx->set_index_result_column_for_expr(
                         expr, 
ColumnNullable::create(std::move(index_result_column),
                                                      
std::move(null_map_column)));
             } else {
-                
expr_ctx->get_index_context()->set_index_result_column_for_expr(
-                        expr, std::move(index_result_column));
+                index_ctx->set_index_result_column_for_expr(expr, 
std::move(index_result_column));
             }
         }
     }
@@ -2948,6 +2988,16 @@ Status 
SegmentIterator::_construct_compound_expr_context() {
         context->set_index_context(inverted_index_context);
         _common_expr_ctxs_push_down.emplace_back(context);
     }
+    // Clone virtual column exprs before setting IndexExecContext, because
+    // IndexExecContext holds segment-specific index iterator references.
+    // Without cloning, shared VExprContext would be overwritten per-segment
+    // and could point to the wrong segment's context.
+    for (auto& [cid, expr_ctx] : _virtual_column_exprs) {
+        VExprContextSPtr context;
+        RETURN_IF_ERROR(expr_ctx->clone(_opts.runtime_state, context));
+        context->set_index_context(inverted_index_context);
+        expr_ctx = context;
+    }
     return Status::OK();
 }
 
diff --git a/be/src/storage/segment/segment_iterator.h 
b/be/src/storage/segment/segment_iterator.h
index c91aa64c8f0..bc56c35b18a 100644
--- a/be/src/storage/segment/segment_iterator.h
+++ b/be/src/storage/segment/segment_iterator.h
@@ -292,8 +292,8 @@ private:
 
     bool _check_apply_by_inverted_index(std::shared_ptr<ColumnPredicate> pred);
 
-    void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, 
uint16_t select_size,
-                                              Block* block);
+    void _output_index_result_column(const std::vector<VExprContext*>& 
expr_ctxs,
+                                     uint16_t* sel_rowid_idx, uint16_t 
select_size, Block* block);
 
     bool _need_read_data(ColumnId cid);
     bool _prune_column(ColumnId cid, MutableColumnPtr& column, bool 
fill_defaults,
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index 2fd30182dc2..0d8ce57d8a5 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -52,6 +52,7 @@ list(REMOVE_ITEM UT_FILES
     
${CMAKE_CURRENT_SOURCE_DIR}/storage/segment/frame_of_reference_page_test.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/storage/segment/plain_page_test.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/storage/segment/rle_page_test.cpp
+    
${CMAKE_CURRENT_SOURCE_DIR}/storage/segment/segment_iterator_apply_index_expr_test.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/runtime/decimal_value_test.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/util/decompress_test.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/util/url_coding_test.cpp
diff --git a/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp 
b/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp
new file mode 100644
index 00000000000..27110712746
--- /dev/null
+++ b/be/test/storage/segment/segment_iterator_apply_index_expr_test.cpp
@@ -0,0 +1,244 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <memory>
+
+#include "common/status.h"
+#include "core/data_type/data_type_number.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/runtime_state.h"
+#include "storage/olap_common.h"
+#include "storage/segment/column_reader.h"
+#include "storage/tablet/tablet_schema.h"
+
+// Use #define private public to access private members for testing
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wkeyword-macro"
+#endif
+#define private public
+#include "storage/segment/segment.h"
+#include "storage/segment/segment_iterator.h"
+#undef private
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+namespace doris::segment_v2 {
+
+namespace {
+
+// A test VExpr that returns a configurable Status from 
evaluate_inverted_index.
+class MockEvalExpr : public VExpr {
+public:
+    MockEvalExpr() { _data_type = std::make_shared<DataTypeUInt8>(); }
+
+    void set_evaluate_status(Status st) { _eval_status = std::move(st); }
+
+    const std::string& expr_name() const override {
+        static const std::string kName = "MockEvalExpr";
+        return kName;
+    }
+
+    Status execute(VExprContext*, Block*, int*) const override { return 
Status::OK(); }
+
+    Status execute_column(VExprContext* context, const Block* block, Selector* 
selector,
+                          size_t count, ColumnPtr& result_column) const 
override {
+        return Status::OK();
+    }
+
+    Status evaluate_inverted_index(VExprContext* context, uint32_t 
segment_num_rows) override {
+        return _eval_status;
+    }
+
+private:
+    Status _eval_status = Status::OK();
+};
+
+TabletSchemaSPtr make_tablet_schema() {
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    auto* col = schema_pb.add_column();
+    col->set_unique_id(0);
+    col->set_name("k0");
+    col->set_type("INT");
+    col->set_is_key(true);
+    col->set_is_nullable(false);
+    auto tablet_schema = std::make_shared<TabletSchema>();
+    tablet_schema->init_from_pb(schema_pb);
+    return tablet_schema;
+}
+
+// Helper to create a minimal Segment with a given num_rows.
+std::shared_ptr<Segment> make_stub_segment(uint32_t num_rows,
+                                           const TabletSchemaSPtr& 
tablet_schema) {
+    auto seg = std::make_shared<Segment>(0, RowsetId(), tablet_schema, 
InvertedIndexFileInfo());
+    seg->_num_rows = num_rows;
+    return seg;
+}
+
+// Helper to create a VExprContext with a MockEvalExpr root.
+VExprContextSPtr make_mock_ctx(Status eval_status, bool with_index_context = 
true) {
+    auto expr = std::make_shared<MockEvalExpr>();
+    expr->set_evaluate_status(std::move(eval_status));
+    auto ctx = std::make_shared<VExprContext>(expr);
+    if (with_index_context) {
+        std::vector<ColumnId> col_ids;
+        std::vector<std::unique_ptr<IndexIterator>> index_iters;
+        std::vector<IndexFieldNameAndTypePair> storage_types;
+        std::unordered_map<ColumnId, std::unordered_map<const VExpr*, bool>> 
status_map;
+        ColumnIteratorOptions column_iter_opts;
+        auto index_ctx =
+                std::make_shared<IndexExecContext>(col_ids, index_iters, 
storage_types, status_map,
+                                                   nullptr, nullptr, 
column_iter_opts);
+        ctx->set_index_context(index_ctx);
+    }
+    return ctx;
+}
+
+} // namespace
+
+class SegmentIteratorApplyIndexExprTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _tablet_schema = make_tablet_schema();
+        _segment = make_stub_segment(100, _tablet_schema);
+
+        _read_schema = std::make_shared<Schema>(_tablet_schema);
+        _iter = std::make_unique<SegmentIterator>(_segment, _read_schema);
+
+        // Set up RuntimeState with fallback enabled so 
_downgrade_without_index works
+        TQueryOptions query_options;
+        query_options.__set_enable_fallback_on_missing_inverted_index(true);
+        _runtime_state.set_query_options(query_options);
+
+        _iter->_opts.runtime_state = &_runtime_state;
+        _iter->_opts.stats = &_stats;
+    }
+
+    std::shared_ptr<Segment> _segment;
+    std::shared_ptr<TabletSchema> _tablet_schema;
+    SchemaSPtr _read_schema;
+    std::unique_ptr<SegmentIterator> _iter;
+    RuntimeState _runtime_state;
+    OlapReaderStatistics _stats;
+};
+
+// When evaluate_inverted_index returns OK, _apply_index_expr should succeed.
+TEST_F(SegmentIteratorApplyIndexExprTest, virtual_column_evaluate_ok) {
+    _iter->_virtual_column_exprs[0] = make_mock_ctx(Status::OK());
+    EXPECT_TRUE(_iter->_apply_index_expr().ok());
+}
+
+// When the index context is null, the expr should be skipped (continue).
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_null_index_context_skipped) {
+    _iter->_virtual_column_exprs[0] = make_mock_ctx(Status::OK(), 
/*with_index_context=*/false);
+    EXPECT_TRUE(_iter->_apply_index_expr().ok());
+}
+
+// When evaluate_inverted_index returns INVERTED_INDEX_BYPASS (a downgrade 
error),
+// _apply_index_expr should continue and return OK.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_downgrade_bypass_continues) {
+    _iter->_virtual_column_exprs[0] =
+            
make_mock_ctx(Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>("bypass"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 1);
+}
+
+// When evaluate_inverted_index returns INVERTED_INDEX_FILE_NOT_FOUND with 
fallback enabled,
+// _apply_index_expr should downgrade and continue.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_downgrade_file_not_found_continues) {
+    _iter->_virtual_column_exprs[0] =
+            
make_mock_ctx(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>("not 
found"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 1);
+}
+
+// When evaluate_inverted_index returns INVERTED_INDEX_EVALUATE_SKIPPED,
+// _apply_index_expr should downgrade and continue.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_downgrade_evaluate_skipped_continues) {
+    _iter->_virtual_column_exprs[0] =
+            
make_mock_ctx(Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>("skipped"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 1);
+}
+
+// When evaluate_inverted_index returns INVERTED_INDEX_FILE_CORRUPTED,
+// _apply_index_expr should downgrade and continue.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_downgrade_file_corrupted_continues) {
+    _iter->_virtual_column_exprs[0] =
+            
make_mock_ctx(Status::Error<ErrorCode::INVERTED_INDEX_FILE_CORRUPTED>("corrupted"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 1);
+}
+
+// When evaluate_inverted_index returns NOT_IMPLEMENTED_ERROR,
+// _apply_index_expr should continue and return OK.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_not_implemented_continues) {
+    _iter->_virtual_column_exprs[0] =
+            make_mock_ctx(Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>("not 
impl"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    // NOT_IMPLEMENTED_ERROR does not go through _downgrade_without_index, so 
count stays 0
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 0);
+}
+
+// When evaluate_inverted_index returns an unhandled error (e.g., 
INTERNAL_ERROR),
+// _apply_index_expr should propagate the error.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
virtual_column_unhandled_error_propagated) {
+    _iter->_virtual_column_exprs[0] =
+            make_mock_ctx(Status::Error<ErrorCode::INTERNAL_ERROR>("internal 
error"));
+    Status st = _iter->_apply_index_expr();
+    EXPECT_FALSE(st.ok());
+    EXPECT_EQ(st.code(), ErrorCode::INTERNAL_ERROR);
+}
+
+// Multiple virtual column exprs: one OK and one downgrade error should both 
continue.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
multiple_virtual_columns_mixed_results) {
+    _iter->_virtual_column_exprs[0] = make_mock_ctx(Status::OK());
+    _iter->_virtual_column_exprs[1] =
+            
make_mock_ctx(Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>("bypass"));
+    _iter->_virtual_column_exprs[2] = make_mock_ctx(Status::OK());
+    Status st = _iter->_apply_index_expr();
+    EXPECT_TRUE(st.ok()) << st.to_string();
+    EXPECT_EQ(_stats.inverted_index_downgrade_count, 1);
+}
+
+// Multiple virtual column exprs: second one returns unhandled error, should 
stop and propagate.
+TEST_F(SegmentIteratorApplyIndexExprTest, 
multiple_virtual_columns_error_stops_iteration) {
+    _iter->_virtual_column_exprs[0] = make_mock_ctx(Status::OK());
+    _iter->_virtual_column_exprs[1] =
+            make_mock_ctx(Status::Error<ErrorCode::INTERNAL_ERROR>("fail"));
+    _iter->_virtual_column_exprs[2] = make_mock_ctx(Status::OK());
+    Status st = _iter->_apply_index_expr();
+    EXPECT_FALSE(st.ok());
+    EXPECT_EQ(st.code(), ErrorCode::INTERNAL_ERROR);
+}
+
+// Empty virtual_column_exprs should just succeed.
+TEST_F(SegmentIteratorApplyIndexExprTest, empty_virtual_columns_ok) {
+    EXPECT_TRUE(_iter->_apply_index_expr().ok());
+}
+
+} // namespace doris::segment_v2
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
index 00be607fc94..ffc6f18fa4e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java
@@ -139,6 +139,7 @@ import 
org.apache.doris.nereids.rules.rewrite.PushDownJoinOnAssertNumRows;
 import org.apache.doris.nereids.rules.rewrite.PushDownLimit;
 import org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughJoin;
 import 
org.apache.doris.nereids.rules.rewrite.PushDownLimitDistinctThroughUnion;
+import 
org.apache.doris.nereids.rules.rewrite.PushDownMatchProjectionAsVirtualColumn;
 import org.apache.doris.nereids.rules.rewrite.PushDownProjectThroughLimit;
 import org.apache.doris.nereids.rules.rewrite.PushDownScoreTopNIntoOlapScan;
 import org.apache.doris.nereids.rules.rewrite.PushDownTopNDistinctThroughJoin;
@@ -790,6 +791,7 @@ public class Rewriter extends AbstractBatchJobExecutor {
                 custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, 
EliminateUnnecessaryProject::new),
                 topDown(new PushDownVectorTopNIntoOlapScan()),
                 topDown(new PushDownVirtualColumnsIntoOlapScan()),
+                topDown(new PushDownMatchProjectionAsVirtualColumn()),
                 topic("score optimize",
                         topDown(new PushDownScoreTopNIntoOlapScan(),
                                 new CheckScoreUsage())
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index 131ce0602c2..1af2b9a5869 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -336,6 +336,7 @@ public enum RuleType {
     PUSH_CONJUNCTS_INTO_ODBC_SCAN(RuleTypeClass.REWRITE),
     PUSH_CONJUNCTS_INTO_ES_SCAN(RuleTypeClass.REWRITE),
     PUSH_DOWN_VIRTUAL_COLUMNS_INTO_OLAP_SCAN(RuleTypeClass.REWRITE),
+    PUSH_DOWN_MATCH_PROJECTION_AS_VIRTUAL_COLUMN(RuleTypeClass.REWRITE),
     PUSH_DOWN_SCORE_TOPN_INTO_OLAP_SCAN(RuleTypeClass.REWRITE),
     PUSH_DOWN_VECTOR_TOPN_INTO_OLAP_SCAN(RuleTypeClass.REWRITE),
     CHECK_SCORE_USAGE(RuleTypeClass.REWRITE),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumn.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumn.java
new file mode 100644
index 00000000000..53e93dfa0fc
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumn.java
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.catalog.KeysType;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.Match;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
+import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+/**
+ * Push down MATCH expressions in projections as virtual columns on OlapScan.
+ * This allows the BE to evaluate MATCH using inverted index during scan.
+ *
+ * Example transformation:
+ * Before:
+ * Project[a, b, col MATCH_ANY 'hello']
+ * └── OlapScan[table]
+ *
+ * After:
+ * Project[a, b, virtual_slot_ref]
+ * └── OlapScan[table, virtual_columns=[(col MATCH_ANY 'hello') as alias]]
+ */
+public class PushDownMatchProjectionAsVirtualColumn implements 
RewriteRuleFactory {
+
+    private boolean canPushDown(LogicalOlapScan scan) {
+        boolean dupTblOrMOW = scan.getTable().getKeysType() == 
KeysType.DUP_KEYS
+                || (scan.getTable().getTableProperty() != null
+                    && 
scan.getTable().getTableProperty().getEnableUniqueKeyMergeOnWrite());
+        return dupTblOrMOW;
+    }
+
+    @Override
+    public List<Rule> buildRules() {
+        return ImmutableList.of(
+                // Pattern 1: Project -> OlapScan
+                logicalProject(logicalOlapScan().when(this::canPushDown))
+                        .then(project -> {
+                            LogicalOlapScan scan = project.child();
+                            return pushDown(project, scan, newScan -> newScan);
+                        
}).toRule(RuleType.PUSH_DOWN_MATCH_PROJECTION_AS_VIRTUAL_COLUMN),
+                // Pattern 2: Project -> Filter -> OlapScan
+                
logicalProject(logicalFilter(logicalOlapScan().when(this::canPushDown)))
+                        .then(project -> {
+                            LogicalFilter<LogicalOlapScan> filter = 
project.child();
+                            LogicalOlapScan scan = filter.child();
+                            return pushDown(project, scan,
+                                    newScan -> filter.withChildren(newScan));
+                        
}).toRule(RuleType.PUSH_DOWN_MATCH_PROJECTION_AS_VIRTUAL_COLUMN)
+        );
+    }
+
+    /**
+     * Extract MATCH projections and push them as virtual columns on the scan.
+     * @param childRebuilder rebuilds the project's child tree with the new 
scan
+     */
+    private LogicalProject<?> pushDown(
+            LogicalProject<?> project, LogicalOlapScan scan,
+            Function<LogicalOlapScan, ? extends Plan> childRebuilder) {
+        List<NamedExpression> projections = project.getProjects();
+        List<NamedExpression> virtualColumns = new ArrayList<>();
+        Map<Expression, Expression> replaceMap = new HashMap<>();
+
+        for (NamedExpression projection : projections) {
+            Expression matchExpr = unwrapMatch(projection);
+            if (matchExpr != null && !replaceMap.containsKey(matchExpr)) {
+                Alias alias = new Alias(matchExpr);
+                replaceMap.put(matchExpr, alias.toSlot());
+                virtualColumns.add(alias);
+            }
+        }
+
+        if (virtualColumns.isEmpty()) {
+            return null;
+        }
+
+        ImmutableList.Builder<NamedExpression> newProjections = 
ImmutableList.builder();
+        for (NamedExpression projection : projections) {
+            Expression matchExpr = unwrapMatch(projection);
+            if (matchExpr != null && replaceMap.containsKey(matchExpr)) {
+                Expression slot = replaceMap.get(matchExpr);
+                if (projection instanceof Alias) {
+                    newProjections.add(new Alias(((Alias) 
projection).getExprId(),
+                            slot, ((Alias) projection).getName()));
+                } else {
+                    newProjections.add((NamedExpression) slot);
+                }
+            } else {
+                newProjections.add(projection);
+            }
+        }
+
+        LogicalOlapScan newScan = scan.appendVirtualColumns(virtualColumns);
+        return (LogicalProject<?>) project.withProjectsAndChild(
+                newProjections.build(), childRebuilder.apply(newScan));
+    }
+
+    /**
+     * Unwrap a Match expression from a projection.
+     * Returns the Match expression if the projection is a Match directly or 
an Alias wrapping a Match.
+     * Returns null otherwise.
+     */
+    private Expression unwrapMatch(NamedExpression projection) {
+        if (projection instanceof Alias && ((Alias) projection).child() 
instanceof Match) {
+            return ((Alias) projection).child();
+        }
+        return null;
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownScoreTopNIntoOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownScoreTopNIntoOlapScan.java
index 8120cceffbe..7073febac4d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownScoreTopNIntoOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownScoreTopNIntoOlapScan.java
@@ -189,7 +189,7 @@ public class PushDownScoreTopNIntoOlapScan implements 
RewriteRuleFactory {
         // All conditions met, perform the push down.
         // This is the core action: push score() as a virtual column and also 
push the
         // topN info.
-        Plan newScan = 
scan.withVirtualColumnsAndTopN(ImmutableList.of(scoreAlias),
+        Plan newScan = 
scan.appendVirtualColumnsAndTopN(ImmutableList.of(scoreAlias),
                 ImmutableList.of(), Optional.empty(),
                 topN.getOrderKeys(), Optional.of(topN.getLimit() + 
topN.getOffset()),
                 scoreRangeInfo);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownVectorTopNIntoOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownVectorTopNIntoOlapScan.java
index 25efde5cce1..ff464cfa704 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownVectorTopNIntoOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownVectorTopNIntoOlapScan.java
@@ -146,7 +146,7 @@ public class PushDownVectorTopNIntoOlapScan implements 
RewriteRuleFactory {
             return null;
         }
 
-        Plan plan = scan.withVirtualColumnsAndTopN(
+        Plan plan = scan.appendVirtualColumnsAndTopN(
                 ImmutableList.of(orderKeyAlias),
                 topN.getOrderKeys(), Optional.of(topN.getLimit() + 
topN.getOffset()),
                 ImmutableList.of(), Optional.empty(), Optional.empty());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
index 386d5f63666..e4e36d65521 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Match.java
@@ -36,7 +36,7 @@ import java.util.Optional;
 /**
  * like expression: a MATCH 'hello'.
  */
-public abstract class Match extends BinaryOperator implements 
PropagateNullable {
+public abstract class Match extends BinaryOperator implements 
PropagateNullable, PreferPushDownProject {
 
     private final Optional<String> analyzer;
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
index 11f5d02c9f2..7f17e19e1f5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java
@@ -493,13 +493,34 @@ public class LogicalOlapScan extends 
LogicalCatalogRelation implements OlapScan,
     }
 
     /**
-     * Add virtual column to olap scan with optional score range info.
-     * @param virtualColumns generated virtual columns
-     * @param scoreRangeInfo optional score range filter info for BM25 range 
queries
-     * @return scan with virtual columns and optional score range info
+     * Append additional virtual columns to existing ones.
+     * Unlike {@link #withVirtualColumns} which replaces, this merges existing + new.
+     */
+    public LogicalOlapScan appendVirtualColumns(List<NamedExpression> 
additionalVirtualColumns) {
+        LogicalProperties logicalProperties = getLogicalProperties();
+        List<Slot> output = Lists.newArrayList(logicalProperties.getOutput());
+        
output.addAll(additionalVirtualColumns.stream().map(NamedExpression::toSlot)
+                .collect(Collectors.toList()));
+        logicalProperties = new LogicalProperties(() -> output, 
this::computeDataTrait);
+        List<NamedExpression> mergedVirtualColumns = 
ImmutableList.<NamedExpression>builder()
+                .addAll(this.virtualColumns)
+                .addAll(additionalVirtualColumns)
+                .build();
+        return new LogicalOlapScan(relationId, (Table) table, qualifier,
+                groupExpression, Optional.of(logicalProperties),
+                selectedPartitionIds, partitionPruned, selectedTabletIds,
+                selectedIndexId, indexSelected, preAggStatus, 
manuallySpecifiedPartitions,
+                hints, cacheSlotWithSlotName, cachedOutput, tableSample, 
directMvScan, colToSubPathsMap,
+                manuallySpecifiedTabletIds, operativeSlots, 
mergedVirtualColumns, scoreOrderKeys, scoreLimit,
+                scoreRangeInfo, annOrderKeys, annLimit, tableAlias);
+    }
+
+    /**
+     * Append additional virtual columns with topN info.
+     * Merges existing virtual columns with the new ones.
      */
-    public LogicalOlapScan withVirtualColumnsAndTopN(
-            List<NamedExpression> virtualColumns,
+    public LogicalOlapScan appendVirtualColumnsAndTopN(
+            List<NamedExpression> additionalVirtualColumns,
             List<OrderKey> annOrderKeys,
             Optional<Long> annLimit,
             List<OrderKey> scoreOrderKeys,
@@ -507,14 +528,19 @@ public class LogicalOlapScan extends 
LogicalCatalogRelation implements OlapScan,
             Optional<ScoreRangeInfo> scoreRangeInfo) {
         LogicalProperties logicalProperties = getLogicalProperties();
         List<Slot> output = Lists.newArrayList(logicalProperties.getOutput());
-        
output.addAll(virtualColumns.stream().map(NamedExpression::toSlot).collect(Collectors.toList()));
+        
output.addAll(additionalVirtualColumns.stream().map(NamedExpression::toSlot)
+                .collect(Collectors.toList()));
         logicalProperties = new LogicalProperties(() -> output, 
this::computeDataTrait);
+        List<NamedExpression> mergedVirtualColumns = 
ImmutableList.<NamedExpression>builder()
+                .addAll(this.virtualColumns)
+                .addAll(additionalVirtualColumns)
+                .build();
         return new LogicalOlapScan(relationId, (Table) table, qualifier,
                 groupExpression, Optional.of(logicalProperties),
                 selectedPartitionIds, partitionPruned, selectedTabletIds,
                 selectedIndexId, indexSelected, preAggStatus, 
manuallySpecifiedPartitions,
                 hints, cacheSlotWithSlotName, cachedOutput, tableSample, 
directMvScan, colToSubPathsMap,
-                manuallySpecifiedTabletIds, operativeSlots, virtualColumns, 
scoreOrderKeys, scoreLimit,
+                manuallySpecifiedTabletIds, operativeSlots, 
mergedVirtualColumns, scoreOrderKeys, scoreLimit,
                 scoreRangeInfo, annOrderKeys, annLimit, tableAlias);
     }
 
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumnTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumnTest.java
new file mode 100644
index 00000000000..55735021e78
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PushDownMatchProjectionAsVirtualColumnTest.java
@@ -0,0 +1,267 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.rewrite;
+
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.GreaterThan;
+import org.apache.doris.nereids.trees.expressions.MatchAll;
+import org.apache.doris.nereids.trees.expressions.MatchAny;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.Slot;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral;
+import org.apache.doris.nereids.trees.expressions.literal.StringLiteral;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
+import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
+import org.apache.doris.nereids.util.MemoPatternMatchSupported;
+import org.apache.doris.nereids.util.MemoTestUtils;
+import org.apache.doris.nereids.util.PlanChecker;
+import org.apache.doris.nereids.util.PlanConstructor;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Test for PushDownMatchProjectionAsVirtualColumn rule.
+ */
+public class PushDownMatchProjectionAsVirtualColumnTest implements 
MemoPatternMatchSupported {
+
+    @Test
+    void testPushDownMatchProjection() {
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+        Slot nameSlot = slots.get(1);
+
+        MatchAny matchExpr = new MatchAny(nameSlot, new 
StringLiteral("hello"));
+        LogicalProject<LogicalOlapScan> project = new LogicalProject<>(
+                ImmutableList.of(idSlot, new Alias(matchExpr, "m")), scan);
+
+        Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), 
project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .getPlan();
+
+        // Verify plan structure
+        Assertions.assertInstanceOf(LogicalProject.class, root);
+        LogicalProject<?> resProject = (LogicalProject<?>) root;
+        Assertions.assertInstanceOf(LogicalOlapScan.class, resProject.child());
+        LogicalOlapScan resScan = (LogicalOlapScan) resProject.child();
+
+        // Verify exactly 1 virtual column wrapping the MatchAny expression
+        Assertions.assertEquals(1, resScan.getVirtualColumns().size());
+        Alias vcAlias = (Alias) resScan.getVirtualColumns().get(0);
+        Assertions.assertInstanceOf(MatchAny.class, vcAlias.child());
+        Assertions.assertEquals(matchExpr, vcAlias.child());
+
+        // Verify alias name "m" is preserved in the project output
+        List<NamedExpression> projections = resProject.getProjects();
+        Assertions.assertEquals(2, projections.size());
+        NamedExpression mProjection = projections.get(1);
+        Assertions.assertInstanceOf(Alias.class, mProjection);
+        Assertions.assertEquals("m", ((Alias) mProjection).getName());
+
+        // Verify the "m" alias now references the virtual column's slot, not 
the original MatchAny
+        Expression mChild = ((Alias) mProjection).child();
+        Assertions.assertInstanceOf(SlotReference.class, mChild);
+        Assertions.assertEquals(vcAlias.toSlot().getExprId(), ((SlotReference) 
mChild).getExprId());
+    }
+
+    @Test
+    void testPushDownMatchProjectionWithFilter() {
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+        Slot nameSlot = slots.get(1);
+
+        MatchAny matchExpr = new MatchAny(nameSlot, new 
StringLiteral("hello"));
+        GreaterThan filterPred = new GreaterThan(idSlot, new 
IntegerLiteral(2));
+
+        LogicalProject<LogicalFilter<LogicalOlapScan>> project = new 
LogicalProject<>(
+                ImmutableList.of(idSlot, new Alias(matchExpr, "m")),
+                new LogicalFilter<>(ImmutableSet.of(filterPred), scan));
+
+        Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), 
project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .getPlan();
+
+        // Verify plan structure: Project -> Filter -> OlapScan
+        Assertions.assertInstanceOf(LogicalProject.class, root);
+        LogicalProject<?> resProject = (LogicalProject<?>) root;
+        Assertions.assertInstanceOf(LogicalFilter.class, resProject.child());
+        LogicalFilter<?> resFilter = (LogicalFilter<?>) resProject.child();
+        Assertions.assertInstanceOf(LogicalOlapScan.class, resFilter.child());
+        LogicalOlapScan resScan = (LogicalOlapScan) resFilter.child();
+
+        // Verify virtual column
+        Assertions.assertEquals(1, resScan.getVirtualColumns().size());
+        Alias vcAlias = (Alias) resScan.getVirtualColumns().get(0);
+        Assertions.assertEquals(matchExpr, vcAlias.child());
+
+        // Verify filter is preserved
+        Assertions.assertEquals(ImmutableSet.of(filterPred), 
resFilter.getConjuncts());
+
+        // Verify slot replacement in project
+        NamedExpression mProjection = resProject.getProjects().get(1);
+        Assertions.assertEquals("m", ((Alias) mProjection).getName());
+        Assertions.assertEquals(vcAlias.toSlot().getExprId(),
+                ((SlotReference) ((Alias) mProjection).child()).getExprId());
+    }
+
+    @Test
+    void testNoMatchExpressionNoChange() {
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+
+        LogicalProject<LogicalOlapScan> project = new LogicalProject<>(
+                ImmutableList.<NamedExpression>of(idSlot), scan);
+
+        PlanChecker.from(MemoTestUtils.createConnectContext(), project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .matches(
+                        logicalProject(
+                                logicalOlapScan().when(s -> 
s.getVirtualColumns().isEmpty())
+                        )
+                );
+    }
+
+    @Test
+    void testDuplicateMatchDedup() {
+        // Same MATCH expression in two aliases should create only one virtual 
column
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+        Slot nameSlot = slots.get(1);
+
+        MatchAny matchExpr = new MatchAny(nameSlot, new 
StringLiteral("hello"));
+        LogicalProject<LogicalOlapScan> project = new LogicalProject<>(
+                ImmutableList.of(idSlot, new Alias(matchExpr, "m1"), new 
Alias(matchExpr, "m2")),
+                scan);
+
+        Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), 
project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .getPlan();
+
+        Assertions.assertInstanceOf(LogicalProject.class, root);
+        LogicalProject<?> resProject = (LogicalProject<?>) root;
+        LogicalOlapScan resScan = (LogicalOlapScan) resProject.child();
+
+        // Only one virtual column for the duplicate MATCH expression
+        Assertions.assertEquals(1, resScan.getVirtualColumns().size());
+
+        // Both aliases should reference the same virtual column slot
+        Alias vcAlias = (Alias) resScan.getVirtualColumns().get(0);
+        List<NamedExpression> projections = resProject.getProjects();
+        Assertions.assertEquals(3, projections.size());
+
+        Alias m1 = (Alias) projections.get(1);
+        Alias m2 = (Alias) projections.get(2);
+        Assertions.assertEquals("m1", m1.getName());
+        Assertions.assertEquals("m2", m2.getName());
+        Assertions.assertEquals(vcAlias.toSlot().getExprId(), ((SlotReference) 
m1.child()).getExprId());
+        Assertions.assertEquals(vcAlias.toSlot().getExprId(), ((SlotReference) 
m2.child()).getExprId());
+    }
+
+    @Test
+    void testMultipleDistinctMatchExpressions() {
+        // Two different MATCH expressions should create two virtual columns
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+        Slot nameSlot = slots.get(1);
+
+        MatchAny matchAny = new MatchAny(nameSlot, new StringLiteral("hello"));
+        MatchAll matchAll = new MatchAll(nameSlot, new StringLiteral("world"));
+        LogicalProject<LogicalOlapScan> project = new LogicalProject<>(
+                ImmutableList.of(idSlot, new Alias(matchAny, "ma"), new 
Alias(matchAll, "mb")),
+                scan);
+
+        Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), 
project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .getPlan();
+
+        Assertions.assertInstanceOf(LogicalProject.class, root);
+        LogicalProject<?> resProject = (LogicalProject<?>) root;
+        LogicalOlapScan resScan = (LogicalOlapScan) resProject.child();
+
+        // Two distinct virtual columns
+        Assertions.assertEquals(2, resScan.getVirtualColumns().size());
+        Set<Expression> vcExprs = resScan.getVirtualColumns().stream()
+                .map(vc -> ((Alias) vc).child())
+                .collect(Collectors.toSet());
+        Assertions.assertTrue(vcExprs.contains(matchAny));
+        Assertions.assertTrue(vcExprs.contains(matchAll));
+
+        // Each alias references its own virtual column slot
+        List<NamedExpression> projections = resProject.getProjects();
+        Assertions.assertEquals(3, projections.size());
+        Assertions.assertEquals("ma", ((Alias) projections.get(1)).getName());
+        Assertions.assertEquals("mb", ((Alias) projections.get(2)).getName());
+
+        // Slots should be different
+        SlotReference maSlot = (SlotReference) ((Alias) 
projections.get(1)).child();
+        SlotReference mbSlot = (SlotReference) ((Alias) 
projections.get(2)).child();
+        Assertions.assertNotEquals(maSlot.getExprId(), mbSlot.getExprId());
+    }
+
+    @Test
+    void testAppendToExistingVirtualColumns() {
+        // MATCH rule should append to existing virtual columns, not skip
+        LogicalOlapScan scan = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
+        List<Slot> slots = scan.getOutput();
+        Slot idSlot = slots.get(0);
+        Slot nameSlot = slots.get(1);
+
+        // Simulate CSE rule having already added a virtual column
+        Alias existingVc = new Alias(new GreaterThan(idSlot, new 
IntegerLiteral(5)), "cse_vc");
+        LogicalOlapScan scanWithVc = 
scan.withVirtualColumns(ImmutableList.of(existingVc));
+
+        MatchAny matchExpr = new MatchAny(nameSlot, new 
StringLiteral("hello"));
+        LogicalProject<LogicalOlapScan> project = new LogicalProject<>(
+                ImmutableList.of(idSlot, new Alias(matchExpr, "m")), 
scanWithVc);
+
+        Plan root = PlanChecker.from(MemoTestUtils.createConnectContext(), 
project)
+                .applyTopDown(new PushDownMatchProjectionAsVirtualColumn())
+                .getPlan();
+
+        Assertions.assertInstanceOf(LogicalProject.class, root);
+        LogicalProject<?> resProject = (LogicalProject<?>) root;
+        LogicalOlapScan resScan = (LogicalOlapScan) resProject.child();
+
+        // Should have 2 virtual columns: existing CSE one + new MATCH one
+        Assertions.assertEquals(2, resScan.getVirtualColumns().size());
+
+        // Existing CSE virtual column is preserved
+        Alias firstVc = (Alias) resScan.getVirtualColumns().get(0);
+        Assertions.assertInstanceOf(GreaterThan.class, firstVc.child());
+
+        // New MATCH virtual column is appended
+        Alias secondVc = (Alias) resScan.getVirtualColumns().get(1);
+        Assertions.assertInstanceOf(MatchAny.class, secondVc.child());
+        Assertions.assertEquals(matchExpr, secondVc.child());
+    }
+}
diff --git 
a/regression-test/data/inverted_index_p0/test_match_projection_virtual_column.out
 
b/regression-test/data/inverted_index_p0/test_match_projection_virtual_column.out
new file mode 100644
index 00000000000..29ca8cf741d
--- /dev/null
+++ 
b/regression-test/data/inverted_index_p0/test_match_projection_virtual_column.out
@@ -0,0 +1,66 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !match_proj_simple --
+1      true
+2      false
+3      true
+4      false
+5      true
+
+-- !match_proj_full_join --
+\N     \N
+1      true
+2      false
+3      true
+4      false
+5      true
+
+-- !match_proj_multiple --
+\N     \N      \N
+1      true    false
+2      false   true
+3      true    false
+4      false   false
+5      true    false
+
+-- !match_proj_with_filter --
+3      true
+4      false
+5      true
+
+-- !match_phrase_proj --
+1      true
+2      false
+3      false
+4      false
+5      false
+
+-- !match_filter_still_works --
+1      hello world
+3      hello doris database
+5      test hello data
+
+-- !match_inner_join --
+1      hello world
+
+-- !match_proj_no_index --
+1      true
+2      false
+3      true
+
+-- !match_proj_unique_mow --
+1      true
+2      false
+3      true
+
+-- !match_proj_compound --
+1      true
+2      false
+3      false
+4      false
+5      false
+
+-- !match_proj_with_direct_filter --
+2      false
+3      true
+4      false
+
diff --git 
a/regression-test/suites/inverted_index_p0/test_match_projection_virtual_column.groovy
 
b/regression-test/suites/inverted_index_p0/test_match_projection_virtual_column.groovy
new file mode 100644
index 00000000000..6d56ed49b3f
--- /dev/null
+++ 
b/regression-test/suites/inverted_index_p0/test_match_projection_virtual_column.groovy
@@ -0,0 +1,204 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_match_projection_virtual_column") {
+    // This test verifies that MATCH expressions used as projections
+    // (not filters) are pushed down as virtual columns on OlapScan
+    // and evaluated via inverted index. This is important for queries
+    // like FULL OUTER JOIN where MATCH cannot be pushed as a filter.
+
+    def tableA = "test_match_proj_a"
+    def tableB = "test_match_proj_b"
+
+    sql "DROP TABLE IF EXISTS ${tableA}"
+    sql "DROP TABLE IF EXISTS ${tableB}"
+
+    sql """
+        CREATE TABLE ${tableA} (
+            k1 INT,
+            content TEXT,
+            INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = 
"english")
+        ) ENGINE=OLAP
+        DUPLICATE KEY(k1)
+        DISTRIBUTED BY HASH(k1) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    sql """
+        CREATE TABLE ${tableB} (
+            k1 INT,
+            val VARCHAR(100)
+        ) ENGINE=OLAP
+        DUPLICATE KEY(k1)
+        DISTRIBUTED BY HASH(k1) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    sql """ INSERT INTO ${tableA} VALUES
+        (1, 'hello world'),
+        (2, 'foo bar baz'),
+        (3, 'hello doris database'),
+        (4, 'nothing here'),
+        (5, 'test hello data')
+    """
+
+    sql """ INSERT INTO ${tableB} VALUES
+        (1, 'b1'),
+        (2, 'b2'),
+        (6, 'b6')
+    """
+
+    // Test 1: MATCH as projection without join
+    order_qt_match_proj_simple """
+        SELECT k1, content MATCH_ANY 'hello' as m FROM ${tableA} ORDER BY k1
+    """
+
+    // Test 2: MATCH as projection with FULL OUTER JOIN
+    // MATCH cannot be pushed as filter in FULL OUTER JOIN,
+    // so it should be pushed as a virtual column projection.
+    order_qt_match_proj_full_join """
+        SELECT ${tableA}.k1, ${tableA}.content MATCH_ANY 'hello' as m
+        FROM ${tableA} FULL OUTER JOIN ${tableB} ON ${tableA}.k1 = ${tableB}.k1
+        ORDER BY ${tableA}.k1
+    """
+
+    // Test 3: Multiple MATCH projections
+    order_qt_match_proj_multiple """
+        SELECT ${tableA}.k1,
+               ${tableA}.content MATCH_ANY 'hello' as m1,
+               ${tableA}.content MATCH_ANY 'foo' as m2
+        FROM ${tableA} FULL OUTER JOIN ${tableB} ON ${tableA}.k1 = ${tableB}.k1
+        ORDER BY ${tableA}.k1
+    """
+
+    // Test 4: MATCH projection with additional filter
+    order_qt_match_proj_with_filter """
+        SELECT ${tableA}.k1, ${tableA}.content MATCH_ANY 'hello' as m
+        FROM ${tableA} FULL OUTER JOIN ${tableB} ON ${tableA}.k1 = ${tableB}.k1
+        WHERE ${tableA}.k1 > 2
+        ORDER BY ${tableA}.k1
+    """
+
+    // Test 5: MATCH_PHRASE as projection
+    order_qt_match_phrase_proj """
+        SELECT k1, content MATCH_PHRASE 'hello world' as m FROM ${tableA} 
ORDER BY k1
+    """
+
+    // Test 6: Verify MATCH as filter still works correctly (regression check)
+    order_qt_match_filter_still_works """
+        SELECT * FROM ${tableA} WHERE content MATCH_ANY 'hello' ORDER BY k1
+    """
+
+    // Test 7: MATCH in INNER JOIN (can be pushed as filter, should still work)
+    order_qt_match_inner_join """
+        SELECT ${tableA}.k1, ${tableA}.content
+        FROM ${tableA} INNER JOIN ${tableB} ON ${tableA}.k1 = ${tableB}.k1
+        WHERE ${tableA}.content MATCH_ANY 'hello'
+        ORDER BY ${tableA}.k1
+    """
+
+    // Test 8: Verify EXPLAIN shows virtual column for FULL OUTER JOIN MATCH 
projection
+    def explainResult = sql """
+        EXPLAIN VERBOSE SELECT ${tableA}.k1, ${tableA}.content MATCH_ANY 
'hello' as m
+        FROM ${tableA} FULL OUTER JOIN ${tableB} ON ${tableA}.k1 = ${tableB}.k1
+    """
+    def explainStr = explainResult.collect { it.toString() }.join("\n")
+    // The SlotDescriptor for the virtual column should show: 
virtualColumn=content... MATCH_ANY 'hello'
+    assertTrue(explainStr.contains("MATCH_ANY"), "EXPLAIN should contain 
MATCH_ANY")
+    assertTrue(explainStr.contains("__DORIS_VIRTUAL_COL__"),
+            "EXPLAIN should show virtual column slot for MATCH projection")
+
+    // 
=========================================================================
+    // Extended tests: edge cases and graceful degradation
+    // 
=========================================================================
+
+    // Test 9: MATCH projection on table WITHOUT inverted index (graceful 
degradation via slow path)
+    def tableNoIdx = "test_match_proj_no_idx"
+    sql "DROP TABLE IF EXISTS ${tableNoIdx}"
+    sql """
+        CREATE TABLE ${tableNoIdx} (
+            k1 INT,
+            content TEXT
+        ) ENGINE=OLAP
+        DUPLICATE KEY(k1)
+        DISTRIBUTED BY HASH(k1) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+    sql """ INSERT INTO ${tableNoIdx} VALUES
+        (1, 'hello world'),
+        (2, 'foo bar baz'),
+        (3, 'hello doris')
+    """
+    order_qt_match_proj_no_index """
+        SELECT k1, content MATCH_ANY 'hello' as m FROM ${tableNoIdx} ORDER BY 
k1
+    """
+    sql "DROP TABLE IF EXISTS ${tableNoIdx}"
+
+    // Test 10: UNIQUE_KEYS table (MOW) — rule SHOULD fire (MOW is supported).
+    // Verify MATCH projection works correctly on MOW UNIQUE table.
+    def tableUniq = "test_match_proj_uniq"
+    sql "DROP TABLE IF EXISTS ${tableUniq}"
+    sql """
+        CREATE TABLE ${tableUniq} (
+            k1 INT,
+            content VARCHAR(200),
+            INDEX idx_content (content) USING INVERTED PROPERTIES("parser" = 
"english")
+        ) ENGINE=OLAP
+        UNIQUE KEY(k1)
+        DISTRIBUTED BY HASH(k1) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "enable_unique_key_merge_on_write" = "true"
+        )
+    """
+    sql """ INSERT INTO ${tableUniq} VALUES
+        (1, 'hello world'),
+        (2, 'foo bar'),
+        (3, 'hello doris')
+    """
+    order_qt_match_proj_unique_mow """
+        SELECT k1, content MATCH_ANY 'hello' as m FROM ${tableUniq} ORDER BY k1
+    """
+    sql "DROP TABLE IF EXISTS ${tableUniq}"
+
+    // Test 12: Compound MATCH expression — (MATCH AND MATCH) is not a bare 
Match,
+    // so unwrapMatch returns null and it won't be pushed as virtual column.
+    // Verify correct results via slow path expression evaluation.
+    order_qt_match_proj_compound """
+        SELECT k1,
+               (content MATCH_ANY 'hello') AND (content MATCH_ANY 'world') as m
+        FROM ${tableA} ORDER BY k1
+    """
+
+    // Test 13: MATCH projection coexisting with regular filter on same table
+    // This exercises the Project -> Filter -> OlapScan pattern.
+    order_qt_match_proj_with_direct_filter """
+        SELECT k1, content MATCH_ANY 'hello' as m
+        FROM ${tableA}
+        WHERE k1 BETWEEN 2 AND 4
+        ORDER BY k1
+    """
+
+    sql "DROP TABLE IF EXISTS ${tableA}"
+    sql "DROP TABLE IF EXISTS ${tableB}"
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to