This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new b02fdd6f996 branch4.0 [Fix](Variant) predicate should be pushed down 
when conjunct contains CAST (#60485)
b02fdd6f996 is described below

commit b02fdd6f996df9cfad3773acafe184e93c9c2044
Author: lihangyu <[email protected]>
AuthorDate: Thu Feb 5 09:30:32 2026 +0800

    branch4.0 [Fix](Variant) predicate should be pushed down when conjunct 
contains CAST (#60485)
    
    cherry-pick #60448
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp       | 16 ++++++++--------
 be/src/pipeline/exec/olap_scan_operator.cpp              |  3 ++-
 be/src/pipeline/exec/scan_operator.cpp                   | 16 ++++++++++++++--
 .../fault_injection_p0/test_variant_bloom_filter.groovy  |  2 +-
 .../test_variant_count_on_index_fault_injection.groovy   |  6 +++---
 .../predefine/test_types_with_indexes_profile.groovy     |  2 +-
 6 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f7513d357a7..1c6d950a00d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -673,6 +673,13 @@ Status 
SegmentIterator::_get_row_ranges_by_column_conditions() {
         _opts.stats->rows_conditions_filtered += (pre_size - 
_row_bitmap.cardinality());
     }
 
+    DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
+        if (_opts.stats->rows_bf_filtered == 0) {
+            return Status::Error<ErrorCode::INTERNAL_ERROR>(
+                    "Bloom filter did not filter the data.");
+        }
+    })
+
     // TODO(hkp): calculate filter rate to decide whether to
     // use zone map/bloom filter/secondary index or not.
     return Status::OK();
@@ -850,13 +857,6 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
         pre_size = condition_row_ranges->count();
         RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges, 
condition_row_ranges);
         _opts.stats->rows_bf_filtered += (pre_size - 
condition_row_ranges->count());
-
-        DBUG_EXECUTE_IF("bloom_filter_must_filter_data", {
-            if (pre_size - condition_row_ranges->count() == 0) {
-                return Status::Error<ErrorCode::INTERNAL_ERROR>(
-                        "Bloom filter did not filter the data.");
-            }
-        })
     }
 
     {
@@ -2792,7 +2792,7 @@ void 
SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
                         }
                     }
                 }
-                // Exmple: CAST(v['a'] AS VARCHAR) MATCH 'hello', do not add 
CAST expr to index tracking.
+                // Example: CAST(v['a'] AS VARCHAR) MATCH 'hello', do not add 
CAST expr to index tracking.
                 auto expr_without_cast = 
vectorized::VExpr::expr_without_cast(child);
                 if (expr_without_cast->is_slot_ref() && expr->op() != 
TExprOpcode::CAST) {
                     auto* column_slot_ref =
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp 
b/be/src/pipeline/exec/olap_scan_operator.cpp
index dbb2d7733c7..05a6ddbfd2c 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -92,7 +92,8 @@ PushDownType 
OlapScanLocalState::_should_push_down_binary_predicate(
     DCHECK(constant_val->data == nullptr) << "constant_val should not have a 
value";
     const auto& children = fn_call->children();
     DCHECK(children.size() == 2);
-    DCHECK_EQ(children[0]->node_type(), TExprNodeType::SLOT_REF);
+    DCHECK_EQ(vectorized::VExpr::expr_without_cast(children[0])->node_type(),
+              TExprNodeType::SLOT_REF);
     if (children[1]->is_constant()) {
         std::shared_ptr<ColumnPtrWrapper> const_col_wrapper;
         THROW_IF_ERROR(children[1]->get_const_col(expr_ctx, 
&const_col_wrapper));
diff --git a/be/src/pipeline/exec/scan_operator.cpp 
b/be/src/pipeline/exec/scan_operator.cpp
index 143fb9cab8b..6a2d36819d7 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -560,16 +560,28 @@ Status 
ScanLocalState<Derived>::_normalize_function_filters(vectorized::VExprCon
     return Status::OK();
 }
 
+// Only a one-level cast over a variant slot can be pushed down: true iff children[0] is a
+// CAST_EXPR whose first child is a slot ref and expr_without_cast(children[0]) is TYPE_VARIANT.
+static bool is_valid_push_down_cast(const vectorized::VExprSPtrs& children) {
+    auto slot_expr = vectorized::VExpr::expr_without_cast(children[0]);
+    return slot_expr->data_type()->get_primitive_type() == 
PrimitiveType::TYPE_VARIANT &&
+           children[0]->node_type() == TExprNodeType::CAST_EXPR &&
+           children[0]->children().at(0)->is_slot_ref();
+}
+
 template <typename Derived>
 bool ScanLocalState<Derived>::_is_predicate_acting_on_slot(const 
vectorized::VExprSPtrs& children,
                                                            SlotDescriptor** 
slot_desc,
                                                            
ColumnValueRangeType** range) {
-    if (children.empty() || children[0]->node_type() != 
TExprNodeType::SLOT_REF) {
+    // children[0] must be slot ref or cast(slot(variant) as type)
+    if (children.empty() || (children[0]->node_type() != 
TExprNodeType::SLOT_REF &&
+                             !is_valid_push_down_cast(children))) {
         // not a slot ref(column)
         return false;
     }
     std::shared_ptr<vectorized::VSlotRef> slot_ref =
-            std::dynamic_pointer_cast<vectorized::VSlotRef>(children[0]);
+            std::dynamic_pointer_cast<vectorized::VSlotRef>(
+                    vectorized::VExpr::expr_without_cast(children[0]));
     *slot_desc =
             _parent->cast<typename 
Derived::Parent>()._slot_id_to_slot_desc[slot_ref->slot_id()];
     auto entry = _slot_id_to_predicates.find(slot_ref->slot_id());
diff --git 
a/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy 
b/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
index fa20bbbe072..16ecba66a28 100644
--- a/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
+++ b/regression-test/suites/fault_injection_p0/test_variant_bloom_filter.groovy
@@ -55,7 +55,7 @@ suite("test_variant_bloom_filter", "nonConcurrent") {
     int seed = Math.floor(Math.random() * 7) 
     def var_def = "variant"
     if (seed % 2 == 0) {
-        var_def = "variant<'repo.id' : int, 'repo.name' : string, 'repo.url' : 
string, 'repo.description' : string, 'repo.created_at' : string>"
+        var_def = "variant<'repo.id' : bigint, 'repo.name' : string, 
'repo.url' : string, 'repo.description' : string, 'repo.created_at' : string>"
     } else {
         var_def = "variant<properties(\"variant_max_subcolumns_count\" = 
\"100\")>"
     }
diff --git 
a/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
index 81674214b31..53bb872f707 100644
--- 
a/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
+++ 
b/regression-test/suites/fault_injection_p0/test_variant_count_on_index_fault_injection.groovy
@@ -28,6 +28,7 @@ suite("test_variant_count_on_index_fault_injection", "p0, 
nonConcurrent") {
     sql "set enable_match_without_inverted_index = false"
     sql "set experimental_enable_nereids_planner = true"
     sql "set enable_fallback_to_original_planner = false"
+    sql "set inverted_index_skip_threshold = 0"
 
     sql """
         CREATE TABLE ${tbl} (
@@ -148,9 +149,8 @@ suite("test_variant_count_on_index_fault_injection", "p0, 
nonConcurrent") {
         def dp2 = sql "select count(v['b']) from ${tbl} where v['a'] match 
'hello' and v['b'] match 'world'"
         assertEquals(3, toInt(dp2[0][0]))
 
-        // TODO: FIXME
-        // def dpn1 = sql "select count() from ${tbl} where cast(v['c'] as 
bigint) = 1"
-        // assertEquals(3, toInt(dpn1[0][0]))
+        def dpn1 = sql "select count() from ${tbl} where cast(v['c'] as 
bigint) = 1"
+        assertEquals(3, toInt(dpn1[0][0]))
     } finally {
         
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
     }
diff --git 
a/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
 
b/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
index 236ce0d56bd..2b49366b55c 100644
--- 
a/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
+++ 
b/regression-test/suites/variant_p0/predefine/test_types_with_indexes_profile.groovy
@@ -196,7 +196,7 @@ suite("test_variant_predefine_types_with_indexes_profile", 
"p0,nonConcurrent"){
     }
     // accurateCheckIndexWithQueries()
     // sql "insert into test_variant_predefine_types_with_indexes_profile 
select * from test_variant_predefine_types_with_indexes_profile"
-    queryAndCheckWithBloomFilter("select count() from 
test_variant_predefine_types_with_indexes_profile where 
array_contains(cast(var['array_decimal_1'] as array<decimalv3 (26,9)>), 
12345678901234567.123456789)")
+    // queryAndCheckWithBloomFilter("select count() from 
test_variant_predefine_types_with_indexes_profile where 
array_contains(cast(var['array_decimal_1'] as array<decimalv3 (26,9)>), 
12345678901234567.123456789)")
 
     queryAndCheckWithBloomFilter("select count() from 
test_variant_predefine_types_with_indexes_profile where cast(var['int_1'] as 
int) = 42")
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to