This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cd0d86edc2a [refactor](predicate) remove 'can_do_apply_safe' on column 
predicate (#55465)
cd0d86edc2a is described below

commit cd0d86edc2a28cd798e7947211a90182d3da4334
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Sep 3 09:05:49 2025 +0800

    [refactor](predicate) remove 'can_do_apply_safe' on column predicate 
(#55465)
    
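    Remove the `can_do_apply_safely` virtual method from the column predicate
    and block column predicate classes and all of their overrides.
    `Segment::can_apply_predicate_safely` no longer takes or consults the
    predicate: it now returns false only for a variant column that has an
    entry in `target_cast_type_for_variants` whose type differs from the
    column's storage type in the segment, and returns true in every other case.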
---
 be/src/olap/accept_null_predicate.h                |   4 ----
 be/src/olap/bitmap_filter_predicate.h              |   4 ----
 be/src/olap/block_column_predicate.h               |  17 ---------------
 be/src/olap/bloom_filter_predicate.h               |   4 ----
 be/src/olap/column_predicate.h                     |   5 -----
 be/src/olap/comparison_predicate.h                 |   4 ----
 be/src/olap/in_list_predicate.h                    |   4 ----
 be/src/olap/like_column_predicate.h                |   4 ----
 be/src/olap/null_predicate.h                       |   5 -----
 be/src/olap/rowset/segment_v2/segment.cpp          |  11 +++++-----
 be/src/olap/rowset/segment_v2/segment.h            |  23 +++++++++------------
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  23 ++++++++++++---------
 be/src/olap/shared_predicate.h                     |   8 -------
 regression-test/data/variant_p0/cast.out           | Bin 247 -> 248 bytes
 regression-test/suites/variant_p0/cast.groovy      |  19 ++++++++++++++++-
 15 files changed, 47 insertions(+), 88 deletions(-)

diff --git a/be/src/olap/accept_null_predicate.h 
b/be/src/olap/accept_null_predicate.h
index e3296c6fcc6..ec0f199e496 100644
--- a/be/src/olap/accept_null_predicate.h
+++ b/be/src/olap/accept_null_predicate.h
@@ -56,10 +56,6 @@ public:
         return _nested->evaluate(name_with_type, iterator, num_rows, bitmap);
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return _nested->can_do_apply_safely(input_type, is_null);
-    }
-
     void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, 
uint16_t size,
                       bool* flags) const override {
         if (column.has_null()) {
diff --git a/be/src/olap/bitmap_filter_predicate.h 
b/be/src/olap/bitmap_filter_predicate.h
index 64923f6e768..9bf44354a98 100644
--- a/be/src/olap/bitmap_filter_predicate.h
+++ b/be/src/olap/bitmap_filter_predicate.h
@@ -41,10 +41,6 @@ public:
 
     PredicateType type() const override { return PredicateType::BITMAP_FILTER; 
}
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return input_type == T || (is_string_type(input_type) && 
is_string_type(T));
-    }
-
     bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& 
statistic) const override {
         if (_specific_filter->is_not_in()) {
             return true;
diff --git a/be/src/olap/block_column_predicate.h 
b/be/src/olap/block_column_predicate.h
index b6ff115c34c..1c4507f1496 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -73,10 +73,6 @@ public:
     virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t 
size, bool* flags) const {
     }
 
-    virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null) 
const {
-        throw Exception(Status::FatalError("should not reach here"));
-    }
-
     virtual bool support_zonemap() const { return true; }
 
     virtual bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& 
statistic) const {
@@ -132,10 +128,6 @@ public:
         return _predicate->can_do_bloom_filter(ngram);
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return _predicate->can_do_apply_safely(input_type, is_null);
-    }
-
 private:
     const ColumnPredicate* _predicate = nullptr;
 };
@@ -220,15 +212,6 @@ public:
         return true;
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        for (auto& pred : _block_column_predicate_vec) {
-            if (!pred->can_do_apply_safely(input_type, is_null)) {
-                return false;
-            }
-        }
-        return true;
-    }
-
     Status evaluate(const std::string& column_name, InvertedIndexIterator* 
iterator,
                     uint32_t num_rows, roaring::Roaring* bitmap) const 
override;
 };
diff --git a/be/src/olap/bloom_filter_predicate.h 
b/be/src/olap/bloom_filter_predicate.h
index 4fed6ac3ce0..a0e70e7ff5a 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -50,10 +50,6 @@ public:
         return Status::OK();
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return input_type == T || (is_string_type(input_type) && 
is_string_type(T));
-    }
-
     double get_ignore_threshold() const override { return 
get_bloom_filter_ignore_thredhold(); }
 
 private:
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 793fe3841bf..372d71e7a81 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -223,11 +223,6 @@ public:
 
     virtual bool can_do_bloom_filter(bool ngram) const { return false; }
 
-    // Check input type could apply safely.
-    // Note: Currenly ColumnPredicate is not include complex type, so use 
PrimitiveType
-    // is simple and intuitive
-    virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null) 
const = 0;
-
     // used to evaluate pre read column in lazy materialization
     // now only support integer/float
     // a vectorized eval way
diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index 6501d4aa13d..78039134a2f 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -36,10 +36,6 @@ public:
     ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite 
= false)
             : ColumnPredicate(column_id, opposite), _value(value) {}
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return input_type == Type || (is_string_type(input_type) && 
is_string_type(Type));
-    }
-
     PredicateType type() const override { return PT; }
 
     Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 78160438fbf..55a64e7acff 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -135,10 +135,6 @@ public:
 
     PredicateType type() const override { return PT; }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return input_type == Type || (is_string_type(input_type) && 
is_string_type(Type));
-    }
-
     Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
                     roaring::Roaring* result) const override {
         if (iterator == nullptr) {
diff --git a/be/src/olap/like_column_predicate.h 
b/be/src/olap/like_column_predicate.h
index 7f58118b0bd..37af7a14298 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -62,10 +62,6 @@ public:
         return Status::OK();
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        return input_type == T || (is_string_type(input_type) && 
is_string_type(T));
-    }
-
     void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
                           bool* flags) const override;
 
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index d99b519b90b..ffd05af86a1 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -94,11 +94,6 @@ public:
 
     bool can_do_bloom_filter(bool ngram) const override { return _is_null && 
!ngram; }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        // Always safe to apply is null predicate
-        return true;
-    }
-
     void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool* 
flags) const override;
 
 private:
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp 
b/be/src/olap/rowset/segment_v2/segment.cpp
index ab4db0bc9c0..88452e5ad09 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -250,9 +250,9 @@ Status Segment::new_iterator(SchemaSPtr schema, const 
StorageReadOptions& read_o
             continue;
         }
         if (read_options.col_id_to_predicates.contains(column_id) &&
-            can_apply_predicate_safely(column_id,
-                                       
read_options.col_id_to_predicates.at(column_id).get(),
-                                       *schema, 
read_options.io_ctx.reader_type) &&
+            can_apply_predicate_safely(column_id, *schema,
+                                       
read_options.target_cast_type_for_variants,
+                                       read_options.io_ctx.reader_type) &&
             !reader->match_condition(entry.second.get())) {
             // any condition not satisfied, return.
             *iter = std::make_unique<EmptySegmentIterator>(*schema);
@@ -279,8 +279,9 @@ Status Segment::new_iterator(SchemaSPtr schema, const 
StorageReadOptions& read_o
             }
             RETURN_IF_ERROR(st);
             DCHECK(reader != nullptr);
-            if (can_apply_predicate_safely(runtime_predicate->column_id(), 
runtime_predicate.get(),
-                                           *schema, 
read_options.io_ctx.reader_type) &&
+            if (can_apply_predicate_safely(runtime_predicate->column_id(), 
*schema,
+                                           
read_options.target_cast_type_for_variants,
+                                           read_options.io_ctx.reader_type) &&
                 !reader->match_condition(&and_predicate)) {
                 // any condition not satisfied, return.
                 *iter = std::make_unique<EmptySegmentIterator>(*schema);
diff --git a/be/src/olap/rowset/segment_v2/segment.h 
b/be/src/olap/rowset/segment_v2/segment.h
index 370be8dd8c7..3f5418b1e38 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -180,26 +180,23 @@ public:
     bool same_with_storage_type(int32_t cid, const Schema& schema, bool 
read_flat_leaves);
 
     // If column in segment is the same type in schema, then it is safe to 
apply predicate
-    template <typename Predicate>
-    bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema& 
schema,
-                                    ReaderType read_type) {
+    bool can_apply_predicate_safely(
+            int cid, const Schema& schema,
+            const std::map<std::string, vectorized::DataTypePtr>& 
target_cast_type_for_variants,
+            ReaderType read_type) {
         const doris::Field* col = schema.column(cid);
         vectorized::DataTypePtr storage_column_type =
                 get_data_type_of(col->get_desc(), read_type != 
ReaderType::READER_QUERY);
-        if (storage_column_type == nullptr) {
-            // Default column iterator
+        if (storage_column_type == nullptr || col->type() != 
FieldType::OLAP_FIELD_TYPE_VARIANT ||
+            !target_cast_type_for_variants.contains(col->name())) {
+            // Default column iterator or not variant column
             return true;
         }
-        PrimitiveType type = storage_column_type->get_primitive_type();
-        if (type == TYPE_VARIANT || is_complex_type(type)) {
-            // Predicate should nerver apply on variant/complex type
+        if 
(storage_column_type->equals(*target_cast_type_for_variants.at(col->name()))) {
+            return true;
+        } else {
             return false;
         }
-        bool safe = 
pred->can_do_apply_safely(storage_column_type->get_primitive_type(),
-                                              
storage_column_type->is_nullable());
-        // Currently only variant column can lead to unsafe
-        CHECK(safe || col->type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
-        return safe;
     }
 
     const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 350425b0418..7911f417007 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -301,7 +301,8 @@ Status SegmentIterator::_init_impl(const 
StorageReadOptions& opts) {
     _col_predicates.clear();
 
     for (const auto& predicate : opts.column_predicates) {
-        if (!_segment->can_apply_predicate_safely(predicate->column_id(), 
predicate, *_schema,
+        if (!_segment->can_apply_predicate_safely(predicate->column_id(), 
*_schema,
+                                                  
_opts.target_cast_type_for_variants,
                                                   _opts.io_ctx.reader_type)) {
             continue;
         }
@@ -715,9 +716,9 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
         if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) {
             RowRanges dict_row_ranges = RowRanges::create_single(num_rows());
             for (auto cid : cids) {
-                if (!_segment->can_apply_predicate_safely(cid,
-                                                          
_opts.col_id_to_predicates.at(cid).get(),
-                                                          *_schema, 
_opts.io_ctx.reader_type)) {
+                if (!_segment->can_apply_predicate_safely(cid, *_schema,
+                                                          
_opts.target_cast_type_for_variants,
+                                                          
_opts.io_ctx.reader_type)) {
                     continue;
                 }
                 DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
@@ -747,8 +748,9 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
         RowRanges bf_row_ranges = RowRanges::create_single(num_rows());
         for (auto& cid : cids) {
             DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
-            if (!_segment->can_apply_predicate_safely(cid, 
_opts.col_id_to_predicates.at(cid).get(),
-                                                      *_schema, 
_opts.io_ctx.reader_type)) {
+            if (!_segment->can_apply_predicate_safely(cid, *_schema,
+                                                      
_opts.target_cast_type_for_variants,
+                                                      
_opts.io_ctx.reader_type)) {
                 continue;
             }
             // get row ranges by bf index of this column,
@@ -776,8 +778,9 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
         // second filter data by zone map
         for (const auto& cid : cids) {
             DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
-            if (!_segment->can_apply_predicate_safely(cid, 
_opts.col_id_to_predicates.at(cid).get(),
-                                                      *_schema, 
_opts.io_ctx.reader_type)) {
+            if (!_segment->can_apply_predicate_safely(cid, *_schema,
+                                                      
_opts.target_cast_type_for_variants,
+                                                      
_opts.io_ctx.reader_type)) {
                 continue;
             }
             // do not check zonemap if predicate does not support zonemap
@@ -808,8 +811,8 @@ Status 
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
                 std::shared_ptr<doris::ColumnPredicate> runtime_predicate =
                         query_ctx->get_runtime_predicate(id).get_predicate(
                                 _opts.topn_filter_target_node_id);
-                if 
(_segment->can_apply_predicate_safely(runtime_predicate->column_id(),
-                                                         
runtime_predicate.get(), *_schema,
+                if 
(_segment->can_apply_predicate_safely(runtime_predicate->column_id(), *_schema,
+                                                         
_opts.target_cast_type_for_variants,
                                                          
_opts.io_ctx.reader_type)) {
                     AndBlockColumnPredicate and_predicate;
                     and_predicate.add_column_predicate(
diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h
index 064aa72e582..902cff749ac 100644
--- a/be/src/olap/shared_predicate.h
+++ b/be/src/olap/shared_predicate.h
@@ -71,14 +71,6 @@ public:
         return _nested->evaluate(name_with_type, iterator, num_rows, bitmap);
     }
 
-    bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const 
override {
-        std::shared_lock<std::shared_mutex> lock(_mtx);
-        if (!_nested) {
-            return true;
-        }
-        return _nested->can_do_apply_safely(input_type, is_null);
-    }
-
     void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, 
uint16_t size,
                       bool* flags) const override {
         std::shared_lock<std::shared_mutex> lock(_mtx);
diff --git a/regression-test/data/variant_p0/cast.out 
b/regression-test/data/variant_p0/cast.out
index 7ceb039ea71..96f4297f4ee 100644
Binary files a/regression-test/data/variant_p0/cast.out and 
b/regression-test/data/variant_p0/cast.out differ
diff --git a/regression-test/suites/variant_p0/cast.groovy 
b/regression-test/suites/variant_p0/cast.groovy
index 434104000ca..e002a10352d 100644
--- a/regression-test/suites/variant_p0/cast.groovy
+++ b/regression-test/suites/variant_p0/cast.groovy
@@ -57,4 +57,21 @@ suite("test_variant_cast", "p0") {
     //qt_sql7 "select cast(var as json) from var_not_null_cast"
     sql """insert into var_not_null_cast values (1, '123')"""
     //qt_sql8 "select cast(var as json) from var_not_null_cast"
-}
+    sql """insert into var_not_null_cast values (1, '{"aaa" : "aaa"}')"""
+    qt_sql9 "select * from var_not_null_cast where cast(var['aaa'] as int) is 
null"
+
+    sql "DROP TABLE IF EXISTS var_cast_decimal"
+    sql """
+        CREATE TABLE `var_cast_decimal` (
+            `k` int NULL,
+            `var` variant<'aaa': decimal(10, 2), 
properties("variant_enable_typed_paths_to_sparse" = "false")> NULL
+        ) ENGINE=OLAP
+        UNIQUE KEY(`k`)
+        DISTRIBUTED BY HASH(k) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    sql """insert into var_cast_decimal values (1, '{"aaa" : 1.23}')"""
+    qt_sql10 "select * from var_cast_decimal where cast(var['aaa'] as 
decimal(10, 1)) = 1.2"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to