This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new cd0d86edc2a [refactor](predicate) remove 'can_do_apply_safe' on column
predicate (#55465)
cd0d86edc2a is described below
commit cd0d86edc2a28cd798e7947211a90182d3da4334
Author: Sun Chenyang <[email protected]>
AuthorDate: Wed Sep 3 09:05:49 2025 +0800
[refactor](predicate) remove 'can_do_apply_safe' on column predicate
(#55465)
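Remove the `can_do_apply_safely` method from the column predicate and block
column predicate classes. `Segment::can_apply_predicate_safely` no longer takes
the predicate as an argument; for a variant column that has an entry in
`target_cast_type_for_variants`, it now compares the storage column type
against that target cast type.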
---
be/src/olap/accept_null_predicate.h | 4 ----
be/src/olap/bitmap_filter_predicate.h | 4 ----
be/src/olap/block_column_predicate.h | 17 ---------------
be/src/olap/bloom_filter_predicate.h | 4 ----
be/src/olap/column_predicate.h | 5 -----
be/src/olap/comparison_predicate.h | 4 ----
be/src/olap/in_list_predicate.h | 4 ----
be/src/olap/like_column_predicate.h | 4 ----
be/src/olap/null_predicate.h | 5 -----
be/src/olap/rowset/segment_v2/segment.cpp | 11 +++++-----
be/src/olap/rowset/segment_v2/segment.h | 23 +++++++++------------
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 23 ++++++++++++---------
be/src/olap/shared_predicate.h | 8 -------
regression-test/data/variant_p0/cast.out | Bin 247 -> 248 bytes
regression-test/suites/variant_p0/cast.groovy | 19 ++++++++++++++++-
15 files changed, 47 insertions(+), 88 deletions(-)
diff --git a/be/src/olap/accept_null_predicate.h
b/be/src/olap/accept_null_predicate.h
index e3296c6fcc6..ec0f199e496 100644
--- a/be/src/olap/accept_null_predicate.h
+++ b/be/src/olap/accept_null_predicate.h
@@ -56,10 +56,6 @@ public:
return _nested->evaluate(name_with_type, iterator, num_rows, bitmap);
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return _nested->can_do_apply_safely(input_type, is_null);
- }
-
void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel,
uint16_t size,
bool* flags) const override {
if (column.has_null()) {
diff --git a/be/src/olap/bitmap_filter_predicate.h
b/be/src/olap/bitmap_filter_predicate.h
index 64923f6e768..9bf44354a98 100644
--- a/be/src/olap/bitmap_filter_predicate.h
+++ b/be/src/olap/bitmap_filter_predicate.h
@@ -41,10 +41,6 @@ public:
PredicateType type() const override { return PredicateType::BITMAP_FILTER;
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return input_type == T || (is_string_type(input_type) &&
is_string_type(T));
- }
-
bool evaluate_and(const std::pair<WrapperField*, WrapperField*>&
statistic) const override {
if (_specific_filter->is_not_in()) {
return true;
diff --git a/be/src/olap/block_column_predicate.h
b/be/src/olap/block_column_predicate.h
index b6ff115c34c..1c4507f1496 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -73,10 +73,6 @@ public:
virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t
size, bool* flags) const {
}
- virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null)
const {
- throw Exception(Status::FatalError("should not reach here"));
- }
-
virtual bool support_zonemap() const { return true; }
virtual bool evaluate_and(const std::pair<WrapperField*, WrapperField*>&
statistic) const {
@@ -132,10 +128,6 @@ public:
return _predicate->can_do_bloom_filter(ngram);
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return _predicate->can_do_apply_safely(input_type, is_null);
- }
-
private:
const ColumnPredicate* _predicate = nullptr;
};
@@ -220,15 +212,6 @@ public:
return true;
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- for (auto& pred : _block_column_predicate_vec) {
- if (!pred->can_do_apply_safely(input_type, is_null)) {
- return false;
- }
- }
- return true;
- }
-
Status evaluate(const std::string& column_name, InvertedIndexIterator*
iterator,
uint32_t num_rows, roaring::Roaring* bitmap) const
override;
};
diff --git a/be/src/olap/bloom_filter_predicate.h
b/be/src/olap/bloom_filter_predicate.h
index 4fed6ac3ce0..a0e70e7ff5a 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -50,10 +50,6 @@ public:
return Status::OK();
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return input_type == T || (is_string_type(input_type) &&
is_string_type(T));
- }
-
double get_ignore_threshold() const override { return
get_bloom_filter_ignore_thredhold(); }
private:
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 793fe3841bf..372d71e7a81 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -223,11 +223,6 @@ public:
virtual bool can_do_bloom_filter(bool ngram) const { return false; }
- // Check input type could apply safely.
- // Note: Currenly ColumnPredicate is not include complex type, so use
PrimitiveType
- // is simple and intuitive
- virtual bool can_do_apply_safely(PrimitiveType input_type, bool is_null)
const = 0;
-
// used to evaluate pre read column in lazy materialization
// now only support integer/float
// a vectorized eval way
diff --git a/be/src/olap/comparison_predicate.h
b/be/src/olap/comparison_predicate.h
index 6501d4aa13d..78039134a2f 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -36,10 +36,6 @@ public:
ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite
= false)
: ColumnPredicate(column_id, opposite), _value(value) {}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return input_type == Type || (is_string_type(input_type) &&
is_string_type(Type));
- }
-
PredicateType type() const override { return PT; }
Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 78160438fbf..55a64e7acff 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -135,10 +135,6 @@ public:
PredicateType type() const override { return PT; }
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return input_type == Type || (is_string_type(input_type) &&
is_string_type(Type));
- }
-
Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* result) const override {
if (iterator == nullptr) {
diff --git a/be/src/olap/like_column_predicate.h
b/be/src/olap/like_column_predicate.h
index 7f58118b0bd..37af7a14298 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -62,10 +62,6 @@ public:
return Status::OK();
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- return input_type == T || (is_string_type(input_type) &&
is_string_type(T));
- }
-
void evaluate_and_vec(const vectorized::IColumn& column, uint16_t size,
bool* flags) const override;
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index d99b519b90b..ffd05af86a1 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -94,11 +94,6 @@ public:
bool can_do_bloom_filter(bool ngram) const override { return _is_null &&
!ngram; }
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- // Always safe to apply is null predicate
- return true;
- }
-
void evaluate_vec(const vectorized::IColumn& column, uint16_t size, bool*
flags) const override;
private:
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index ab4db0bc9c0..88452e5ad09 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -250,9 +250,9 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
continue;
}
if (read_options.col_id_to_predicates.contains(column_id) &&
- can_apply_predicate_safely(column_id,
-
read_options.col_id_to_predicates.at(column_id).get(),
- *schema,
read_options.io_ctx.reader_type) &&
+ can_apply_predicate_safely(column_id, *schema,
+
read_options.target_cast_type_for_variants,
+ read_options.io_ctx.reader_type) &&
!reader->match_condition(entry.second.get())) {
// any condition not satisfied, return.
*iter = std::make_unique<EmptySegmentIterator>(*schema);
@@ -279,8 +279,9 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
}
RETURN_IF_ERROR(st);
DCHECK(reader != nullptr);
- if (can_apply_predicate_safely(runtime_predicate->column_id(),
runtime_predicate.get(),
- *schema,
read_options.io_ctx.reader_type) &&
+ if (can_apply_predicate_safely(runtime_predicate->column_id(),
*schema,
+
read_options.target_cast_type_for_variants,
+ read_options.io_ctx.reader_type) &&
!reader->match_condition(&and_predicate)) {
// any condition not satisfied, return.
*iter = std::make_unique<EmptySegmentIterator>(*schema);
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index 370be8dd8c7..3f5418b1e38 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -180,26 +180,23 @@ public:
bool same_with_storage_type(int32_t cid, const Schema& schema, bool
read_flat_leaves);
// If column in segment is the same type in schema, then it is safe to
apply predicate
- template <typename Predicate>
- bool can_apply_predicate_safely(int cid, Predicate* pred, const Schema&
schema,
- ReaderType read_type) {
+ bool can_apply_predicate_safely(
+ int cid, const Schema& schema,
+ const std::map<std::string, vectorized::DataTypePtr>&
target_cast_type_for_variants,
+ ReaderType read_type) {
const doris::Field* col = schema.column(cid);
vectorized::DataTypePtr storage_column_type =
get_data_type_of(col->get_desc(), read_type !=
ReaderType::READER_QUERY);
- if (storage_column_type == nullptr) {
- // Default column iterator
+ if (storage_column_type == nullptr || col->type() !=
FieldType::OLAP_FIELD_TYPE_VARIANT ||
+ !target_cast_type_for_variants.contains(col->name())) {
+ // Default column iterator or not variant column
return true;
}
- PrimitiveType type = storage_column_type->get_primitive_type();
- if (type == TYPE_VARIANT || is_complex_type(type)) {
- // Predicate should nerver apply on variant/complex type
+ if
(storage_column_type->equals(*target_cast_type_for_variants.at(col->name()))) {
+ return true;
+ } else {
return false;
}
- bool safe =
pred->can_do_apply_safely(storage_column_type->get_primitive_type(),
-
storage_column_type->is_nullable());
- // Currently only variant column can lead to unsafe
- CHECK(safe || col->type() == FieldType::OLAP_FIELD_TYPE_VARIANT);
- return safe;
}
const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 350425b0418..7911f417007 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -301,7 +301,8 @@ Status SegmentIterator::_init_impl(const
StorageReadOptions& opts) {
_col_predicates.clear();
for (const auto& predicate : opts.column_predicates) {
- if (!_segment->can_apply_predicate_safely(predicate->column_id(),
predicate, *_schema,
+ if (!_segment->can_apply_predicate_safely(predicate->column_id(),
*_schema,
+
_opts.target_cast_type_for_variants,
_opts.io_ctx.reader_type)) {
continue;
}
@@ -715,9 +716,9 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) {
RowRanges dict_row_ranges = RowRanges::create_single(num_rows());
for (auto cid : cids) {
- if (!_segment->can_apply_predicate_safely(cid,
-
_opts.col_id_to_predicates.at(cid).get(),
- *_schema,
_opts.io_ctx.reader_type)) {
+ if (!_segment->can_apply_predicate_safely(cid, *_schema,
+
_opts.target_cast_type_for_variants,
+
_opts.io_ctx.reader_type)) {
continue;
}
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
@@ -747,8 +748,9 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
RowRanges bf_row_ranges = RowRanges::create_single(num_rows());
for (auto& cid : cids) {
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
- if (!_segment->can_apply_predicate_safely(cid,
_opts.col_id_to_predicates.at(cid).get(),
- *_schema,
_opts.io_ctx.reader_type)) {
+ if (!_segment->can_apply_predicate_safely(cid, *_schema,
+
_opts.target_cast_type_for_variants,
+
_opts.io_ctx.reader_type)) {
continue;
}
// get row ranges by bf index of this column,
@@ -776,8 +778,9 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
// second filter data by zone map
for (const auto& cid : cids) {
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
- if (!_segment->can_apply_predicate_safely(cid,
_opts.col_id_to_predicates.at(cid).get(),
- *_schema,
_opts.io_ctx.reader_type)) {
+ if (!_segment->can_apply_predicate_safely(cid, *_schema,
+
_opts.target_cast_type_for_variants,
+
_opts.io_ctx.reader_type)) {
continue;
}
// do not check zonemap if predicate does not support zonemap
@@ -808,8 +811,8 @@ Status
SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
std::shared_ptr<doris::ColumnPredicate> runtime_predicate =
query_ctx->get_runtime_predicate(id).get_predicate(
_opts.topn_filter_target_node_id);
- if
(_segment->can_apply_predicate_safely(runtime_predicate->column_id(),
-
runtime_predicate.get(), *_schema,
+ if
(_segment->can_apply_predicate_safely(runtime_predicate->column_id(), *_schema,
+
_opts.target_cast_type_for_variants,
_opts.io_ctx.reader_type)) {
AndBlockColumnPredicate and_predicate;
and_predicate.add_column_predicate(
diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h
index 064aa72e582..902cff749ac 100644
--- a/be/src/olap/shared_predicate.h
+++ b/be/src/olap/shared_predicate.h
@@ -71,14 +71,6 @@ public:
return _nested->evaluate(name_with_type, iterator, num_rows, bitmap);
}
- bool can_do_apply_safely(PrimitiveType input_type, bool is_null) const
override {
- std::shared_lock<std::shared_mutex> lock(_mtx);
- if (!_nested) {
- return true;
- }
- return _nested->can_do_apply_safely(input_type, is_null);
- }
-
void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel,
uint16_t size,
bool* flags) const override {
std::shared_lock<std::shared_mutex> lock(_mtx);
diff --git a/regression-test/data/variant_p0/cast.out
b/regression-test/data/variant_p0/cast.out
index 7ceb039ea71..96f4297f4ee 100644
Binary files a/regression-test/data/variant_p0/cast.out and
b/regression-test/data/variant_p0/cast.out differ
diff --git a/regression-test/suites/variant_p0/cast.groovy
b/regression-test/suites/variant_p0/cast.groovy
index 434104000ca..e002a10352d 100644
--- a/regression-test/suites/variant_p0/cast.groovy
+++ b/regression-test/suites/variant_p0/cast.groovy
@@ -57,4 +57,21 @@ suite("test_variant_cast", "p0") {
//qt_sql7 "select cast(var as json) from var_not_null_cast"
sql """insert into var_not_null_cast values (1, '123')"""
//qt_sql8 "select cast(var as json) from var_not_null_cast"
-}
+ sql """insert into var_not_null_cast values (1, '{"aaa" : "aaa"}')"""
+ qt_sql9 "select * from var_not_null_cast where cast(var['aaa'] as int) is
null"
+
+ sql "DROP TABLE IF EXISTS var_cast_decimal"
+ sql """
+ CREATE TABLE `var_cast_decimal` (
+ `k` int NULL,
+ `var` variant<'aaa': decimal(10, 2),
properties("variant_enable_typed_paths_to_sparse" = "false")> NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ sql """insert into var_cast_decimal values (1, '{"aaa" : 1.23}')"""
+ qt_sql10 "select * from var_cast_decimal where cast(var['aaa'] as
decimal(10, 1)) = 1.2"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]