This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3103bb08dcd [pick](Variant) casting to decimal type may lose precision 
 (#39843)
3103bb08dcd is described below

commit 3103bb08dcde269b5388a98fea3ea41fcd7f2890
Author: lihangyu <[email protected]>
AuthorDate: Fri Aug 23 22:47:32 2024 +0800

    [pick](Variant) casting to decimal type may lose precision  (#39843)
    
    #39650
---
 be/src/olap/iterators.h                            |  2 +-
 be/src/olap/rowset/rowset_reader_context.h         |  2 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  2 +-
 be/src/olap/tablet_reader.cpp                      |  3 +-
 be/src/olap/tablet_reader.h                        |  2 +-
 be/src/pipeline/exec/scan_operator.cpp             | 38 +++++++++++-----------
 be/src/pipeline/exec/scan_operator.h               |  5 +--
 be/src/vec/exec/scan/new_olap_scan_node.cpp        |  4 +--
 be/src/vec/exec/scan/new_olap_scan_node.h          |  2 +-
 be/src/vec/exec/scan/vscan_node.cpp                | 16 ++++-----
 be/src/vec/exec/scan/vscan_node.h                  |  3 +-
 .../data/variant_p0/sql/implicit_cast.out          | 12 +++++++
 .../suites/variant_p0/sql/implicit_cast.sql        |  4 ++-
 13 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 5d752a2bf73..b29c63a2c9a 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -118,7 +118,7 @@ public:
     Version version;
     int64_t tablet_id = 0;
     // slots that cast may be eliminated in storage layer
-    std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+    std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
     RowRanges row_ranges;
     size_t topn_limit = 0;
 };
diff --git a/be/src/olap/rowset/rowset_reader_context.h 
b/be/src/olap/rowset/rowset_reader_context.h
index 6029196c9bb..8cc7a281c88 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -82,7 +82,7 @@ struct RowsetReaderContext {
     const std::set<int32_t>* output_columns = nullptr;
     RowsetId rowset_id;
     // slots that cast may be eliminated in storage layer
-    std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+    std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
     size_t topn_limit = 0;
 };
 
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 8d647aab39f..c9eec10e0c4 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1816,7 +1816,7 @@ bool 
SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
     if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) {
         // Use variant cast dst type
         field_type = TabletColumn::get_field_type_by_type(
-                
_opts.target_cast_type_for_variants[_schema->column(cid)->name()]);
+                
_opts.target_cast_type_for_variants[_schema->column(cid)->name()].type);
     }
     switch (predicate->type()) {
     case PredicateType::EQ:
diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp
index 5af4c0d4fcc..6b5d12118f7 100644
--- a/be/src/olap/tablet_reader.cpp
+++ b/be/src/olap/tablet_reader.cpp
@@ -272,7 +272,8 @@ TabletColumn TabletReader::materialize_column(const 
TabletColumn& orig) {
     }
     TabletColumn column_with_cast_type = orig;
     auto cast_type = 
_reader_context.target_cast_type_for_variants.at(orig.name());
-    
column_with_cast_type.set_type(TabletColumn::get_field_type_by_type(cast_type));
+    FieldType filed_type = 
TabletColumn::get_field_type_by_type(cast_type.type);
+    column_with_cast_type.set_type(filed_type);
     return column_with_cast_type;
 }
 
diff --git a/be/src/olap/tablet_reader.h b/be/src/olap/tablet_reader.h
index 942c61f8207..8b99a8f886c 100644
--- a/be/src/olap/tablet_reader.h
+++ b/be/src/olap/tablet_reader.h
@@ -136,7 +136,7 @@ public:
         std::vector<FunctionFilter> function_filters;
         std::vector<RowsetMetaSharedPtr> delete_predicates;
         // slots that cast may be eliminated in storage layer
-        std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+        std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
 
         std::vector<RowSetSplits> rs_splits;
         // For unique key table with merge-on-write
diff --git a/be/src/pipeline/exec/scan_operator.cpp 
b/be/src/pipeline/exec/scan_operator.cpp
index feb10a618da..d17a3aa701b 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -28,6 +28,7 @@
 #include "pipeline/exec/meta_scan_operator.h"
 #include "pipeline/exec/olap_scan_operator.h"
 #include "pipeline/exec/operator.h"
+#include "runtime/types.h"
 #include "util/runtime_profile.h"
 #include "vec/exec/runtime_filter_consumer.h"
 #include "vec/exec/scan/pip_scanner_context.h"
@@ -168,14 +169,14 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
     // The conjuncts is always on output tuple, so use _output_tuple_desc;
     std::vector<SlotDescriptor*> slots = p._output_tuple_desc->slots();
 
-    auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
-        switch (type) {
-#define M(NAME)                                                                
          \
-    case TYPE_##NAME: {                                                        
          \
-        ColumnValueRange<TYPE_##NAME> range(slot->col_name(), 
slot->is_nullable(),       \
-                                            slot->type().precision, 
slot->type().scale); \
-        _slot_id_to_value_range[slot->id()] = std::pair {slot, range};         
          \
-        break;                                                                 
          \
+    auto init_value_range = [&](SlotDescriptor* slot, const TypeDescriptor& 
type_desc) {
+        switch (type_desc.type) {
+#define M(NAME)                                                                
    \
+    case TYPE_##NAME: {                                                        
    \
+        ColumnValueRange<TYPE_##NAME> range(slot->col_name(), 
slot->is_nullable(), \
+                                            type_desc.precision, 
type_desc.scale); \
+        _slot_id_to_value_range[slot->id()] = std::pair {slot, range};         
    \
+        break;                                                                 
    \
     }
 #define APPLY_FOR_PRIMITIVE_TYPE(M) \
     M(TINYINT)                      \
@@ -219,7 +220,7 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
                 continue;
             }
         }
-        init_value_range(slots[slot_idx], slots[slot_idx]->type().type);
+        init_value_range(slots[slot_idx], slots[slot_idx]->type());
     }
 
     get_cast_types_for_variants();
@@ -631,7 +632,7 @@ Status 
ScanLocalState<Derived>::_normalize_in_and_eq_predicate(
         vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx, 
SlotDescriptor* slot,
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
-            slot->is_nullable(), slot->type().precision, slot->type().scale);
+            slot->is_nullable(), range.precision(), range.scale());
     // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
     if (TExprNodeType::IN_PRED == expr->node_type()) {
         HybridSetBase::IteratorBase* iter = nullptr;
@@ -787,7 +788,7 @@ Status 
ScanLocalState<Derived>::_normalize_not_in_and_not_eq_predicate(
         ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
     bool is_fixed_range = range.is_fixed_value_range();
     auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(
-            range.column_name(), slot->is_nullable(), slot->type().precision, 
slot->type().scale);
+            range.column_name(), slot->is_nullable(), range.precision(), 
range.scale());
     vectorized::VScanNode::PushDownType temp_pdt =
             vectorized::VScanNode::PushDownType::UNACCEPTABLE;
     // 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
@@ -969,14 +970,14 @@ Status 
ScanLocalState<Derived>::_normalize_is_null_predicate(
         if 
(reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name 
==
             "is_null_pred") {
             auto temp_range = 
ColumnValueRange<T>::create_empty_column_value_range(
-                    slot->is_nullable(), slot->type().precision, 
slot->type().scale);
+                    slot->is_nullable(), range.precision(), range.scale());
             temp_range.set_contain_null(true);
             range.intersection(temp_range);
             *pdt = temp_pdt;
         } else if 
(reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name 
==
                    "is_not_null_pred") {
             auto temp_range = 
ColumnValueRange<T>::create_empty_column_value_range(
-                    slot->is_nullable(), slot->type().precision, 
slot->type().scale);
+                    slot->is_nullable(), range.precision(), range.scale());
             temp_range.set_contain_null(false);
             range.intersection(temp_range);
             *pdt = temp_pdt;
@@ -1216,7 +1217,7 @@ Status 
ScanLocalState<Derived>::_normalize_match_predicate(
 
         // create empty range as temp range, temp range should do intersection 
on range
         auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
-                slot->is_nullable(), slot->type().precision, 
slot->type().scale);
+                slot->is_nullable(), range.precision(), range.scale());
         // Normalize match conjuncts like 'where col match value'
 
         auto match_checker = [](const std::string& fn_name) { return 
is_match_condition(fn_name); };
@@ -1361,7 +1362,7 @@ Status ScanLocalState<Derived>::_init_profile() {
 template <typename Derived>
 void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
         const vectorized::VExpr* expr,
-        phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& 
colname_to_cast_types) {
+        std::unordered_map<std::string, std::vector<TypeDescriptor>>& 
colname_to_cast_types) {
     const auto* cast_expr = dynamic_cast<const vectorized::VCastExpr*>(expr);
     if (cast_expr != nullptr) {
         const auto* src_slot =
@@ -1373,10 +1374,9 @@ void 
ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
         }
         std::vector<SlotDescriptor*> slots = output_tuple_desc()->slots();
         SlotDescriptor* src_slot_desc = 
_slot_id_to_slot_desc[src_slot->slot_id()];
-        PrimitiveType cast_dst_type =
-                
cast_expr->get_target_type()->get_type_as_type_descriptor().type;
+        TypeDescriptor type_desc = 
cast_expr->get_target_type()->get_type_as_type_descriptor();
         if (src_slot_desc->type().is_variant_type()) {
-            
colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type);
+            
colname_to_cast_types[src_slot_desc->col_name()].push_back(type_desc);
         }
     }
     for (const auto& child : expr->children()) {
@@ -1386,7 +1386,7 @@ void 
ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
 
 template <typename Derived>
 void ScanLocalState<Derived>::get_cast_types_for_variants() {
-    phmap::flat_hash_map<std::string, std::vector<PrimitiveType>> 
colname_to_cast_types;
+    std::unordered_map<std::string, std::vector<TypeDescriptor>> 
colname_to_cast_types;
     for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
         auto& conjunct = *it;
         if (conjunct->root()) {
diff --git a/be/src/pipeline/exec/scan_operator.h 
b/be/src/pipeline/exec/scan_operator.h
index 4cdebeedc83..100b12d0a76 100644
--- a/be/src/pipeline/exec/scan_operator.h
+++ b/be/src/pipeline/exec/scan_operator.h
@@ -27,6 +27,7 @@
 #include "pipeline/pipeline_x/dependency.h"
 #include "pipeline/pipeline_x/operator.h"
 #include "runtime/descriptors.h"
+#include "runtime/types.h"
 #include "vec/exec/scan/vscan_node.h"
 
 namespace doris {
@@ -329,7 +330,7 @@ protected:
     void get_cast_types_for_variants();
     void _filter_and_collect_cast_type_for_variant(
             const vectorized::VExpr* expr,
-            phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& 
colname_to_cast_types);
+            std::unordered_map<std::string, std::vector<TypeDescriptor>>& 
colname_to_cast_types);
 
     // Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in 
this vector
     // so that it will be destroyed uniformly at the end of the query.
@@ -344,7 +345,7 @@ protected:
     std::vector<FunctionFilter> _push_down_functions;
 
     // colname -> cast dst type
-    std::map<std::string, PrimitiveType> _cast_types_for_variants;
+    std::map<std::string, TypeDescriptor> _cast_types_for_variants;
 
     // slot id -> SlotDescriptor
     phmap::flat_hash_map<int, SlotDescriptor*> _slot_id_to_slot_desc;
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp 
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 63b0067dd1c..c6e4363db4d 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -423,7 +423,7 @@ std::string NewOlapScanNode::get_name() {
 
 void NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
         const VExpr* expr,
-        phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& 
colname_to_cast_types) {
+        phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>& 
colname_to_cast_types) {
     auto* cast_expr = dynamic_cast<const VCastExpr*>(expr);
     if (cast_expr != nullptr) {
         auto* src_slot = cast_expr->get_child(0)->node_type() == 
TExprNodeType::SLOT_REF
@@ -446,7 +446,7 @@ void 
NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
 }
 
 void NewOlapScanNode::get_cast_types_for_variants() {
-    phmap::flat_hash_map<std::string, std::vector<PrimitiveType>> 
colname_to_cast_types;
+    phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>> 
colname_to_cast_types;
     for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
         auto& conjunct = *it;
         if (conjunct->root()) {
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h 
b/be/src/vec/exec/scan/new_olap_scan_node.h
index 4ee2b77216f..15b15ead164 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -106,7 +106,7 @@ protected:
     void get_cast_types_for_variants() override;
     void _filter_and_collect_cast_type_for_variant(
             const VExpr* expr,
-            phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>& 
colname_to_cast_types);
+            phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>& 
colname_to_cast_types);
 
 private:
     Status _build_key_ranges_and_filters();
diff --git a/be/src/vec/exec/scan/vscan_node.cpp 
b/be/src/vec/exec/scan/vscan_node.cpp
index 5a64287ce24..258e225a26e 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -342,14 +342,14 @@ Status VScanNode::_normalize_conjuncts() {
     // The conjuncts is always on output tuple, so use _output_tuple_desc;
     std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
 
-    auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
-        switch (type) {
-#define M(NAME)                                                                
          \
-    case TYPE_##NAME: {                                                        
          \
-        ColumnValueRange<TYPE_##NAME> range(slot->col_name(), 
slot->is_nullable(),       \
-                                            slot->type().precision, 
slot->type().scale); \
-        _slot_id_to_value_range[slot->id()] = std::pair {slot, range};         
          \
-        break;                                                                 
          \
+    auto init_value_range = [&](SlotDescriptor* slot, TypeDescriptor type) {
+        switch (type.type) {
+#define M(NAME)                                                                
                    \
+    case TYPE_##NAME: {                                                        
                    \
+        ColumnValueRange<TYPE_##NAME> range(slot->col_name(), 
slot->is_nullable(), type.precision, \
+                                            type.scale);                       
                    \
+        _slot_id_to_value_range[slot->id()] = std::pair {slot, range};         
                    \
+        break;                                                                 
                    \
     }
 #define APPLY_FOR_PRIMITIVE_TYPE(M) \
     M(TINYINT)                      \
diff --git a/be/src/vec/exec/scan/vscan_node.h 
b/be/src/vec/exec/scan/vscan_node.h
index 04bc738fda4..ddbaac13336 100644
--- a/be/src/vec/exec/scan/vscan_node.h
+++ b/be/src/vec/exec/scan/vscan_node.h
@@ -41,6 +41,7 @@
 #include "runtime/define_primitive_type.h"
 #include "runtime/query_context.h"
 #include "runtime/runtime_state.h"
+#include "runtime/types.h"
 #include "util/runtime_profile.h"
 #include "vec/exec/runtime_filter_consumer.h"
 #include "vec/exec/scan/scanner_context.h"
@@ -308,7 +309,7 @@ protected:
     std::vector<FunctionFilter> _push_down_functions;
 
     // colname -> cast dst type
-    std::map<std::string, PrimitiveType> _cast_types_for_variants;
+    std::map<std::string, TypeDescriptor> _cast_types_for_variants;
 
     // slot id -> ColumnValueRange
     // Parsed from conjuncts
diff --git a/regression-test/data/variant_p0/sql/implicit_cast.out 
b/regression-test/data/variant_p0/sql/implicit_cast.out
index b0f5d96087b..2eefddc43e5 100644
--- a/regression-test/data/variant_p0/sql/implicit_cast.out
+++ b/regression-test/data/variant_p0/sql/implicit_cast.out
@@ -78,3 +78,15 @@ user
 user
 user
 
+-- !implicit_cast_14 --
+14690746673
+14690746676
+14690746679
+14690746680
+14690746681
+14690746684
+14690746685
+14690746687
+14690746688
+14690746689
+
diff --git a/regression-test/suites/variant_p0/sql/implicit_cast.sql 
b/regression-test/suites/variant_p0/sql/implicit_cast.sql
index 0653a52eed7..f62b25ecfdc 100644
--- a/regression-test/suites/variant_p0/sql/implicit_cast.sql
+++ b/regression-test/suites/variant_p0/sql/implicit_cast.sql
@@ -12,4 +12,6 @@ SELECT v["payload"]["member"]["id"] FROM ghdata where 
v["payload"]["member"]["id
 select k, json_extract(v, '$.repo') from ghdata WHERE v["type"] = 'WatchEvent' 
 order by k limit 10;
 -- SELECT v["payload"]["member"]["id"], count() FROM ghdata where 
v["payload"]["member"]["id"] is not null group by v["payload"]["member"]["id"] 
order by 1, 2 desc LIMIT 10;
 select k, v["id"], v["type"], v["repo"]["name"] from ghdata WHERE v["type"] = 
'WatchEvent'  order by k limit 10;
-SELECT v["payload"]["pusher_type"] FROM ghdata where 
v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
\ No newline at end of file
+SELECT v["payload"]["pusher_type"] FROM ghdata where 
v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
+-- implicit cast to decimal type
+SELECT v["id"] FROM ghdata where v["id"] not in (7273, 10.118626, -69352) 
order by cast(v["id"] as decimal) limit 10;
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to