This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 3103bb08dcd [pick](Variant) casting to decimal type may lose precision
(#39843)
3103bb08dcd is described below
commit 3103bb08dcde269b5388a98fea3ea41fcd7f2890
Author: lihangyu <[email protected]>
AuthorDate: Fri Aug 23 22:47:32 2024 +0800
[pick](Variant) casting to decimal type may lose precision (#39843)
#39650
---
be/src/olap/iterators.h | 2 +-
be/src/olap/rowset/rowset_reader_context.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 +-
be/src/olap/tablet_reader.cpp | 3 +-
be/src/olap/tablet_reader.h | 2 +-
be/src/pipeline/exec/scan_operator.cpp | 38 +++++++++++-----------
be/src/pipeline/exec/scan_operator.h | 5 +--
be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 +--
be/src/vec/exec/scan/new_olap_scan_node.h | 2 +-
be/src/vec/exec/scan/vscan_node.cpp | 16 ++++-----
be/src/vec/exec/scan/vscan_node.h | 3 +-
.../data/variant_p0/sql/implicit_cast.out | 12 +++++++
.../suites/variant_p0/sql/implicit_cast.sql | 4 ++-
13 files changed, 56 insertions(+), 39 deletions(-)
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index 5d752a2bf73..b29c63a2c9a 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -118,7 +118,7 @@ public:
Version version;
int64_t tablet_id = 0;
// slots that cast may be eliminated in storage layer
- std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+ std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
RowRanges row_ranges;
size_t topn_limit = 0;
};
diff --git a/be/src/olap/rowset/rowset_reader_context.h
b/be/src/olap/rowset/rowset_reader_context.h
index 6029196c9bb..8cc7a281c88 100644
--- a/be/src/olap/rowset/rowset_reader_context.h
+++ b/be/src/olap/rowset/rowset_reader_context.h
@@ -82,7 +82,7 @@ struct RowsetReaderContext {
const std::set<int32_t>* output_columns = nullptr;
RowsetId rowset_id;
// slots that cast may be eliminated in storage layer
- std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+ std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
size_t topn_limit = 0;
};
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 8d647aab39f..c9eec10e0c4 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1816,7 +1816,7 @@ bool
SegmentIterator::_can_evaluated_by_vectorized(ColumnPredicate* predicate) {
if (field_type == FieldType::OLAP_FIELD_TYPE_VARIANT) {
// Use variant cast dst type
field_type = TabletColumn::get_field_type_by_type(
-
_opts.target_cast_type_for_variants[_schema->column(cid)->name()]);
+
_opts.target_cast_type_for_variants[_schema->column(cid)->name()].type);
}
switch (predicate->type()) {
case PredicateType::EQ:
diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp
index 5af4c0d4fcc..6b5d12118f7 100644
--- a/be/src/olap/tablet_reader.cpp
+++ b/be/src/olap/tablet_reader.cpp
@@ -272,7 +272,8 @@ TabletColumn TabletReader::materialize_column(const
TabletColumn& orig) {
}
TabletColumn column_with_cast_type = orig;
auto cast_type =
_reader_context.target_cast_type_for_variants.at(orig.name());
-
column_with_cast_type.set_type(TabletColumn::get_field_type_by_type(cast_type));
+ FieldType filed_type =
TabletColumn::get_field_type_by_type(cast_type.type);
+ column_with_cast_type.set_type(filed_type);
return column_with_cast_type;
}
diff --git a/be/src/olap/tablet_reader.h b/be/src/olap/tablet_reader.h
index 942c61f8207..8b99a8f886c 100644
--- a/be/src/olap/tablet_reader.h
+++ b/be/src/olap/tablet_reader.h
@@ -136,7 +136,7 @@ public:
std::vector<FunctionFilter> function_filters;
std::vector<RowsetMetaSharedPtr> delete_predicates;
// slots that cast may be eliminated in storage layer
- std::map<std::string, PrimitiveType> target_cast_type_for_variants;
+ std::map<std::string, TypeDescriptor> target_cast_type_for_variants;
std::vector<RowSetSplits> rs_splits;
// For unique key table with merge-on-write
diff --git a/be/src/pipeline/exec/scan_operator.cpp
b/be/src/pipeline/exec/scan_operator.cpp
index feb10a618da..d17a3aa701b 100644
--- a/be/src/pipeline/exec/scan_operator.cpp
+++ b/be/src/pipeline/exec/scan_operator.cpp
@@ -28,6 +28,7 @@
#include "pipeline/exec/meta_scan_operator.h"
#include "pipeline/exec/olap_scan_operator.h"
#include "pipeline/exec/operator.h"
+#include "runtime/types.h"
#include "util/runtime_profile.h"
#include "vec/exec/runtime_filter_consumer.h"
#include "vec/exec/scan/pip_scanner_context.h"
@@ -168,14 +169,14 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
// The conjuncts is always on output tuple, so use _output_tuple_desc;
std::vector<SlotDescriptor*> slots = p._output_tuple_desc->slots();
- auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
- switch (type) {
-#define M(NAME)
\
- case TYPE_##NAME: {
\
- ColumnValueRange<TYPE_##NAME> range(slot->col_name(),
slot->is_nullable(), \
- slot->type().precision,
slot->type().scale); \
- _slot_id_to_value_range[slot->id()] = std::pair {slot, range};
\
- break;
\
+ auto init_value_range = [&](SlotDescriptor* slot, const TypeDescriptor&
type_desc) {
+ switch (type_desc.type) {
+#define M(NAME)
\
+ case TYPE_##NAME: {
\
+ ColumnValueRange<TYPE_##NAME> range(slot->col_name(),
slot->is_nullable(), \
+ type_desc.precision,
type_desc.scale); \
+ _slot_id_to_value_range[slot->id()] = std::pair {slot, range};
\
+ break;
\
}
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
M(TINYINT) \
@@ -219,7 +220,7 @@ Status ScanLocalState<Derived>::_normalize_conjuncts() {
continue;
}
}
- init_value_range(slots[slot_idx], slots[slot_idx]->type().type);
+ init_value_range(slots[slot_idx], slots[slot_idx]->type());
}
get_cast_types_for_variants();
@@ -631,7 +632,7 @@ Status
ScanLocalState<Derived>::_normalize_in_and_eq_predicate(
vectorized::VExpr* expr, vectorized::VExprContext* expr_ctx,
SlotDescriptor* slot,
ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
- slot->is_nullable(), slot->type().precision, slot->type().scale);
+ slot->is_nullable(), range.precision(), range.scale());
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprNodeType::IN_PRED == expr->node_type()) {
HybridSetBase::IteratorBase* iter = nullptr;
@@ -787,7 +788,7 @@ Status
ScanLocalState<Derived>::_normalize_not_in_and_not_eq_predicate(
ColumnValueRange<T>& range, vectorized::VScanNode::PushDownType* pdt) {
bool is_fixed_range = range.is_fixed_value_range();
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(
- range.column_name(), slot->is_nullable(), slot->type().precision,
slot->type().scale);
+ range.column_name(), slot->is_nullable(), range.precision(),
range.scale());
vectorized::VScanNode::PushDownType temp_pdt =
vectorized::VScanNode::PushDownType::UNACCEPTABLE;
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
@@ -969,14 +970,14 @@ Status
ScanLocalState<Derived>::_normalize_is_null_predicate(
if
(reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name
==
"is_null_pred") {
auto temp_range =
ColumnValueRange<T>::create_empty_column_value_range(
- slot->is_nullable(), slot->type().precision,
slot->type().scale);
+ slot->is_nullable(), range.precision(), range.scale());
temp_range.set_contain_null(true);
range.intersection(temp_range);
*pdt = temp_pdt;
} else if
(reinterpret_cast<vectorized::VectorizedFnCall*>(expr)->fn().name.function_name
==
"is_not_null_pred") {
auto temp_range =
ColumnValueRange<T>::create_empty_column_value_range(
- slot->is_nullable(), slot->type().precision,
slot->type().scale);
+ slot->is_nullable(), range.precision(), range.scale());
temp_range.set_contain_null(false);
range.intersection(temp_range);
*pdt = temp_pdt;
@@ -1216,7 +1217,7 @@ Status
ScanLocalState<Derived>::_normalize_match_predicate(
// create empty range as temp range, temp range should do intersection
on range
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
- slot->is_nullable(), slot->type().precision,
slot->type().scale);
+ slot->is_nullable(), range.precision(), range.scale());
// Normalize match conjuncts like 'where col match value'
auto match_checker = [](const std::string& fn_name) { return
is_match_condition(fn_name); };
@@ -1361,7 +1362,7 @@ Status ScanLocalState<Derived>::_init_profile() {
template <typename Derived>
void ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
const vectorized::VExpr* expr,
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>&
colname_to_cast_types) {
+ std::unordered_map<std::string, std::vector<TypeDescriptor>>&
colname_to_cast_types) {
const auto* cast_expr = dynamic_cast<const vectorized::VCastExpr*>(expr);
if (cast_expr != nullptr) {
const auto* src_slot =
@@ -1373,10 +1374,9 @@ void
ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
}
std::vector<SlotDescriptor*> slots = output_tuple_desc()->slots();
SlotDescriptor* src_slot_desc =
_slot_id_to_slot_desc[src_slot->slot_id()];
- PrimitiveType cast_dst_type =
-
cast_expr->get_target_type()->get_type_as_type_descriptor().type;
+ TypeDescriptor type_desc =
cast_expr->get_target_type()->get_type_as_type_descriptor();
if (src_slot_desc->type().is_variant_type()) {
-
colname_to_cast_types[src_slot_desc->col_name()].push_back(cast_dst_type);
+
colname_to_cast_types[src_slot_desc->col_name()].push_back(type_desc);
}
}
for (const auto& child : expr->children()) {
@@ -1386,7 +1386,7 @@ void
ScanLocalState<Derived>::_filter_and_collect_cast_type_for_variant(
template <typename Derived>
void ScanLocalState<Derived>::get_cast_types_for_variants() {
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>
colname_to_cast_types;
+ std::unordered_map<std::string, std::vector<TypeDescriptor>>
colname_to_cast_types;
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
auto& conjunct = *it;
if (conjunct->root()) {
diff --git a/be/src/pipeline/exec/scan_operator.h
b/be/src/pipeline/exec/scan_operator.h
index 4cdebeedc83..100b12d0a76 100644
--- a/be/src/pipeline/exec/scan_operator.h
+++ b/be/src/pipeline/exec/scan_operator.h
@@ -27,6 +27,7 @@
#include "pipeline/pipeline_x/dependency.h"
#include "pipeline/pipeline_x/operator.h"
#include "runtime/descriptors.h"
+#include "runtime/types.h"
#include "vec/exec/scan/vscan_node.h"
namespace doris {
@@ -329,7 +330,7 @@ protected:
void get_cast_types_for_variants();
void _filter_and_collect_cast_type_for_variant(
const vectorized::VExpr* expr,
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>&
colname_to_cast_types);
+ std::unordered_map<std::string, std::vector<TypeDescriptor>>&
colname_to_cast_types);
// Every time vconjunct_ctx_ptr is updated, the old ctx will be stored in
this vector
// so that it will be destroyed uniformly at the end of the query.
@@ -344,7 +345,7 @@ protected:
std::vector<FunctionFilter> _push_down_functions;
// colname -> cast dst type
- std::map<std::string, PrimitiveType> _cast_types_for_variants;
+ std::map<std::string, TypeDescriptor> _cast_types_for_variants;
// slot id -> SlotDescriptor
phmap::flat_hash_map<int, SlotDescriptor*> _slot_id_to_slot_desc;
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp
b/be/src/vec/exec/scan/new_olap_scan_node.cpp
index 63b0067dd1c..c6e4363db4d 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.cpp
+++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp
@@ -423,7 +423,7 @@ std::string NewOlapScanNode::get_name() {
void NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
const VExpr* expr,
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>&
colname_to_cast_types) {
+ phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>&
colname_to_cast_types) {
auto* cast_expr = dynamic_cast<const VCastExpr*>(expr);
if (cast_expr != nullptr) {
auto* src_slot = cast_expr->get_child(0)->node_type() ==
TExprNodeType::SLOT_REF
@@ -446,7 +446,7 @@ void
NewOlapScanNode::_filter_and_collect_cast_type_for_variant(
}
void NewOlapScanNode::get_cast_types_for_variants() {
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>
colname_to_cast_types;
+ phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>
colname_to_cast_types;
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
auto& conjunct = *it;
if (conjunct->root()) {
diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h
b/be/src/vec/exec/scan/new_olap_scan_node.h
index 4ee2b77216f..15b15ead164 100644
--- a/be/src/vec/exec/scan/new_olap_scan_node.h
+++ b/be/src/vec/exec/scan/new_olap_scan_node.h
@@ -106,7 +106,7 @@ protected:
void get_cast_types_for_variants() override;
void _filter_and_collect_cast_type_for_variant(
const VExpr* expr,
- phmap::flat_hash_map<std::string, std::vector<PrimitiveType>>&
colname_to_cast_types);
+ phmap::flat_hash_map<std::string, std::vector<TypeDescriptor>>&
colname_to_cast_types);
private:
Status _build_key_ranges_and_filters();
diff --git a/be/src/vec/exec/scan/vscan_node.cpp
b/be/src/vec/exec/scan/vscan_node.cpp
index 5a64287ce24..258e225a26e 100644
--- a/be/src/vec/exec/scan/vscan_node.cpp
+++ b/be/src/vec/exec/scan/vscan_node.cpp
@@ -342,14 +342,14 @@ Status VScanNode::_normalize_conjuncts() {
// The conjuncts is always on output tuple, so use _output_tuple_desc;
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
- auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
- switch (type) {
-#define M(NAME)
\
- case TYPE_##NAME: {
\
- ColumnValueRange<TYPE_##NAME> range(slot->col_name(),
slot->is_nullable(), \
- slot->type().precision,
slot->type().scale); \
- _slot_id_to_value_range[slot->id()] = std::pair {slot, range};
\
- break;
\
+ auto init_value_range = [&](SlotDescriptor* slot, TypeDescriptor type) {
+ switch (type.type) {
+#define M(NAME)
\
+ case TYPE_##NAME: {
\
+ ColumnValueRange<TYPE_##NAME> range(slot->col_name(),
slot->is_nullable(), type.precision, \
+ type.scale);
\
+ _slot_id_to_value_range[slot->id()] = std::pair {slot, range};
\
+ break;
\
}
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
M(TINYINT) \
diff --git a/be/src/vec/exec/scan/vscan_node.h
b/be/src/vec/exec/scan/vscan_node.h
index 04bc738fda4..ddbaac13336 100644
--- a/be/src/vec/exec/scan/vscan_node.h
+++ b/be/src/vec/exec/scan/vscan_node.h
@@ -41,6 +41,7 @@
#include "runtime/define_primitive_type.h"
#include "runtime/query_context.h"
#include "runtime/runtime_state.h"
+#include "runtime/types.h"
#include "util/runtime_profile.h"
#include "vec/exec/runtime_filter_consumer.h"
#include "vec/exec/scan/scanner_context.h"
@@ -308,7 +309,7 @@ protected:
std::vector<FunctionFilter> _push_down_functions;
// colname -> cast dst type
- std::map<std::string, PrimitiveType> _cast_types_for_variants;
+ std::map<std::string, TypeDescriptor> _cast_types_for_variants;
// slot id -> ColumnValueRange
// Parsed from conjuncts
diff --git a/regression-test/data/variant_p0/sql/implicit_cast.out
b/regression-test/data/variant_p0/sql/implicit_cast.out
index b0f5d96087b..2eefddc43e5 100644
--- a/regression-test/data/variant_p0/sql/implicit_cast.out
+++ b/regression-test/data/variant_p0/sql/implicit_cast.out
@@ -78,3 +78,15 @@ user
user
user
+-- !implicit_cast_14 --
+14690746673
+14690746676
+14690746679
+14690746680
+14690746681
+14690746684
+14690746685
+14690746687
+14690746688
+14690746689
+
diff --git a/regression-test/suites/variant_p0/sql/implicit_cast.sql
b/regression-test/suites/variant_p0/sql/implicit_cast.sql
index 0653a52eed7..f62b25ecfdc 100644
--- a/regression-test/suites/variant_p0/sql/implicit_cast.sql
+++ b/regression-test/suites/variant_p0/sql/implicit_cast.sql
@@ -12,4 +12,6 @@ SELECT v["payload"]["member"]["id"] FROM ghdata where
v["payload"]["member"]["id
select k, json_extract(v, '$.repo') from ghdata WHERE v["type"] = 'WatchEvent'
order by k limit 10;
-- SELECT v["payload"]["member"]["id"], count() FROM ghdata where
v["payload"]["member"]["id"] is not null group by v["payload"]["member"]["id"]
order by 1, 2 desc LIMIT 10;
select k, v["id"], v["type"], v["repo"]["name"] from ghdata WHERE v["type"] =
'WatchEvent' order by k limit 10;
-SELECT v["payload"]["pusher_type"] FROM ghdata where
v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
\ No newline at end of file
+SELECT v["payload"]["pusher_type"] FROM ghdata where
v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10;
+-- implicit cast to decimal type
+SELECT v["id"] FROM ghdata where v["id"] not in (7273, 10.118626, -69352)
order by cast(v["id"] as decimal) limit 10;
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]