This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 512b787559 [fix](parquet-reader) fix stack-use-after-return error
(#14411)
512b787559 is described below
commit 512b787559496319b73daad66a87c3b82fe01bec
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat Nov 19 10:52:50 2022 +0800
[fix](parquet-reader) fix stack-use-after-return error (#14411)
---
be/src/exec/olap_common.h | 4 +++
be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 32 ++++++++++++++++------
be/src/vec/exec/format/parquet/vparquet_reader.cpp | 4 +++
.../docker-compose/mysql/init/04-insert.sql | 2 ++
.../multi_catalog_query_parquet/hive_catalog.out | 2 ++
5 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index aa200ec5a8..1ee8f37f1e 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -131,6 +131,10 @@ public:
CppType get_range_min_value() const { return _low_value; }
+ const CppType* get_range_max_value_ptr() const { return &_high_value; }
+
+ const CppType* get_range_min_value_ptr() const { return &_low_value; }
+
SQLFilterOp get_range_high_op() const { return _high_op; }
SQLFilterOp get_range_low_op() const { return _low_op; }
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index 5f33880c2a..06db698a9c 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -126,18 +126,18 @@ static bool _eval_in_val(const ColumnMinMaxParams&
params) {
case TYPE_STRING:
case TYPE_VARCHAR:
case TYPE_CHAR: {
- std::vector<const char*> in_values;
+ std::vector<std::string> in_values;
for (auto val : params.in_pred_values) {
- std::string value = ((StringValue*)val)->to_string();
- in_values.emplace_back(value.data());
+ in_values.emplace_back(((StringValue*)val)->to_string());
}
if (in_values.empty()) {
return false;
}
auto result = std::minmax_element(in_values.begin(), in_values.end());
- const char* in_min = *result.first;
- const char* in_max = *result.second;
- if (strcmp(in_max, params.min_bytes) < 0 || strcmp(in_min,
params.max_bytes) > 0) {
+ std::string& in_min = *result.first;
+ std::string& in_max = *result.second;
+ if (strcmp(in_max.data(), params.min_bytes) < 0 ||
+ strcmp(in_min.data(), params.max_bytes) > 0) {
return true;
}
break;
@@ -397,6 +397,17 @@ struct ScanPredicate {
bool _null_op = false;
bool _is_null = false;
int _scale;
+
+ ScanPredicate(const ScanPredicate& other) {
+ _col_name = other._col_name;
+ _op = other._op;
+ for (void* v : other._values) {
+ _values.push_back(v);
+ }
+ _null_op = other._null_op;
+ _is_null = other._is_null;
+ _scale = other._scale;
+ }
};
template <PrimitiveType primitive_type>
@@ -440,7 +451,9 @@ static void to_filter(const
ColumnValueRange<primitive_type>& col_val_range,
low._col_name = col_val_range.column_name();
low._op = (low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL ?
TExprOpcode::GE
:
TExprOpcode::GT);
- low._values.push_back(const_cast<CppType*>(&low_value));
+ // NOTICE: use get_range_min_value_ptr, not "low_value"'s addr,
+ // to avoid stack-use-after-return bug
+ low._values.push_back(const_cast<CppType*>(col_val_range.get_range_min_value_ptr()));
low._scale = col_val_range.scale();
filters.push_back(low);
}
@@ -451,7 +464,9 @@ static void to_filter(const
ColumnValueRange<primitive_type>& col_val_range,
high._col_name = col_val_range.column_name();
high._op = (high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL ?
TExprOpcode::LE
:
TExprOpcode::LT);
- high._values.push_back(const_cast<CppType*>(&high_value));
+ // NOTICE: use get_range_max_value_ptr, not "high_value"'s addr,
+ // to avoid stack-use-after-return bug
+ high._values.push_back(const_cast<CppType*>(col_val_range.get_range_max_value_ptr()));
high._scale = col_val_range.scale();
filters.push_back(high);
}
@@ -534,7 +549,6 @@ static bool determine_filter_min_max(const
ColumnValueRangeType& col_val_range,
params.parquet_type_length = col_schema->parquet_schema.type_length;
params.min_bytes = min_bytes;
params.max_bytes = max_bytes;
-
for (int i = 0; i < filters.size(); i++) {
_eval_predicate(filters[i], &params, &need_filter);
if (need_filter) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index b6f65b81bb..a525b531a6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -72,6 +72,10 @@ void ParquetReader::_init_profile() {
ADD_CHILD_TIMER(_profile, "ColumnReadTime", parquet_profile);
_parquet_profile.parse_meta_time =
ADD_CHILD_TIMER(_profile, "ParseMetaTime", parquet_profile);
+ _parquet_profile.page_index_filter_time =
+ ADD_CHILD_TIMER(_profile, "PageIndexFilterTime",
parquet_profile);
+ _parquet_profile.row_group_filter_time =
+ ADD_CHILD_TIMER(_profile, "RowGroupFilterTime",
parquet_profile);
_parquet_profile.file_read_time = ADD_TIMER(_profile, "FileReadTime");
_parquet_profile.file_read_calls = ADD_COUNTER(_profile,
"FileReadCalls", TUnit::UNIT);
diff --git a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
index 5067a546ab..fa9139e3be 100644
--- a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
+++ b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
@@ -1055,6 +1055,8 @@ insert into doris_test.ex_tb3 values
('mus','plat_code','1001169339',1590406790026,1590420872639,'11','1006061','beijing'),
('mus','plat_code','1001169339',1590420482288,1590420872639,'11','1006061','beijing');
+-- remove NO_ZERO_IN_DATE and NO_ZERO_DATE to allow insert 0000-00-00 00:00:00
+set
sql_mode="ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION";
insert into doris_test.ex_tb4 values
(1, 111, '2021-09-01 07:01:01', '2021-09-01 08:01:01', 1),
(2, 112, '2021-09-02 07:01:01', '2021-09-02 08:01:01', 1),
diff --git
a/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
b/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
index ab76b3ae04..1b4fe8339e 100644
---
a/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
+++
b/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
@@ -3,8 +3,10 @@
customer
lineitem
nation
+orc_all_types
orders
part
+partition_table
partsupp
region
supplier
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]