This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 512b787559 [fix](parquet-reader) fix stack-use-after-return error (#14411)
512b787559 is described below

commit 512b787559496319b73daad66a87c3b82fe01bec
Author: Mingyu Chen <[email protected]>
AuthorDate: Sat Nov 19 10:52:50 2022 +0800

    [fix](parquet-reader) fix stack-use-after-return error (#14411)
---
 be/src/exec/olap_common.h                          |  4 +++
 be/src/vec/exec/format/parquet/parquet_pred_cmp.h  | 32 ++++++++++++++++------
 be/src/vec/exec/format/parquet/vparquet_reader.cpp |  4 +++
 .../docker-compose/mysql/init/04-insert.sql        |  2 ++
 .../multi_catalog_query_parquet/hive_catalog.out   |  2 ++
 5 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index aa200ec5a8..1ee8f37f1e 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -131,6 +131,10 @@ public:
 
     CppType get_range_min_value() const { return _low_value; }
 
+    const CppType* get_range_max_value_ptr() const { return &_high_value; }
+
+    const CppType* get_range_min_value_ptr() const { return &_low_value; }
+
     SQLFilterOp get_range_high_op() const { return _high_op; }
 
     SQLFilterOp get_range_low_op() const { return _low_op; }
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index 5f33880c2a..06db698a9c 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -126,18 +126,18 @@ static bool _eval_in_val(const ColumnMinMaxParams& params) {
     case TYPE_STRING:
     case TYPE_VARCHAR:
     case TYPE_CHAR: {
-        std::vector<const char*> in_values;
+        std::vector<std::string> in_values;
         for (auto val : params.in_pred_values) {
-            std::string value = ((StringValue*)val)->to_string();
-            in_values.emplace_back(value.data());
+            in_values.emplace_back(((StringValue*)val)->to_string());
         }
         if (in_values.empty()) {
             return false;
         }
         auto result = std::minmax_element(in_values.begin(), in_values.end());
-        const char* in_min = *result.first;
-        const char* in_max = *result.second;
-        if (strcmp(in_max, params.min_bytes) < 0 || strcmp(in_min, params.max_bytes) > 0) {
+        std::string& in_min = *result.first;
+        std::string& in_max = *result.second;
+        if (strcmp(in_max.data(), params.min_bytes) < 0 ||
+            strcmp(in_min.data(), params.max_bytes) > 0) {
             return true;
         }
         break;
@@ -397,6 +397,17 @@ struct ScanPredicate {
     bool _null_op = false;
     bool _is_null = false;
     int _scale;
+
+    ScanPredicate(const ScanPredicate& other) {
+        _col_name = other._col_name;
+        _op = other._op;
+        for (void* v : other._values) {
+            _values.push_back(v);
+        }
+        _null_op = other._null_op;
+        _is_null = other._is_null;
+        _scale = other._scale;
+    }
 };
 
 template <PrimitiveType primitive_type>
@@ -440,7 +451,9 @@ static void to_filter(const ColumnValueRange<primitive_type>& col_val_range,
             low._col_name = col_val_range.column_name();
             low._op = (low_op == SQLFilterOp::FILTER_LARGER_OR_EQUAL ? TExprOpcode::GE
                                                                      : TExprOpcode::GT);
-            low._values.push_back(const_cast<CppType*>(&low_value));
+            // NOTICE: use get_range_min_value_ptr, not "low_value"'s addr,
+            // to avoid stack-use-after-return bug
+            low._values.push_back(const_cast<CppType*>(col_val_range.get_range_min_value_ptr()));
             low._scale = col_val_range.scale();
             filters.push_back(low);
         }
@@ -451,7 +464,9 @@ static void to_filter(const ColumnValueRange<primitive_type>& col_val_range,
             high._col_name = col_val_range.column_name();
             high._op = (high_op == SQLFilterOp::FILTER_LESS_OR_EQUAL ? TExprOpcode::LE
                                                                      : TExprOpcode::LT);
-            high._values.push_back(const_cast<CppType*>(&high_value));
+            // NOTICE: use get_range_max_value_ptr, not "high_value"'s addr,
+            // to avoid stack-use-after-return bug
+            high._values.push_back(const_cast<CppType*>(col_val_range.get_range_max_value_ptr()));
             high._scale = col_val_range.scale();
             filters.push_back(high);
         }
@@ -534,7 +549,6 @@ static bool determine_filter_min_max(const ColumnValueRangeType& col_val_range,
     params.parquet_type_length = col_schema->parquet_schema.type_length;
     params.min_bytes = min_bytes;
     params.max_bytes = max_bytes;
-
     for (int i = 0; i < filters.size(); i++) {
         _eval_predicate(filters[i], &params, &need_filter);
         if (need_filter) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index b6f65b81bb..a525b531a6 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -72,6 +72,10 @@ void ParquetReader::_init_profile() {
                 ADD_CHILD_TIMER(_profile, "ColumnReadTime", parquet_profile);
         _parquet_profile.parse_meta_time =
                 ADD_CHILD_TIMER(_profile, "ParseMetaTime", parquet_profile);
+        _parquet_profile.page_index_filter_time =
+                ADD_CHILD_TIMER(_profile, "PageIndexFilterTime", parquet_profile);
+        _parquet_profile.row_group_filter_time =
+                ADD_CHILD_TIMER(_profile, "RowGroupFilterTime", parquet_profile);
 
         _parquet_profile.file_read_time = ADD_TIMER(_profile, "FileReadTime");
         _parquet_profile.file_read_calls = ADD_COUNTER(_profile, "FileReadCalls", TUnit::UNIT);
diff --git a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
index 5067a546ab..fa9139e3be 100644
--- a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
+++ b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql
@@ -1055,6 +1055,8 @@ insert into doris_test.ex_tb3 values
 ('mus','plat_code','1001169339',1590406790026,1590420872639,'11','1006061','beijing'),
 ('mus','plat_code','1001169339',1590420482288,1590420872639,'11','1006061','beijing');
 
+-- remove NO_ZERO_IN_DATE and NO_ZERO_DATE to allow insert 0000-00-00 00:00:00
+set sql_mode="ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION";
 insert into doris_test.ex_tb4 values
 (1, 111, '2021-09-01 07:01:01', '2021-09-01 08:01:01', 1),
 (2, 112, '2021-09-02 07:01:01', '2021-09-02 08:01:01', 1),
diff --git a/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out b/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
index ab76b3ae04..1b4fe8339e 100644
--- a/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
+++ b/regression-test/data/tpch_sf1_p0/multi_catalog_query_parquet/hive_catalog.out
@@ -3,8 +3,10 @@
 customer
 lineitem
 nation
+orc_all_types
 orders
 part
+partition_table
 partsupp
 region
 supplier


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to