This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit d7ecc111492b77e65fc103b119bfd1d5f09af65c Author: LPL <[email protected]> AuthorDate: Tue Aug 16 17:22:34 2022 +0800 IMPALA-11500: Fix Impalad crashed in ParquetBoolDecoder::SkipValues when num_values is 0 Fix an Impalad crash in the method ParquetBoolDecoder::SkipValues when the parameter 'num_values' is 0. The function should tolerate 'num_values' being 0. Testing: - Add e2e tests Change-Id: I8c4c5a4dff9e9e75913c7b524b4ae70967febb37 Reviewed-on: http://gerrit.cloudera.org:8080/18854 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/parquet/parquet-bool-decoder.cc | 2 +- .../QueryTest/parquet-page-index-bugfix.test | 32 ++++++++++++++++++++++ tests/query_test/test_parquet_stats.py | 2 ++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/be/src/exec/parquet/parquet-bool-decoder.cc b/be/src/exec/parquet/parquet-bool-decoder.cc index 31b9998e2..cff3254cc 100644 --- a/be/src/exec/parquet/parquet-bool-decoder.cc +++ b/be/src/exec/parquet/parquet-bool-decoder.cc @@ -67,7 +67,7 @@ bool ParquetBoolDecoder::DecodeValues( } bool ParquetBoolDecoder::SkipValues(int num_values) { - DCHECK_GT(num_values, 0); + DCHECK_GE(num_values, 0); int skip_cached = min(num_unpacked_values_ - unpacked_value_idx_, num_values); unpacked_value_idx_ += skip_cached; if (skip_cached == num_values) return true; diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index-bugfix.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index-bugfix.test new file mode 100644 index 000000000..a1d071456 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index-bugfix.test @@ -0,0 +1,32 @@ +==== +---- QUERY +# Test for IMPALA-11500 +# Impalad crashed in the ParquetBoolDecoder::SkipValues when num_values is 0 +create table parq_tbl ( + s string, + i int, + b boolean +) stored as parquet; +==== +---- QUERY +insert into + 
parq_tbl +values + ( "abc", 3, true), + ( "def", NULL, false), + ( "ghi", 1, NULL), + (NULL, 0, NULL), + (NULL, NULL, NULL); +select * from parq_tbl where s is not null order by s +---- RESULTS +'abc',3,true +'def',NULL,false +'ghi',1,NULL +---- TYPES +STRING,INT,BOOLEAN +---- RUNTIME_PROFILE +aggregation(SUM, NumColumns): 3 +aggregation(SUM, NumPages): 3 +aggregation(SUM, NumRowGroups): 1 +aggregation(SUM, RowsRead): 5 +==== diff --git a/tests/query_test/test_parquet_stats.py b/tests/query_test/test_parquet_stats.py index c9663afa9..dd3ce678f 100644 --- a/tests/query_test/test_parquet_stats.py +++ b/tests/query_test/test_parquet_stats.py @@ -105,3 +105,5 @@ class TestParquetStats(ImpalaTestSuite): new_vector.get_value('exec_option')['batch_size'] = batch_size self.run_test_case('QueryTest/parquet-page-index-large', new_vector, unique_database) + # Test for the bugfix + self.run_test_case('QueryTest/parquet-page-index-bugfix', vector, unique_database)
