This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f9bf62eefab7fb807f4e5d6900064b612b455a5e Author: Todd Lipcon <t...@apache.org> AuthorDate: Mon May 20 13:45:29 2019 -0700 IMPALA-8566. Fix computation of num_nulls for incremental stats The calculation for num_nulls in the incremental stats code path initialized the counter to -1 instead of 0. This meant that, if there were no nulls (reasonably common), the num_nulls counter would be set to -1, indicating unknown, rather than 0. This simply fixes the initialization and updates the tests. Change-Id: Ie42103ad21d719cac45abc160c8d5422dd33fb28 Reviewed-on: http://gerrit.cloudera.org:8080/13378 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/exec/incr-stats-util.cc | 6 +- .../QueryTest/compute-stats-incremental.test | 122 ++++++++++----------- .../queries/QueryTest/truncate-table.test | 26 ++--- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/be/src/exec/incr-stats-util.cc b/be/src/exec/incr-stats-util.cc index f0bb73f..6cc9f2f 100644 --- a/be/src/exec/incr-stats-util.cc +++ b/be/src/exec/incr-stats-util.cc @@ -140,7 +140,7 @@ struct PerColumnStats { double avg_width; PerColumnStats() - : intermediate_ndv(AggregateFunctions::HLL_LEN, 0), num_nulls(-1), + : intermediate_ndv(AggregateFunctions::HLL_LEN, 0), num_nulls(0), max_width(0), num_rows(0), avg_width(0) { } // Updates all aggregate statistics with a new set of measurements. 
@@ -150,11 +150,11 @@ struct PerColumnStats { DCHECK_GE(num_new_rows, 0); DCHECK_GE(max_new_width, 0); DCHECK_GE(new_avg_width, 0); - DCHECK_GE(num_new_nulls, -1); + DCHECK_GE(num_new_nulls, 0); for (int j = 0; j < ndv.size(); ++j) { intermediate_ndv[j] = ::max(intermediate_ndv[j], ndv[j]); } - if (num_new_nulls >= 0) num_nulls += num_new_nulls; + num_nulls += num_new_nulls; max_width = ::max(max_width, max_new_width); avg_width += (new_avg_width * num_new_rows); num_rows += num_new_rows; diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test index 064c23a..e76170a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test @@ -48,17 +48,17 @@ show column stats alltypes_incremental ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',7300,-1,4,4 -'bool_col','BOOLEAN',2,-1,1,1 -'tinyint_col','TINYINT',10,-1,1,1 -'smallint_col','SMALLINT',10,-1,2,2 -'int_col','INT',10,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'double_col','DOUBLE',10,-1,8,8 -'date_string_col','STRING',736,-1,8,8 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',7300,-1,16,16 +'id','INT',7300,0,4,4 +'bool_col','BOOLEAN',2,0,1,1 +'tinyint_col','TINYINT',10,0,1,1 +'smallint_col','SMALLINT',10,0,2,2 +'int_col','INT',10,0,4,4 +'bigint_col','BIGINT',10,0,8,8 +'float_col','FLOAT',10,0,4,4 +'double_col','DOUBLE',10,0,8,8 +'date_string_col','STRING',736,0,8,8 +'string_col','STRING',10,0,1,1 +'timestamp_col','TIMESTAMP',7300,0,16,16 'year','INT',2,0,4,4 'month','INT',12,0,4,4 ---- TYPES @@ -141,17 +141,17 @@ show column stats alltypes_incremental ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',7300,-1,4,4 
-'bool_col','BOOLEAN',2,-1,1,1 -'tinyint_col','TINYINT',10,-1,1,1 -'smallint_col','SMALLINT',10,-1,2,2 -'int_col','INT',10,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'double_col','DOUBLE',10,-1,8,8 -'date_string_col','STRING',736,-1,8,8 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',7300,-1,16,16 +'id','INT',7300,0,4,4 +'bool_col','BOOLEAN',2,0,1,1 +'tinyint_col','TINYINT',10,0,1,1 +'smallint_col','SMALLINT',10,0,2,2 +'int_col','INT',10,0,4,4 +'bigint_col','BIGINT',10,0,8,8 +'float_col','FLOAT',10,0,4,4 +'double_col','DOUBLE',10,0,8,8 +'date_string_col','STRING',736,0,8,8 +'string_col','STRING',10,0,1,1 +'timestamp_col','TIMESTAMP',7300,0,16,16 'year','INT',2,0,4,4 'month','INT',12,0,4,4 ---- TYPES @@ -242,17 +242,17 @@ show column stats alltypes_incremental ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',6990,-1,4,4 -'bool_col','BOOLEAN',2,-1,1,1 -'tinyint_col','TINYINT',10,-1,1,1 -'smallint_col','SMALLINT',10,-1,2,2 -'int_col','INT',10,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'double_col','DOUBLE',10,-1,8,8 -'date_string_col','STRING',688,-1,8,8 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',6990,-1,16,16 +'id','INT',6990,0,4,4 +'bool_col','BOOLEAN',2,0,1,1 +'tinyint_col','TINYINT',10,0,1,1 +'smallint_col','SMALLINT',10,0,2,2 +'int_col','INT',10,0,4,4 +'bigint_col','BIGINT',10,0,8,8 +'float_col','FLOAT',10,0,4,4 +'double_col','DOUBLE',10,0,8,8 +'date_string_col','STRING',688,0,8,8 +'string_col','STRING',10,0,1,1 +'timestamp_col','TIMESTAMP',6990,0,16,16 'year','INT',2,0,4,4 'month','INT',12,0,4,4 ---- TYPES @@ -305,17 +305,17 @@ show column stats alltypes_incremental ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',7300,-1,4,4 -'bool_col','BOOLEAN',2,-1,1,1 -'tinyint_col','TINYINT',10,-1,1,1 -'smallint_col','SMALLINT',10,-1,2,2 -'int_col','INT',10,-1,4,4 
-'bigint_col','BIGINT',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'double_col','DOUBLE',10,-1,8,8 -'date_string_col','STRING',736,-1,8,8 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',7300,-1,16,16 +'id','INT',7300,0,4,4 +'bool_col','BOOLEAN',2,0,1,1 +'tinyint_col','TINYINT',10,0,1,1 +'smallint_col','SMALLINT',10,0,2,2 +'int_col','INT',10,0,4,4 +'bigint_col','BIGINT',10,0,8,8 +'float_col','FLOAT',10,0,4,4 +'double_col','DOUBLE',10,0,8,8 +'date_string_col','STRING',736,0,8,8 +'string_col','STRING',10,0,1,1 +'timestamp_col','TIMESTAMP',7300,0,16,16 'year','INT',2,0,4,4 'month','INT',12,0,4,4 ---- TYPES @@ -546,14 +546,14 @@ show column stats chars_tbl ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',2915,-1,4,4 -'ch1','CHAR(1)',1,-1,1,1 -'ch2','CHAR(8)',10,-1,8,8 -'ch3','CHAR(20)',10,-1,8,8 -'ts','TIMESTAMP',2871,-1,16,16 -'vc1','VARCHAR(1)',1,-1,1,1 -'vc2','VARCHAR(8)',10,-1,8,8 -'vc3','VARCHAR(20)',10,-1,8,8 +'id','INT',2915,0,4,4 +'ch1','CHAR(1)',1,0,1,1 +'ch2','CHAR(8)',10,0,8,8 +'ch3','CHAR(20)',10,0,8,8 +'ts','TIMESTAMP',2871,0,16,16 +'vc1','VARCHAR(1)',1,0,1,1 +'vc2','VARCHAR(8)',10,0,8,8 +'vc3','VARCHAR(20)',10,0,8,8 'year','CHAR(5)',1,0,5,5 'day','VARCHAR(13)',3,1,-1,-1 ---- TYPES @@ -576,14 +576,14 @@ show column stats chars_tbl ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',2915,0,4,4 -'ch1','CHAR(1)',2,-1,1,1 -'ch2','CHAR(8)',11,-1,8,7.99766731262207 -'ch3','CHAR(20)',11,-1,8,7.99766731262207 -'ts','TIMESTAMP',2871,0,16,16 -'vc1','VARCHAR(1)',2,-1,1,1 -'vc2','VARCHAR(8)',11,-1,8,7.99766731262207 -'vc3','VARCHAR(20)',11,-1,8,7.99766731262207 +'id','INT',2915,1,4,4 +'ch1','CHAR(1)',2,0,1,1 +'ch2','CHAR(8)',11,0,8,7.99766731262207 +'ch3','CHAR(20)',11,0,8,7.99766731262207 +'ts','TIMESTAMP',2871,1,16,16 +'vc1','VARCHAR(1)',2,0,1,1 +'vc2','VARCHAR(8)',11,0,8,7.99766731262207 +'vc3','VARCHAR(20)',11,0,8,7.99766731262207 'year','CHAR(5)',2,0,5,5 
'day','VARCHAR(13)',4,1,-1,-1 ---- TYPES @@ -621,4 +621,4 @@ compute incremental stats complextypestbl_part; 'Updated 1 partition(s) and 1 column(s).' ---- TYPES STRING -==== \ No newline at end of file +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test index a8d2a80..9ad769f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test @@ -42,17 +42,17 @@ show column stats t1; ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'id','INT',7300,-1,4,4 -'bool_col','BOOLEAN',2,-1,1,1 -'tinyint_col','TINYINT',10,-1,1,1 -'smallint_col','SMALLINT',10,-1,2,2 -'int_col','INT',10,-1,4,4 -'bigint_col','BIGINT',10,-1,8,8 -'float_col','FLOAT',10,-1,4,4 -'double_col','DOUBLE',10,-1,8,8 -'date_string_col','STRING',736,-1,8,8 -'string_col','STRING',10,-1,1,1 -'timestamp_col','TIMESTAMP',7300,-1,16,16 +'id','INT',7300,0,4,4 +'bool_col','BOOLEAN',2,0,1,1 +'tinyint_col','TINYINT',10,0,1,1 +'smallint_col','SMALLINT',10,0,2,2 +'int_col','INT',10,0,4,4 +'bigint_col','BIGINT',10,0,8,8 +'float_col','FLOAT',10,0,4,4 +'double_col','DOUBLE',10,0,8,8 +'date_string_col','STRING',736,0,8,8 +'string_col','STRING',10,0,1,1 +'timestamp_col','TIMESTAMP',7300,0,16,16 'year','INT',2,0,4,4 'month','INT',12,0,4,4 ---- TYPES @@ -135,8 +135,8 @@ show column stats t2; ---- LABELS COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- RESULTS -'a','STRING',3,-1,8,6.666666507720947 -'b','STRING',3,-1,7,4 +'a','STRING',3,0,8,6.666666507720947 +'b','STRING',3,0,7,4 ---- TYPES STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE ====