IMPALA-5615: Fix compute incremental stats for general partition exprs The fix for IMPALA-1654 has broken the compute incremental stats child query generation logic for general partition expressions. This commit fixes it and also adds new queries to fix the test gap. These tests fail consistently without the patch.
Change-Id: I227fc06f580eb9174f60ad0f515a3641cec19268 Reviewed-on: http://gerrit.cloudera.org:8080/7379 Reviewed-by: Bharath Vissapragada <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2c0fc306 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2c0fc306 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2c0fc306 Branch: refs/heads/master Commit: 2c0fc30628dd1f5d9ad7831de2cad5df1bbce80a Parents: d2d7328 Author: Bharath Vissapragada <[email protected]> Authored: Fri Jul 7 13:08:28 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Tue Jul 25 03:31:19 2017 +0000 ---------------------------------------------------------------------- .../impala/analysis/ComputeStatsStmt.java | 13 ++-- .../QueryTest/compute-stats-incremental.test | 63 ++++++++++++++++++++ .../partition-ddl-predicates-all-fs.test | 4 +- 3 files changed, 72 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java index d806521..fec9ac9 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java @@ -19,6 +19,7 @@ package org.apache.impala.analysis; import java.util.Collection; import java.util.Iterator; +import java.util.HashSet; import java.util.List; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -41,6 +42,7 @@ import org.apache.log4j.Logger; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Sets; /** * Represents a COMPUTE STATS <table> and COMPUTE INCREMENTAL STATS <table> [PARTITION @@ -355,12 +357,9 @@ public class ComputeStatsStmt extends StatementBase { expectAllPartitions_ = true; } } else { - List<HdfsPartition> targetPartitions = partitionSet_.getPartitions(); - // Always compute stats on a set of partitions when told to. - List<String> partitionConjuncts = Lists.newArrayList(); - for (HdfsPartition targetPartition : targetPartitions) { - partitionConjuncts.add(targetPartition.getConjunctSql()); + for (HdfsPartition targetPartition: partitionSet_.getPartitions()) { + filterPreds.add(targetPartition.getConjunctSql()); List<String> partValues = Lists.newArrayList(); for (LiteralExpr partValue: targetPartition.getPartitionValues()) { partValues.add(PartitionKeyValue.getPartitionKeyValueString(partValue, @@ -368,7 +367,9 @@ public class ComputeStatsStmt extends StatementBase { } expectedPartitions_.add(partValues); } - filterPreds.add("(" + Joiner.on(" AND ").join(partitionConjuncts) + ")"); + // Create a hash set out of partitionSet_ for O(1) lookups. + HashSet<HdfsPartition> targetPartitions = + Sets.newHashSet(partitionSet_.getPartitions()); for (HdfsPartition p : hdfsTable.getPartitions()) { if (p.isDefaultPartition()) continue; if (targetPartitions.contains(p)) continue; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test index fa76136..0ac5767 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test @@ -259,6 +259,69 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE ==== ---- QUERY +# Drop and recompute incremental stats for general partition expressions +drop incremental stats alltypes_incremental partition(year=2010); +==== +---- QUERY +compute incremental stats alltypes_incremental partition(year=2010); +---- RESULTS +'Updated 12 partition(s) and 11 column(s).' +---- TYPES +STRING +==== +---- QUERY +show table stats alltypes_incremental; +---- RESULTS +'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.* +'Total','',7300,24,'586.84KB','0B','','','','' +---- TYPES +STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING +==== +---- QUERY +show column stats alltypes_incremental +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',7505,-1,4,4 +'bool_col','BOOLEAN',2,-1,1,1 +'tinyint_col','TINYINT',10,-1,1,1 +'smallint_col','SMALLINT',10,-1,2,2 +'int_col','INT',10,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'double_col','DOUBLE',10,-1,8,8 +'date_string_col','STRING',736,-1,8,8 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',7554,-1,16,16 +'year','INT',2,0,4,4 +'month','INT',12,0,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE +==== +---- QUERY # Confirm that dropping stats drops incremental stats as well drop stats alltypes_incremental; show table stats alltypes_incremental; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test index 1aa5c51..17113aa 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test @@ -129,7 +129,7 @@ show partitions p1 '2','d',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=d '2','e',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=e '2','f',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=f -'Total','',0,0,regex:.+,regex:.+,'','','','' +'Total','',3,0,regex:.+,regex:.+,'','','','' ---- TYPES STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING ==== @@ -149,7 +149,7 @@ show partitions p1 '2','d',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=d '2','e',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=e '2','f',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=f -'Total','',0,0,regex:.+,regex:.+,'','','','' +'Total','',3,0,regex:.+,regex:.+,'','','','' ---- TYPES STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING ====
