IMPALA-5615: Fix compute incremental stats for general partition exprs

The fix for IMPALA-1654 has broken the compute incremental stats child
query generation logic for general partition expressions. This commit
fixes it and also adds new queries to fix the test gap. These tests
fail consistently without the patch.

Change-Id: I227fc06f580eb9174f60ad0f515a3641cec19268
Reviewed-on: http://gerrit.cloudera.org:8080/7379
Reviewed-by: Bharath Vissapragada <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/2c0fc306
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/2c0fc306
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/2c0fc306

Branch: refs/heads/master
Commit: 2c0fc30628dd1f5d9ad7831de2cad5df1bbce80a
Parents: d2d7328
Author: Bharath Vissapragada <[email protected]>
Authored: Fri Jul 7 13:08:28 2017 -0700
Committer: Impala Public Jenkins <[email protected]>
Committed: Tue Jul 25 03:31:19 2017 +0000

----------------------------------------------------------------------
 .../impala/analysis/ComputeStatsStmt.java       | 13 ++--
 .../QueryTest/compute-stats-incremental.test    | 63 ++++++++++++++++++++
 .../partition-ddl-predicates-all-fs.test        |  4 +-
 3 files changed, 72 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
index d806521..fec9ac9 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ComputeStatsStmt.java
@@ -19,6 +19,7 @@ package org.apache.impala.analysis;
 
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.HashSet;
 import java.util.List;
 
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -41,6 +42,7 @@ import org.apache.log4j.Logger;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
 
 /**
  * Represents a COMPUTE STATS <table> and COMPUTE INCREMENTAL STATS <table> 
[PARTITION
@@ -355,12 +357,9 @@ public class ComputeStatsStmt extends StatementBase {
           expectAllPartitions_ = true;
         }
       } else {
-        List<HdfsPartition> targetPartitions = partitionSet_.getPartitions();
-
         // Always compute stats on a set of partitions when told to.
-        List<String> partitionConjuncts = Lists.newArrayList();
-        for (HdfsPartition targetPartition : targetPartitions) {
-          partitionConjuncts.add(targetPartition.getConjunctSql());
+        for (HdfsPartition targetPartition: partitionSet_.getPartitions()) {
+          filterPreds.add(targetPartition.getConjunctSql());
           List<String> partValues = Lists.newArrayList();
           for (LiteralExpr partValue: targetPartition.getPartitionValues()) {
             
partValues.add(PartitionKeyValue.getPartitionKeyValueString(partValue,
@@ -368,7 +367,9 @@ public class ComputeStatsStmt extends StatementBase {
           }
           expectedPartitions_.add(partValues);
         }
-        filterPreds.add("(" + Joiner.on(" AND ").join(partitionConjuncts) + 
")");
+        // Create a hash set out of partitionSet_ for O(1) lookups.
+        HashSet<HdfsPartition> targetPartitions =
+            Sets.newHashSet(partitionSet_.getPartitions());
         for (HdfsPartition p : hdfsTable.getPartitions()) {
           if (p.isDefaultPartition()) continue;
           if (targetPartitions.contains(p)) continue;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
 
b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
index fa76136..0ac5767 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-incremental.test
@@ -259,6 +259,69 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
 ====
 ---- QUERY
+# Drop and recompute incremental stats for general partition expressions
+drop incremental stats alltypes_incremental partition(year=2010);
+====
+---- QUERY
+compute incremental stats alltypes_incremental partition(year=2010);
+---- RESULTS
+'Updated 12 partition(s) and 11 column(s).'
+---- TYPES
+STRING
+====
+---- QUERY
+show table stats alltypes_incremental;
+---- RESULTS
+'2009','1',310,1,'24.56KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','2',280,1,'22.27KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','3',310,1,'24.67KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','4',300,1,'24.06KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2009','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','1',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','2',280,1,'22.54KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','3',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','4',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','5',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','6',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','7',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','8',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','9',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','10',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','11',300,1,'24.16KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'2010','12',310,1,'24.97KB','NOT CACHED','NOT CACHED','TEXT','true',regex:.*
+'Total','',7300,24,'586.84KB','0B','','','',''
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
+====
+---- QUERY
+show column stats alltypes_incremental
+---- LABELS
+COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
+---- RESULTS
+'id','INT',7505,-1,4,4
+'bool_col','BOOLEAN',2,-1,1,1
+'tinyint_col','TINYINT',10,-1,1,1
+'smallint_col','SMALLINT',10,-1,2,2
+'int_col','INT',10,-1,4,4
+'bigint_col','BIGINT',10,-1,8,8
+'float_col','FLOAT',10,-1,4,4
+'double_col','DOUBLE',10,-1,8,8
+'date_string_col','STRING',736,-1,8,8
+'string_col','STRING',10,-1,1,1
+'timestamp_col','TIMESTAMP',7554,-1,16,16
+'year','INT',2,0,4,4
+'month','INT',12,0,4,4
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
+====
+---- QUERY
 # Confirm that dropping stats drops incremental stats as well
 drop stats alltypes_incremental;
 show table stats alltypes_incremental;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/2c0fc306/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test
 
b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test
index 1aa5c51..17113aa 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/partition-ddl-predicates-all-fs.test
@@ -129,7 +129,7 @@ show partitions p1
 
'2','d',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=d
 
'2','e',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=e
 
'2','f',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=f
-'Total','',0,0,regex:.+,regex:.+,'','','',''
+'Total','',3,0,regex:.+,regex:.+,'','','',''
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
 ====
@@ -149,7 +149,7 @@ show partitions p1
 
'2','d',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=d
 
'2','e',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=e
 
'2','f',-1,0,regex:.+,regex:.+,regex:.+,'PARQUET',regex:.+,regex:.*/test-warehouse/.+/p1/j=2/k=f
-'Total','',0,0,regex:.+,regex:.+,'','','',''
+'Total','',3,0,regex:.+,regex:.+,'','','',''
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
 ====

Reply via email to