IMPALA-4260: Alter table add column drops all the column stats

Hive expects the type names in column stats to be specified in all lower case. For some reason, Hive doesn't check this when the stats are first written, but it does check when performing an 'alter table'. As a result, Hive drops the stats that Impala wrote, because Impala specifies type names in upper case.
This patch converts the types that Impala sends to Hive for the column stats to all lower case and adds a regression test. I also filed HIVE-15061 to track the issue from the Hive end. Change-Id: Ia373ec917efa7ab9f2a59b8a870b7ebc30175dda Reviewed-on: http://gerrit.cloudera.org:8080/4845 Reviewed-by: Matthew Jacobs <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/5cc13394 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/5cc13394 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/5cc13394 Branch: refs/heads/hadoop-next Commit: 5cc133947fe26a9ae39b0e7afb4678d251206b6b Parents: c62d824 Author: Thomas Tauber-Marshall <[email protected]> Authored: Mon Oct 24 15:37:22 2016 -0700 Committer: Internal Jenkins <[email protected]> Committed: Mon Nov 7 22:29:19 2016 +0000 ---------------------------------------------------------------------- .../impala/service/CatalogOpExecutor.java | 2 +- .../queries/QueryTest/compute-stats.test | 82 ++++++++++++++++++++ 2 files changed, 83 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5cc13394/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index ce5a1b2..884c43a 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -722,7 +722,7 @@ public class CatalogOpExecutor { entry.getValue().getNum_nulls(), entry.getValue().getMax_size(), entry.getValue().getAvg_size())); ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj(colName, - 
tableCol.getType().toString(), colStatsData); + tableCol.getType().toString().toLowerCase(), colStatsData); colStats.addToStatsObj(colStatsObj); } return colStats; http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/5cc13394/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test index dd3fec8..a5ddb90 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test @@ -67,6 +67,88 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY +# Adding a column shouldn't cause the stats to be dropped. +alter table alltypes add columns (new_col int) +---- RESULTS +==== +---- QUERY +show column stats alltypes +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',7505,-1,4,4 +'bool_col','BOOLEAN',2,-1,1,1 +'tinyint_col','TINYINT',10,-1,1,1 +'smallint_col','SMALLINT',10,-1,2,2 +'int_col','INT',10,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'double_col','DOUBLE',10,-1,8,8 +'date_string_col','STRING',736,-1,8,8 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',7554,-1,16,16 +'year','INT',2,0,4,4 +'month','INT',12,0,4,4 +'new_col','INT',-1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY +# Changing a column shouldn't cause the stats of other columns to be dropped. +# Check that the column's own stats aren't dropped if the type matches once +# HIVE-15075 is resolved. 
+alter table alltypes change new_col new_col2 int +---- RESULTS +==== +---- QUERY +show column stats alltypes +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',7505,-1,4,4 +'bool_col','BOOLEAN',2,-1,1,1 +'tinyint_col','TINYINT',10,-1,1,1 +'smallint_col','SMALLINT',10,-1,2,2 +'int_col','INT',10,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'double_col','DOUBLE',10,-1,8,8 +'date_string_col','STRING',736,-1,8,8 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',7554,-1,16,16 +'year','INT',2,0,4,4 +'month','INT',12,0,4,4 +'new_col2','INT',-1,-1,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY +# Removing a column shouldn't cause the stats to be dropped. +alter table alltypes drop column new_col2 +---- RESULTS +==== +---- QUERY +show column stats alltypes +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'id','INT',7505,-1,4,4 +'bool_col','BOOLEAN',2,-1,1,1 +'tinyint_col','TINYINT',10,-1,1,1 +'smallint_col','SMALLINT',10,-1,2,2 +'int_col','INT',10,-1,4,4 +'bigint_col','BIGINT',10,-1,8,8 +'float_col','FLOAT',10,-1,4,4 +'double_col','DOUBLE',10,-1,8,8 +'date_string_col','STRING',736,-1,8,8 +'string_col','STRING',10,-1,1,1 +'timestamp_col','TIMESTAMP',7554,-1,16,16 +'year','INT',2,0,4,4 +'month','INT',12,0,4,4 +---- TYPES +STRING, STRING, BIGINT, BIGINT, INT, DOUBLE +==== +---- QUERY # drop stats from this table drop stats alltypes ====
