IMPALA-4767: Workaround for HIVE-15653 to preserve table stats. HIVE-15653 is a Hive Metastore bug that results in ALTER TABLE commands wiping the table stats of unpartitioned tables.
Until the Hive bug is fixed, this patch adds a workaround to Impala that forces the Metastore to preserve the table stats. Testing: Private core/hdfs run passed. Change-Id: Ic191c765f73624bc716badadd7215c8dca9d6b1f Reviewed-on: http://gerrit.cloudera.org:8080/5731 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/74387300 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/74387300 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/74387300 Branch: refs/heads/master Commit: 743873005225c55240de7181eea7bb438a260ff1 Parents: 6cf3efd Author: Alex Behm <[email protected]> Authored: Tue Jan 17 18:34:08 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Fri Jan 20 01:18:10 2017 +0000 ---------------------------------------------------------------------- .../impala/service/CatalogOpExecutor.java | 6 ++- .../queries/QueryTest/compute-stats.test | 52 +++++++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/74387300/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index 208ff2b..fce6e07 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -712,7 +712,7 @@ public class CatalogOpExecutor { msClient.getHiveClient().updateTableColumnStatistics(colStats); } catch (Exception e) { throw new ImpalaRuntimeException(String.format(HMS_RPC_ERROR_FORMAT_STR, - "updateTableColumnStatistics"), e); + 
"updateTableColumnStatistics"), e); } } // Update the table stats. Apply the table alteration last to ensure the @@ -2622,6 +2622,10 @@ public class CatalogOpExecutor { try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) { lastDdlTime = calculateDdlTime(msTbl); msTbl.putToParameters("transient_lastDdlTime", Long.toString(lastDdlTime)); + // TODO: Remove this workaround for HIVE-15653 to preserve table stats + // during table alterations. + msTbl.putToParameters(StatsSetupConst.STATS_GENERATED_VIA_STATS_TASK, + StatsSetupConst.TRUE); msClient.getHiveClient().alter_table( msTbl.getDbName(), msTbl.getTableName(), msTbl); } catch (TException e) { http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/74387300/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test index b741c5a..a42dedf 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test @@ -316,7 +316,7 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE drop stats alltypes ==== ---- QUERY -# test computing stats on an partitioned text table with all types +# test computing stats on an unpartitioned text table with all types create table alltypesnopart like functional.alltypesnopart; insert into alltypesnopart select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, @@ -359,6 +359,19 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY +# IMPALA-4767: Test that ALTER TABLE commands preserve table stats. 
+alter table alltypesnopart set tblproperties('test'='test'); +alter table alltypesnopart set column stats string_col ('numDVs'='10'); +alter table alltypesnopart add columns (new_col int); +show table stats alltypesnopart; +---- LABELS +#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION +---- RESULTS +100,3,'7.73KB','NOT CACHED','NOT CACHED','TEXT','false',regex:.* +---- TYPES +BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING +==== +---- QUERY # test computing stats on a partitioned parquet table with all types create table alltypes_parquet like functional_parquet.alltypes; @@ -427,6 +440,43 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE STRING, STRING, BIGINT, BIGINT, INT, DOUBLE ==== ---- QUERY +# IMPALA-4767: Test that ALTER TABLE commands preserve table stats. +alter table alltypes_parquet set tblproperties('test'='test'); +alter table alltypes_parquet set column stats string_col ('numDVs'='10'); +alter table alltypes_parquet add columns (new_col int); +show table stats alltypes_parquet; +---- LABELS +YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION +---- RESULTS +'2009','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2009','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','1',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','2',280,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','3',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','4',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','5',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','6',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','7',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','8',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','9',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','10',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','11',300,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'2010','12',310,1,regex:.+KB,'NOT CACHED','NOT CACHED','PARQUET','false',regex:.* +'Total','',7300,24,regex:.+KB,'0B','','','','' +---- TYPES +STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING +==== +---- QUERY # test computing stats on an empty table create table alltypes_empty like functional_rc_snap.alltypes ====
