HIVE-15903: Compute table stats when user computes column stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
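With this change, ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS no longer computes only column statistics: the planner attaches a table-stats task to the column-stats job, so basic table-level (and, for partitioned tables, partition-level) statistics are refreshed as well. The SemanticAnalyzer change below only enables this when hive.execution.engine is tez. A rough user-facing sketch, modeled on the new column_table_stats.q test added in this patch (table name, file path, and the exact statistics written are the test's own choices, not requirements):

    CREATE TABLE s (key STRING, value STRING) STORED AS TEXTFILE;
    LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;

    -- previously this only wrote column statistics; it now also updates
    -- table parameters such as numRows and totalSize
    ANALYZE TABLE s COMPUTE STATISTICS FOR COLUMNS;
    DESC FORMATTED s;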
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1321293e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1321293e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1321293e

Branch: refs/heads/hive-14535
Commit: 1321293e8801e0dacd9b2c0824e673b6410a7fe8
Parents: 0cc1afa
Author: Pengcheng Xiong <[email protected]>
Authored: Wed Mar 8 13:18:02 2017 -0800
Committer: Pengcheng Xiong <[email protected]>
Committed: Wed Mar 8 13:18:02 2017 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    2 +
 .../hive/ql/parse/BaseSemanticAnalyzer.java     |   19 +-
 .../hadoop/hive/ql/parse/GenTezUtils.java       |    4 +
 .../hive/ql/parse/ProcessAnalyzeTable.java      |  147 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   33 +-
 .../queries/clientpositive/column_table_stats.q |   88 ++
 .../clientpositive/column_table_stats_orc.q     |   57 +
 .../alter_table_invalidate_column_stats.q.out   |   16 +
 .../columnStatsUpdateForStatsOptimizer_1.q.out  |   10 +
 .../llap/column_table_stats.q.out               | 1421 ++++++++++++++++++
 .../llap/column_table_stats_orc.q.out           |  989 ++++++++++++
 .../llap/columnstats_part_coltype.q.out         |   42 +
 .../clientpositive/llap/deleteAnalyze.q.out     |    4 +
 .../llap/drop_partition_with_stats.q.out        |   80 +
 .../clientpositive/llap/explainuser_2.q.out     |    6 +
 .../extrapolate_part_stats_partial_ndv.q.out    |   24 +
 .../clientpositive/llap/llap_stats.q.out        |   26 +
 .../clientpositive/llap/llapdecider.q.out       |    2 +
 .../llap/metadata_only_queries.q.out            |   14 +
 .../metadata_only_queries_with_filters.q.out    |    8 +
 .../clientpositive/llap/schema_evol_stats.q.out |   12 +
 .../llap/schema_evol_text_vec_table.q.out       |    2 +-
 .../llap/special_character_in_tabnames_1.q.out  |   16 +
 .../clientpositive/llap/stats_only_null.q.out   |   16 +
 .../clientpositive/llap/union_remove_26.q.out   |    6 +
 .../llap/vector_outer_join1.q.out               |    2 +
 .../llap/vector_outer_join2.q.out               |    2 +
 .../llap/vector_outer_join3.q.out               |    2 +
 .../llap/vector_outer_join4.q.out               |    2 +
 .../llap/vector_outer_join5.q.out               |    8 +
 .../vectorized_dynamic_semijoin_reduction.q.out |    2 +
 ...vectorized_dynamic_semijoin_reduction2.q.out |    4 +
 .../results/clientpositive/perf/query14.q.out   |    4 +-
 .../clientpositive/tez/explainanalyze_3.q.out   |   28 +-
 .../clientpositive/tez/explainanalyze_5.q.out   |   28 +-
 .../clientpositive/tez/explainuser_3.q.out      |   38 +-
 36 files changed, 3060 insertions(+), 104 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 9babc77..2a7627a 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -474,6 +474,8 @@ minillaplocal.query.files=acid_globallimit.q,\
   cbo_rp_unionDistinct_2.q,\
   cbo_rp_windowing_2.q,\
   cbo_subq_not_in.q,\
+  column_table_stats.q,\
+  column_table_stats_orc.q,\
   constprog_dpp.q,\
   current_date_timestamp.q,\
   correlationoptimizer1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index 3e749eb..36009bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -1007,6 +1007,23 @@ public abstract class BaseSemanticAnalyzer {
       }
     }
 
+    public TableSpec(Table tableHandle, List<Partition> partitions)
+        throws HiveException {
+      this.tableHandle = tableHandle;
+      this.tableName = tableHandle.getTableName();
+      if (partitions != null && !partitions.isEmpty()) {
+        this.specType = SpecType.STATIC_PARTITION;
+        this.partitions = partitions;
+        List<FieldSchema> partCols = this.tableHandle.getPartCols();
+        this.partSpec = new LinkedHashMap<>();
+        for (FieldSchema partCol : partCols) {
+          partSpec.put(partCol.getName(), null);
+        }
+      } else {
+        this.specType = SpecType.TABLE_ONLY;
+      }
+    }
+
     public TableSpec(Hive db, HiveConf conf, ASTNode ast, boolean allowDynamicPartitionsSpec,
         boolean allowPartialPartitionsSpec) throws SemanticException {
       assert (ast.getToken().getType() == HiveParser.TOK_TAB
@@ -1156,7 +1173,6 @@ public abstract class BaseSemanticAnalyzer {
     private List<String> colType;
     private boolean tblLvl;
 
-
     public String getTableName() {
       return tableName;
     }
@@ -1188,6 +1204,7 @@ public abstract class BaseSemanticAnalyzer {
     public void setColType(List<String> colType) {
       this.colType = colType;
     }
+
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 7f5fdff..905431f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -199,6 +199,10 @@ public class GenTezUtils {
     // All the setup is done in GenMapRedUtils
     GenMapRedUtils.setMapWork(mapWork, context.parseContext, context.inputs, partitions,
         root, alias, context.conf, false);
+    // we also collect table stats while collecting column stats.
+    if (context.parseContext.getAnalyzeRewrite() != null) {
+      mapWork.setGatheringStats(true);
+    }
   }
 
   // removes any union operator and clones the plan

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
index c13a404..46c24e3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
@@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.parse;
 
+import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
@@ -30,14 +32,17 @@ import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.StatsNoJobWork;
 import org.apache.hadoop.hive.ql.plan.StatsWork;
@@ -65,9 +70,8 @@ public class ProcessAnalyzeTable implements NodeProcessor {
 
   @SuppressWarnings("unchecked")
   @Override
-  public Object process(Node nd, Stack<Node> stack,
-      NodeProcessorCtx procContext, Object... nodeOutputs)
-      throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procContext,
+      Object...
nodeOutputs) throws SemanticException { GenTezProcContext context = (GenTezProcContext) procContext; @@ -79,18 +83,16 @@ public class ProcessAnalyzeTable implements NodeProcessor { if (parseContext.getQueryProperties().isAnalyzeCommand()) { - assert tableScan.getChildOperators() == null - || tableScan.getChildOperators().size() == 0; + assert tableScan.getChildOperators() == null || tableScan.getChildOperators().size() == 0; String alias = null; - for (String a: parseContext.getTopOps().keySet()) { + for (String a : parseContext.getTopOps().keySet()) { if (tableScan == parseContext.getTopOps().get(a)) { alias = a; } } assert alias != null; - TezWork tezWork = context.currentTask.getWork(); if (inputFormat.equals(OrcInputFormat.class)) { // For ORC, all the following statements are the same @@ -99,7 +101,8 @@ public class ProcessAnalyzeTable implements NodeProcessor { // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; // There will not be any Tez job above this task - StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec()); + StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata() + .getTableSpec()); snjWork.setStatsReliable(parseContext.getConf().getBoolVar( HiveConf.ConfVars.HIVE_STATS_RELIABLE)); // If partition is specified, get pruned partition list @@ -107,8 +110,8 @@ public class ProcessAnalyzeTable implements NodeProcessor { if (confirmedParts.size() > 0) { Table source = tableScan.getConf().getTableMetadata(); List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan); - PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, - partCols, false); + PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols, + false); snjWork.setPrunedPartitionList(partList); } Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf()); @@ -118,52 +121,101 @@ public class ProcessAnalyzeTable implements NodeProcessor { return true; } else { - // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS; - // The plan consists of a simple TezTask followed by a StatsTask. - // The Tez task is just a simple TableScanOperator + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS; + // The plan consists of a simple TezTask followed by a StatsTask. + // The Tez task is just a simple TableScanOperator - StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); - statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); - statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); - statsWork.setSourceTask(context.currentTask); - statsWork.setStatsReliable(parseContext.getConf().getBoolVar(HiveConf.ConfVars.HIVE_STATS_RELIABLE)); - Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf()); - context.currentTask.addDependentTask(statsTask); - - // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; - // The plan consists of a StatsTask only. 
- if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { - statsTask.setParentTasks(null); - statsWork.setNoScanAnalyzeCommand(true); - context.rootTasks.remove(context.currentTask); - context.rootTasks.add(statsTask); - } + StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec()); + statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix()); + statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir()); + statsWork.setSourceTask(context.currentTask); + statsWork.setStatsReliable(parseContext.getConf().getBoolVar( + HiveConf.ConfVars.HIVE_STATS_RELIABLE)); + Task<StatsWork> statsTask = TaskFactory.get(statsWork, parseContext.getConf()); + context.currentTask.addDependentTask(statsTask); - // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; - if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { - handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); - } + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan; + // The plan consists of a StatsTask only. + if (parseContext.getQueryProperties().isNoScanAnalyzeCommand()) { + statsTask.setParentTasks(null); + statsWork.setNoScanAnalyzeCommand(true); + context.rootTasks.remove(context.currentTask); + context.rootTasks.add(statsTask); + } - // NOTE: here we should use the new partition predicate pushdown API to get a list of pruned list, - // and pass it to setTaskPlan as the last parameter - Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan); - PrunedPartitionList partitions = null; - if (confirmedPartns.size() > 0) { - Table source = tableScan.getConf().getTableMetadata(); - List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan); - partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false); - } + // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan; + if (parseContext.getQueryProperties().isPartialScanAnalyzeCommand()) { + handlePartialScanCommand(tableScan, parseContext, statsWork, context, statsTask); + } + + // NOTE: here we should use the new partition predicate pushdown API to + // get a list of pruned list, + // and pass it to setTaskPlan as the last parameter + Set<Partition> confirmedPartns = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan); + PrunedPartitionList partitions = null; + if (confirmedPartns.size() > 0) { + Table source = tableScan.getConf().getTableMetadata(); + List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan); + partitions = new PrunedPartitionList(source, confirmedPartns, partCols, false); + } - MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions); - w.setGatheringStats(true); + MapWork w = utils.createMapWork(context, tableScan, tezWork, partitions); + w.setGatheringStats(true); - return true; + return true; + } + } else if (parseContext.getAnalyzeRewrite() != null) { + // we need to collect table stats while collecting column stats. + try { + context.currentTask.addDependentTask(genTableStats(context, tableScan)); + } catch (HiveException e) { + throw new SemanticException(e); } } return null; } + private Task<?> genTableStats(GenTezProcContext context, TableScanOperator tableScan) + throws HiveException { + Class<? 
extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata()
+        .getInputFormatClass();
+    ParseContext parseContext = context.parseContext;
+    Table table = tableScan.getConf().getTableMetadata();
+    List<Partition> partitions = new ArrayList<>();
+    if (table.isPartitioned()) {
+      partitions.addAll(parseContext.getPrunedPartitions(tableScan).getPartitions());
+      for (Partition partn : partitions) {
+        LOG.debug("XXX: adding part: " + partn);
+        context.outputs.add(new WriteEntity(partn, WriteEntity.WriteType.DDL_NO_LOCK));
+      }
+    }
+    TableSpec tableSpec = new TableSpec(table, partitions);
+    tableScan.getConf().getTableMetadata().setTableSpec(tableSpec);
+
+    if (inputFormat.equals(OrcInputFormat.class)) {
+      // For ORC, there is no Tez Job for table stats.
+      StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata()
+          .getTableSpec());
+      snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      // If partition is specified, get pruned partition list
+      if (partitions.size() > 0) {
+        snjWork.setPrunedPartitionList(parseContext.getPrunedPartitions(tableScan));
+      }
+      return TaskFactory.get(snjWork, parseContext.getConf());
+    } else {
+
+      StatsWork statsWork = new StatsWork(tableScan.getConf().getTableMetadata().getTableSpec());
+      statsWork.setAggKey(tableScan.getConf().getStatsAggPrefix());
+      statsWork.setStatsTmpDir(tableScan.getConf().getTmpStatsDir());
+      statsWork.setSourceTask(context.currentTask);
+      statsWork.setStatsReliable(parseContext.getConf().getBoolVar(
+          HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+      return TaskFactory.get(statsWork, parseContext.getConf());
+    }
+  }
+
   /**
    * handle partial scan command.
    *
@@ -171,11 +223,12 @@ public class ProcessAnalyzeTable implements NodeProcessor {
    */
   private void handlePartialScanCommand(TableScanOperator tableScan, ParseContext parseContext,
       StatsWork statsWork, GenTezProcContext context, Task<StatsWork> statsTask)
-      throws SemanticException {
+          throws SemanticException {
 
     String aggregationKey = tableScan.getConf().getStatsAggPrefix();
     StringBuilder aggregationKeyBuffer = new StringBuilder(aggregationKey);
-    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan, aggregationKeyBuffer);
+    List<Path> inputPaths = GenMapRedUtils.getInputPathsForPartialScan(tableScan,
+        aggregationKeyBuffer);
     aggregationKey = aggregationKeyBuffer.toString();
 
     // scan work

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index ee9101b..0732207 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -10260,7 +10260,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       RowResolver rwsch)
       throws SemanticException {
 
-    if (!qbp.isAnalyzeCommand()) {
+    // if it is not analyze command and not column stats, then do not gatherstats
+    // if it is column stats, but it is not tez, do not gatherstats
+    if ((!qbp.isAnalyzeCommand() && qbp.getAnalyzeRewrite() == null)
+        || (qbp.getAnalyzeRewrite() != null && !HiveConf.getVar(conf,
+            HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez"))) {
       tsDesc.setGatherStats(false);
     } else {
       if (HiveConf.getVar(conf, HIVESTATSDBCLASS).equalsIgnoreCase(StatDB.fs.name())) {
@@ -10283,15 +10287,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     tsDesc.addVirtualCols(vcList);
 
     String tblName = tab.getTableName();
-    TableSpec tblSpec = qbp.getTableSpec(alias);
-    Map<String, String> partSpec = tblSpec.getPartSpec();
-
-    if (partSpec != null) {
-      List<String> cols = new ArrayList<String>();
-      cols.addAll(partSpec.keySet());
-      tsDesc.setPartColumns(cols);
-    }
-
     // Theoretically the key prefix could be any unique string shared
     // between TableScanOperator (when publishing) and StatsTask (when aggregating).
     // Here we use
@@ -10300,13 +10295,27 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // Currently, partition spec can only be static partition.
     String k = MetaStoreUtils.encodeTableName(tblName) + Path.SEPARATOR;
     tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k);
-
+
     // set up WriteEntity for replication
     outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED));
 
     // add WriteEntity for each matching partition
     if (tab.isPartitioned()) {
-      if (partSpec == null) {
+      List<String> cols = new ArrayList<String>();
+      if (qbp.getAnalyzeRewrite() != null) {
+        List<FieldSchema> partitionCols = tab.getPartCols();
+        for (FieldSchema fs : partitionCols) {
+          cols.add(fs.getName());
+        }
+        tsDesc.setPartColumns(cols);
+        return;
+      }
+      TableSpec tblSpec = qbp.getTableSpec(alias);
+      Map<String, String> partSpec = tblSpec.getPartSpec();
+      if (partSpec != null) {
+        cols.addAll(partSpec.keySet());
+        tsDesc.setPartColumns(cols);
+      } else {
         throw new SemanticException(ErrorMsg.NEED_PARTITION_SPECIFICATION.getMsg());
       }
       List<Partition> partitions = qbp.getTableSpec().partitions;

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/test/queries/clientpositive/column_table_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/column_table_stats.q b/ql/src/test/queries/clientpositive/column_table_stats.q
new file mode 100644
index 0000000..991fa54
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats.q
@@ -0,0 +1,88 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE s;
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart partition(ds,hr) compute statistics for columns;
+
+analyze table spart partition(ds,hr) compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="11");
+
+LOAD DATA LOCAL INPATH "../../data/files/kv1.txt"
+OVERWRITE INTO TABLE spart PARTITION (ds="2008-04-08", hr="12");
+
+
+desc formatted spart;
+
+explain extended analyze table spart partition(hr="11") compute statistics for columns;
+
+analyze table spart partition(hr="11") compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/test/queries/clientpositive/column_table_stats_orc.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/column_table_stats_orc.q b/ql/src/test/queries/clientpositive/column_table_stats_orc.q
new file mode 100644
index 0000000..51fccd2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/column_table_stats_orc.q
@@ -0,0 +1,57 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+DROP TABLE IF EXISTS s;
+
+CREATE TABLE s (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS ORC;
+
+insert into table s values ('1','2');
+
+desc formatted s;
+
+explain extended analyze table s compute statistics for columns;
+
+analyze table s compute statistics for columns;
+
+desc formatted s;
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC;
+
+insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2');
+insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2');
+
+desc formatted spart;
+
+explain extended analyze table spart compute statistics for columns;
+
+analyze table spart compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);
+
+
+DROP TABLE IF EXISTS spart;
+
+CREATE TABLE spart (key STRING COMMENT 'default', value STRING COMMENT 'default')
+PARTITIONED BY (ds STRING, hr STRING)
+STORED AS ORC;
+
+insert into table spart PARTITION (ds="2008-04-08", hr="12") values ('1','2');
+insert into table spart PARTITION (ds="2008-04-08", hr="11") values ('1','2');
+
+desc formatted spart;
+
+explain extended analyze table spart partition(hr="11") compute statistics for columns;
+
+analyze table spart partition(hr="11") compute statistics for columns;
+
+desc formatted spart;
+
+desc formatted spart PARTITION(ds='2008-04-08', hr=11);
+desc formatted spart PARTITION(ds='2008-04-08', hr=12);

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out
index c1c5f62..cf296e3 100644
--- a/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/alter_table_invalidate_column_stats.q.out
@@ -77,10 +77,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testtable1
 POSTHOOK: query: analyze table testtable1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testtable1
 PREHOOK: query: describe formatted statsdb1.testtable1 col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable1
@@ -202,12 +204,18 @@ PREHOOK: Input: statsdb1@testpart1
 PREHOOK: Input: statsdb1@testpart1@part=part1
 PREHOOK: Input: statsdb1@testpart1@part=part2
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testpart1
+PREHOOK: Output: statsdb1@testpart1@part=part1
+PREHOOK: Output: statsdb1@testpart1@part=part2
 POSTHOOK: query: analyze table testpart1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testpart1
 POSTHOOK: Input: statsdb1@testpart1@part=part1
 POSTHOOK: Input: statsdb1@testpart1@part=part2
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testpart1
+POSTHOOK: Output: statsdb1@testpart1@part=part1
+POSTHOOK: Output: statsdb1@testpart1@part=part2
 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1
@@ -552,10 +560,12 @@ PREHOOK: query: analyze table testtable1 compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testtable1
 POSTHOOK: query: analyze table testtable1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testtable1
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testtable1
 PREHOOK: query: describe formatted statsdb1.testtable1 col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testtable1
@@ -677,12 +687,18 @@ PREHOOK: Input: statsdb1@testpart1
 PREHOOK: Input: statsdb1@testpart1@part=part1
 PREHOOK: Input: statsdb1@testpart1@part=part2
 #### A masked pattern was here ####
+PREHOOK: Output: statsdb1@testpart1
+PREHOOK: Output: statsdb1@testpart1@part=part1
+PREHOOK: Output: statsdb1@testpart1@part=part2
 POSTHOOK: query: analyze table testpart1 compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: statsdb1@testpart1
 POSTHOOK: Input: statsdb1@testpart1@part=part1
 POSTHOOK: Input: statsdb1@testpart1@part=part2
 #### A masked pattern was here ####
+POSTHOOK: Output: statsdb1@testpart1
+POSTHOOK: Output: statsdb1@testpart1@part=part1
+POSTHOOK: Output: statsdb1@testpart1@part=part2
 PREHOOK: query: describe formatted statsdb1.testpart1 partition (part = 'part1') col1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: statsdb1@testpart1

http://git-wip-us.apache.org/repos/asf/hive/blob/1321293e/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out
index 80ccddd..6d941fd 100644
--- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out
@@ -189,10 +189,12 @@ POSTHOOK: Input: default@calendar
 PREHOOK: query: analyze table calendar compute statistics for columns
 PREHOOK: type: QUERY
 PREHOOK: Input: default@calendar
+PREHOOK: Output: default@calendar
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table calendar compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@calendar
+POSTHOOK: Output: default@calendar
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted calendar
 PREHOOK: type: DESCTABLE
@@ -432,10 +434,12 @@ POSTHOOK: Input: default@calendar
 PREHOOK: query: analyze table calendar compute statistics for columns year
 PREHOOK: type: QUERY
 PREHOOK: Input: default@calendar
+PREHOOK: Output: default@calendar
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table calendar compute statistics for columns year
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@calendar
+POSTHOOK: Output: default@calendar
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted calendar
 PREHOOK: type: DESCTABLE
@@ -565,10 +569,12 @@ POSTHOOK: Input: default@calendar
 PREHOOK: query: analyze table calendar compute statistics for columns month
 PREHOOK: type: QUERY
 PREHOOK: Input: default@calendar
+PREHOOK: Output: default@calendar
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table calendar compute statistics for columns month
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@calendar
+POSTHOOK: Output: default@calendar
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted calendar
 PREHOOK: type: DESCTABLE
@@ -754,11 +760,15 @@ PREHOOK: query: analyze table calendarp partition (p=1) compute statistics for c
 PREHOOK: type: QUERY
 PREHOOK: Input: default@calendarp
 PREHOOK: Input: default@calendarp@p=1
+PREHOOK: Output: default@calendarp
+PREHOOK: Output: default@calendarp@p=1
 #### A masked pattern was here ####
 POSTHOOK: query: analyze table calendarp partition (p=1) compute statistics for columns
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@calendarp
 POSTHOOK: Input: default@calendarp@p=1
+POSTHOOK: Output: default@calendarp
+POSTHOOK: Output: default@calendarp@p=1
 #### A masked pattern was here ####
 PREHOOK: query: desc formatted calendarp partition (p=1)
 PREHOOK: type: DESCTABLE
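The golden-file updates above show the partitioned side of the same behavior: because the rewritten column-stats query now also writes table- and partition-level statistics, the analyzed table and its partitions appear as write entities (the added PREHOOK/POSTHOOK Output lines). A minimal sketch of that case, again modeled on the new column_table_stats.q test (the partition values and input file are the test's own choices):

    CREATE TABLE spart (key STRING, value STRING)
    PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE;
    LOAD DATA LOCAL INPATH '../../data/files/kv1.txt'
    OVERWRITE INTO TABLE spart PARTITION (ds='2008-04-08', hr='11');

    -- with this patch the command below also refreshes basic statistics
    -- for spart and for each scanned partition
    ANALYZE TABLE spart PARTITION(ds, hr) COMPUTE STATISTICS FOR COLUMNS;
    DESC FORMATTED spart PARTITION(ds='2008-04-08', hr=11);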
