HIVE-13269: Simplify comparison expressions using column stats (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76130a9d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76130a9d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76130a9d Branch: refs/heads/java8 Commit: 76130a9d54d773619d3c525789d2e4ae590bfe4f Parents: ba07055 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Thu May 26 10:07:37 2016 +0100 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Thu May 26 10:07:37 2016 +0100 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + data/conf/perf-reg/hive-site.xml | 5 + .../hive/ql/optimizer/calcite/HiveRexUtil.java | 24 + .../HiveReduceExpressionsWithStatsRule.java | 330 ++++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 4 + .../queries/clientpositive/remove_exprs_stats.q | 55 ++ .../clientpositive/remove_exprs_stats.q.out | 610 +++++++++++++++++++ 7 files changed, 1031 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index b1f37ff..6a404bd 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1485,6 +1485,9 @@ public class HiveConf extends Configuration { "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" + "number of tuples of the outer input of the join or the input of the union that you should get in order to apply the rule."), + HIVE_OPTIMIZE_REDUCE_WITH_STATS("hive.optimize.filter.stats.reduction", false, "Whether to simplify comparison\n" + + "expressions in filter operators using column stats"), + HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false, "Whether to create a separate plan for skewed keys for the tables in the join.\n" + "This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/data/conf/perf-reg/hive-site.xml ---------------------------------------------------------------------- diff --git a/data/conf/perf-reg/hive-site.xml b/data/conf/perf-reg/hive-site.xml index 9e929fc..012369f 100644 --- a/data/conf/perf-reg/hive-site.xml +++ b/data/conf/perf-reg/hive-site.xml @@ -277,4 +277,9 @@ <value>true</value> </property> +<property> + <name>hive.optimize.filter.stats.reduction</name> + <value>true</value> +</property> + </configuration> http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java index 6933fec..a5dcffb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -420,6 +420,12 @@ public class HiveRexUtil { private static RexCall invert(RexBuilder rexBuilder, RexCall call) { switch (call.getKind()) { + case EQUALS: + return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + Lists.reverse(call.getOperands())); + case NOT_EQUALS: + return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + Lists.reverse(call.getOperands())); case LESS_THAN: return (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.GREATER_THAN, Lists.reverse(call.getOperands())); @@ -469,6 +475,24 @@ public class HiveRexUtil { } } + public static SqlKind invert(SqlKind kind) { + switch (kind) { + case EQUALS: + return SqlKind.EQUALS; + case NOT_EQUALS: + return SqlKind.NOT_EQUALS; + case LESS_THAN: + return SqlKind.GREATER_THAN; + case GREATER_THAN: + return SqlKind.LESS_THAN; + case LESS_THAN_OR_EQUAL: + return SqlKind.GREATER_THAN_OR_EQUAL; + case GREATER_THAN_OR_EQUAL: + return SqlKind.LESS_THAN_OR_EQUAL; + } + return null; + } + public static class ExprSimplifier extends RexShuttle { private final RexBuilder rexBuilder; private final boolean unknownAsFalse; http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java new file mode 100644 index 0000000..ec488fe --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveReduceExpressionsWithStatsRule.java @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.math.BigDecimal; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Filter; +import org.apache.calcite.rel.metadata.RelColumnOrigin; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.collect.Lists; + +/** + * This rule simplifies the condition in Filter operators using the + * column statistics (if available). + * + * For instance, given the following predicate: + * a > 5 + * we can infer that the predicate will evaluate to false if the max + * value for column a is 4. + * + * Currently we support the simplification of =, >=, <=, >, <, and + * IN operations. + */ +public class HiveReduceExpressionsWithStatsRule extends RelOptRule { + + protected static final Logger LOG = LoggerFactory.getLogger( + HiveReduceExpressionsWithStatsRule.class); + + public static final HiveReduceExpressionsWithStatsRule INSTANCE = + new HiveReduceExpressionsWithStatsRule(); + + private static final Set<SqlKind> COMPARISON = EnumSet.of(SqlKind.EQUALS, + SqlKind.GREATER_THAN_OR_EQUAL, + SqlKind.LESS_THAN_OR_EQUAL, + SqlKind.GREATER_THAN, + SqlKind.LESS_THAN); + + private HiveReduceExpressionsWithStatsRule() { + super(operand(Filter.class, operand(RelNode.class, any()))); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + + final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); + final RelMetadataQuery metadataProvider = RelMetadataQuery.instance(); + + // 1. Recompose filter possibly by pulling out common elements from DNF + // expressions + RexNode newFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); + + // 2. Reduce filter with stats information + RexReplacer replacer = new RexReplacer(filter, rexBuilder, metadataProvider); + newFilterCondition = replacer.apply(newFilterCondition); + + // 3. Transform if we have created a new filter operator + if (!filter.getCondition().toString().equals(newFilterCondition.toString())) { + Filter newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newFilterCondition); + call.transformTo(newFilter); + } + + } + + /** + * Replaces expressions with their reductions. Note that we only have to + * look for RexCall, since nothing else is reducible in the first place. + */ + protected static class RexReplacer extends RexShuttle { + private final Filter filterOp; + private final RexBuilder rexBuilder; + private final RelMetadataQuery metadataProvider; + + RexReplacer(Filter filterOp, RexBuilder rexBuilder, RelMetadataQuery metadataProvider) { + this.filterOp = filterOp; + this.rexBuilder = rexBuilder; + this.metadataProvider = metadataProvider; + } + + @Override + public RexNode visitCall(RexCall call) { + if (COMPARISON.contains(call.getOperator().getKind())) { + RexInputRef ref = null; + RexLiteral literal = null; + SqlKind kind = null; + if (call.operands.get(0) instanceof RexInputRef + && call.operands.get(1) instanceof RexLiteral) { + ref = (RexInputRef) call.operands.get(0); + literal = (RexLiteral) call.operands.get(1); + kind = call.getOperator().getKind(); + } else if (call.operands.get(1) instanceof RexInputRef + && call.operands.get(0) instanceof RexLiteral) { + ref = (RexInputRef) call.operands.get(1); + literal = (RexLiteral) call.operands.get(0); + kind = HiveRexUtil.invert(call.getOperator().getKind()); + } + + // Found an expression that we can try to reduce + Number max = null; + Number min = null; + if (ref != null && literal != null && kind != null) { + Pair<Number,Number> maxMin = extractMaxMin(ref); + max = maxMin.left; + min = maxMin.right; + } + + if (max != null && min != null) { + // Stats were available, try to reduce + RexNode reduced = reduceCall(literal, kind, max, min); + if (reduced != null) { + return reduced; + } + } + + // We cannot apply the reduction + return call; + } else if (call.getOperator().getKind() == SqlKind.IN) { + if (call.getOperands().get(0) instanceof RexInputRef) { + // Ref + RexInputRef ref = (RexInputRef) call.getOperands().get(0); + // Found an expression that we can try to reduce + Number max = null; + Number min = null; + if (ref != null) { + Pair<Number,Number> maxMin = extractMaxMin(ref); + max = maxMin.left; + min = maxMin.right; + } + + if (max != null && min != null) { + // Stats were available, try to reduce + List<RexNode> newOperands = Lists.newArrayList(); + newOperands.add(ref); + for (int i = 1; i < call.getOperands().size(); i++) { + RexNode operand = call.getOperands().get(i); + if (operand instanceof RexLiteral) { + RexLiteral literal = (RexLiteral) operand; + RexNode reduced = reduceCall(literal, SqlKind.EQUALS, max, min); + if (reduced != null) { + if (reduced.isAlwaysTrue()) { + return rexBuilder.makeLiteral(true); + } + } else { + newOperands.add(literal); + } + } else { + newOperands.add(operand); + } + } + if (newOperands.size() == 1) { + return rexBuilder.makeLiteral(false); + } + return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); + } + } else if (call.getOperands().get(0).getKind() == SqlKind.ROW) { + // Struct + RexCall struct = (RexCall) call.getOperands().get(0); + List<RexInputRef> refs = Lists.newArrayList(); + List<Pair<Number,Number>> maxMinStats = Lists.newArrayList(); + for (RexNode operand: struct.getOperands()) { + if (!(operand instanceof RexInputRef)) { + // Cannot simplify, we bail out + return call; + } + RexInputRef ref = (RexInputRef) operand; + refs.add(ref); + maxMinStats.add(extractMaxMin(ref)); + } + + // Try to reduce + List<RexNode> newOperands = Lists.newArrayList(); + newOperands.add(struct); + for (int i = 1; i < call.getOperands().size(); i++) { + RexCall constStruct = (RexCall) call.getOperands().get(i); + boolean allTrue = true; + boolean addOperand = true; + for (int j = 0; i < constStruct.getOperands().size(); j++) { + RexNode operand = constStruct.getOperands().get(j); + if (operand instanceof RexLiteral) { + RexLiteral literal = (RexLiteral) operand; + RexNode reduced = reduceCall(literal, SqlKind.EQUALS, + maxMinStats.get(j).left, maxMinStats.get(j).right); + if (reduced != null) { + if (reduced.isAlwaysFalse()) { + allTrue = false; + addOperand = false; + break; + } + } else { + allTrue = false; + } + } else { + allTrue = false; + } + } + if (allTrue) { + return rexBuilder.makeLiteral(true); + } + if (addOperand) { + newOperands.add(constStruct); + } + } + if (newOperands.size() == 1) { + return rexBuilder.makeLiteral(false); + } + return rexBuilder.makeCall(HiveIn.INSTANCE, newOperands); + } + + // We cannot apply the reduction + return call; + } + + // If we did not reduce, check the children nodes + RexNode node = super.visitCall(call); + if (node != call) { + node = HiveRexUtil.simplify(rexBuilder, node); + } + return node; + } + + private Pair<Number,Number> extractMaxMin(RexInputRef ref) { + Number max = null; + Number min = null; + RelColumnOrigin columnOrigin = this.metadataProvider.getColumnOrigin(filterOp, ref.getIndex()); + if (columnOrigin != null) { + RelOptHiveTable table = (RelOptHiveTable) columnOrigin.getOriginTable(); + if (table != null) { + ColStatistics colStats = + table.getColStat(Lists.newArrayList(columnOrigin.getOriginColumnOrdinal())).get(0); + if (colStats != null && StatsSetupConst.areColumnStatsUptoDate( + table.getHiveTableMD().getParameters(), colStats.getColumnName())) { + Range range = colStats.getRange(); + if (range != null) { + max = range.maxValue; + min = range.minValue; + } + } + } + } + return Pair.<Number,Number>of(max, min); + } + + @SuppressWarnings("unchecked") + private RexNode reduceCall(RexLiteral literal, SqlKind kind, Number max, Number min) { + // Stats were available, try to reduce + if (max != null && min != null) { + BigDecimal maxVal = new BigDecimal(max.floatValue()); + BigDecimal minVal = new BigDecimal(min.floatValue()); + RexLiteral maxLiteral = rexBuilder.makeExactLiteral(maxVal, literal.getType()); + RexLiteral minLiteral = rexBuilder.makeExactLiteral(minVal, literal.getType()); + + // Equals + if (kind == SqlKind.EQUALS) { + if (minLiteral.getValue().compareTo(literal.getValue()) > 0 || + maxLiteral.getValue().compareTo(literal.getValue()) < 0) { + return rexBuilder.makeLiteral(false); + } + } + + // Greater than (or equal), and less than (or equal) + if (kind == SqlKind.GREATER_THAN) { + if (minLiteral.getValue().compareTo(literal.getValue()) > 0) { + return rexBuilder.makeLiteral(true); + } else if (maxLiteral.getValue().compareTo(literal.getValue()) <= 0) { + return rexBuilder.makeLiteral(false); + } + } else if (kind == SqlKind.GREATER_THAN_OR_EQUAL) { + if (minLiteral.getValue().compareTo(literal.getValue()) >= 0) { + return rexBuilder.makeLiteral(true); + } else if (maxLiteral.getValue().compareTo(literal.getValue()) < 0) { + return rexBuilder.makeLiteral(false); + } + } else if (kind == SqlKind.LESS_THAN) { + if (minLiteral.getValue().compareTo(literal.getValue()) >= 0) { + return rexBuilder.makeLiteral(false); + } else if (maxLiteral.getValue().compareTo(literal.getValue()) < 0) { + return rexBuilder.makeLiteral(true); + } + } else if (kind == SqlKind.LESS_THAN_OR_EQUAL) { + if (minLiteral.getValue().compareTo(literal.getValue()) > 0) { + return rexBuilder.makeLiteral(false); + } else if (maxLiteral.getValue().compareTo(literal.getValue()) <= 0) { + return rexBuilder.makeLiteral(true); + } + } + } + return null; + } + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 49e65e7..d084552 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -161,6 +161,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectFilterPullUp import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveReduceExpressionsWithStatsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; @@ -1156,6 +1157,9 @@ public class CalcitePlanner extends SemanticAnalyzer { rules.add(HiveFilterJoinRule.FILTER_ON_JOIN); rules.add(new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class)); rules.add(new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY)); + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_REDUCE_WITH_STATS)) { + rules.add(HiveReduceExpressionsWithStatsRule.INSTANCE); + } rules.add(HiveProjectFilterPullUpConstantsRule.INSTANCE); rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE); rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE); http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/ql/src/test/queries/clientpositive/remove_exprs_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/remove_exprs_stats.q b/ql/src/test/queries/clientpositive/remove_exprs_stats.q new file mode 100644 index 0000000..66e6615 --- /dev/null +++ b/ql/src/test/queries/clientpositive/remove_exprs_stats.q @@ -0,0 +1,55 @@ +set hive.optimize.filter.stats.reduction=true; +set hive.mapred.mode=nonstrict; +set hive.stats.fetch.column.stats=true; + +create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile; + +create table loc_orc like loc_staging; +alter table loc_orc set fileformat orc; + +load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging; + +insert overwrite table loc_orc select * from loc_staging; + +analyze table loc_orc compute statistics for columns state,locid,zip,year; + +-- always true +explain select * from loc_orc where locid < 30; +-- always false +explain select * from loc_orc where locid > 30; +-- always true +explain select * from loc_orc where locid <= 30; +-- always false +explain select * from loc_orc where locid >= 30; + +-- nothing to do +explain select * from loc_orc where locid < 6; +-- always false +explain select * from loc_orc where locid > 6; +-- always true +explain select * from loc_orc where locid <= 6; +-- nothing to do +explain select * from loc_orc where locid >= 6; + +-- always false +explain select * from loc_orc where locid < 1; +-- nothing to do +explain select * from loc_orc where locid > 1; +-- nothing to do +explain select * from loc_orc where locid <= 1; +-- always true +explain select * from loc_orc where locid >= 1; + +-- 5 should stay +explain select * from loc_orc where locid IN (-4,5,30,40); +-- nothing to do +explain select * from loc_orc where locid IN (5,2,3); +-- 1 and 6 should be left +explain select * from loc_orc where locid IN (1,6,9); +-- always false +explain select * from loc_orc where locid IN (40,30); http://git-wip-us.apache.org/repos/asf/hive/blob/76130a9d/ql/src/test/results/clientpositive/remove_exprs_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out new file mode 100644 index 0000000..e29fb4e --- /dev/null +++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out @@ -0,0 +1,610 @@ +PREHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_staging +POSTHOOK: query: create table if not exists loc_staging ( + state string, + locid int, + zip bigint, + year int +) row format delimited fields terminated by '|' stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_staging +PREHOOK: query: create table loc_orc like loc_staging +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@loc_orc +POSTHOOK: query: create table loc_orc like loc_staging +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@loc_orc +PREHOOK: query: alter table loc_orc set fileformat orc +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@loc_orc +PREHOOK: Output: default@loc_orc +POSTHOOK: query: alter table loc_orc set fileformat orc +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@loc_orc +POSTHOOK: Output: default@loc_orc +PREHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@loc_staging +POSTHOOK: query: load data local inpath '../../data/files/loc.txt' overwrite into table loc_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@loc_staging +PREHOOK: query: insert overwrite table loc_orc select * from loc_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_staging +PREHOOK: Output: default@loc_orc +POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_staging +POSTHOOK: Output: default@loc_orc +POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] +POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] +POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] +PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +PREHOOK: type: QUERY +PREHOOK: Input: default@loc_orc +#### A masked pattern was here #### +POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year +POSTHOOK: type: QUERY +POSTHOOK: Input: default@loc_orc +#### A masked pattern was here #### +PREHOOK: query: -- always true +explain select * from loc_orc where locid < 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- always true +explain select * from loc_orc where locid < 30 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- always false +explain select * from loc_orc where locid > 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- always false +explain select * from loc_orc where locid > 30 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always true +explain select * from loc_orc where locid <= 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- always true +explain select * from loc_orc where locid <= 30 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- always false +explain select * from loc_orc where locid >= 30 +PREHOOK: type: QUERY +POSTHOOK: query: -- always false +explain select * from loc_orc where locid >= 30 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- nothing to do +explain select * from loc_orc where locid < 6 +PREHOOK: type: QUERY +POSTHOOK: query: -- nothing to do +explain select * from loc_orc where locid < 6 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid < 6) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always false +explain select * from loc_orc where locid > 6 +PREHOOK: type: QUERY +POSTHOOK: query: -- always false +explain select * from loc_orc where locid > 6 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always true +explain select * from loc_orc where locid <= 6 +PREHOOK: type: QUERY +POSTHOOK: query: -- always true +explain select * from loc_orc where locid <= 6 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- nothing to do +explain select * from loc_orc where locid >= 6 +PREHOOK: type: QUERY +POSTHOOK: query: -- nothing to do +explain select * from loc_orc where locid >= 6 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid >= 6) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always false +explain select * from loc_orc where locid < 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- always false +explain select * from loc_orc where locid < 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- nothing to do +explain select * from loc_orc where locid > 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- nothing to do +explain select * from loc_orc where locid > 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid > 1) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- nothing to do +explain select * from loc_orc where locid <= 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- nothing to do +explain select * from loc_orc where locid <= 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid <= 1) (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always true +explain select * from loc_orc where locid >= 1 +PREHOOK: type: QUERY +POSTHOOK: query: -- always true +explain select * from loc_orc where locid >= 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: -- 5 should stay +explain select * from loc_orc where locid IN (-4,5,30,40) +PREHOOK: type: QUERY +POSTHOOK: query: -- 5 should stay +explain select * from loc_orc where locid IN (-4,5,30,40) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid) IN (5) (type: boolean) + Statistics: Num rows: 5 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- nothing to do +explain select * from loc_orc where locid IN (5,2,3) +PREHOOK: type: QUERY +POSTHOOK: query: -- nothing to do +explain select * from loc_orc where locid IN (5,2,3) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid) IN (5, 2, 3) (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 1 and 6 should be left +explain select * from loc_orc where locid IN (1,6,9) +PREHOOK: type: QUERY +POSTHOOK: query: -- 1 and 6 should be left +explain select * from loc_orc where locid IN (1,6,9) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (locid) IN (1, 6) (type: boolean) + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- always false +explain select * from loc_orc where locid IN (40,30) +PREHOOK: type: QUERY +POSTHOOK: query: -- always false +explain select * from loc_orc where locid IN (40,30) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: loc_orc + Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +