HIVE-12808: Logical PPD: Push filter clauses through PTF(Windowing) into TS (Laljo John Pullokkaran via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e470c7b1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e470c7b1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e470c7b1 Branch: refs/heads/llap Commit: e470c7b16ccba8c42ea9efda29b2198d4ef3870d Parents: 2f73233 Author: jpullokk <[email protected]> Authored: Wed Feb 24 11:50:05 2016 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Wed Feb 24 11:51:32 2016 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 2 + .../ql/optimizer/calcite/HiveCalciteUtil.java | 108 +- .../rules/HiveFilterProjectTransposeRule.java | 152 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 3 +- .../queries/clientpositive/ppd_windowing1.q | 46 + .../queries/clientpositive/ppd_windowing2.q | 49 + .../clientpositive/correlationoptimizer12.q.out | 68 +- .../results/clientpositive/perf/query70.q.out | 4 +- .../results/clientpositive/ppd_windowing1.q.out | 2029 ++++ .../results/clientpositive/ppd_windowing2.q.out | 9841 ++++++++++++++++++ .../results/clientpositive/ptfgroupbyjoin.q.out | 66 +- .../clientpositive/spark/subquery_in.q.out | 15 +- .../results/clientpositive/subquery_in.q.out | 15 +- .../subquery_unqualcolumnrefs.q.out | 30 +- .../clientpositive/tez/subquery_in.q.out | 15 +- 15 files changed, 12305 insertions(+), 138 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3a2e0c7..e868500 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1354,6 +1354,8 @@ public class HiveConf extends Configuration { "Whether to update stale indexes automatically"), HIVEOPTPPD("hive.optimize.ppd", true, "Whether to enable predicate pushdown"), + HIVEOPTPPD_WINDOWING("hive.optimize.ppd.windowing", true, + "Whether to enable predicate pushdown through windowing"), HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true, "Whether to transitively replicate predicate filters over equijoin conditions."), HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true, http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 58a7cff..fcdafdd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -720,56 +720,108 @@ public class HiveCalciteUtil { return deterministic; } - public static boolean isDeterministicFuncOnLiterals(RexNode expr) { - boolean deterministicFuncOnLiterals = true; + private static class DeterMinisticFuncVisitorImpl extends RexVisitorImpl<Void> { + protected DeterMinisticFuncVisitorImpl() { + super(true); + } - RexVisitor<Void> visitor = new RexVisitorImpl<Void>(true) { - @Override - public Void visitCall(org.apache.calcite.rex.RexCall call) { - if (!call.getOperator().isDeterministic()) { - throw new Util.FoundOne(call); - } - return super.visitCall(call); + @Override + public Void visitCall(org.apache.calcite.rex.RexCall call) { + if (!call.getOperator().isDeterministic()) { + throw new Util.FoundOne(call); } + return super.visitCall(call); + } + + @Override + public Void visitCorrelVariable(RexCorrelVariable correlVariable) { + throw new Util.FoundOne(correlVariable); + } + + @Override + public Void visitLocalRef(RexLocalRef localRef) { + throw new Util.FoundOne(localRef); + } + + @Override + public Void visitOver(RexOver over) { + throw new Util.FoundOne(over); + } + + @Override + public Void visitDynamicParam(RexDynamicParam dynamicParam) { + throw new Util.FoundOne(dynamicParam); + } + + @Override + public Void visitRangeRef(RexRangeRef rangeRef) { + throw new Util.FoundOne(rangeRef); + } + + @Override + public Void visitFieldAccess(RexFieldAccess fieldAccess) { + throw new Util.FoundOne(fieldAccess); + } + } + + public static boolean isDeterministicFuncOnLiterals(RexNode expr) { + boolean deterministicFuncOnLiterals = true; + RexVisitor<Void> visitor = new DeterMinisticFuncVisitorImpl() { @Override public Void visitInputRef(RexInputRef inputRef) { throw new Util.FoundOne(inputRef); } + }; - @Override - public Void visitLocalRef(RexLocalRef localRef) { - throw new Util.FoundOne(localRef); - } + try { + expr.accept(visitor); + } catch (Util.FoundOne e) { + deterministicFuncOnLiterals = false; + } - @Override - public Void visitOver(RexOver over) { - throw new Util.FoundOne(over); - } + return deterministicFuncOnLiterals; + } - @Override - public Void visitDynamicParam(RexDynamicParam dynamicParam) { - throw new Util.FoundOne(dynamicParam); + public List<RexNode> getDeterministicFuncWithSingleInputRef(List<RexNode> exprs, + final Set<Integer> validInputRefs) { + List<RexNode> determExprsWithSingleRef = new ArrayList<RexNode>(); + for (RexNode e : exprs) { + if (isDeterministicFuncWithSingleInputRef(e, validInputRefs)) { + determExprsWithSingleRef.add(e); } + } + return determExprsWithSingleRef; + } - @Override - public Void visitRangeRef(RexRangeRef rangeRef) { - throw new Util.FoundOne(rangeRef); - } + public static boolean isDeterministicFuncWithSingleInputRef(RexNode expr, + final Set<Integer> validInputRefs) { + boolean deterministicFuncWithSingleInputRef = true; + + RexVisitor<Void> visitor = new DeterMinisticFuncVisitorImpl() { + Set<Integer> inputRefs = new HashSet<Integer>(); @Override - public Void visitFieldAccess(RexFieldAccess fieldAccess) { - throw new Util.FoundOne(fieldAccess); + public Void visitInputRef(RexInputRef inputRef) { + if (validInputRefs.contains(inputRef.getIndex())) { + inputRefs.add(inputRef.getIndex()); + if (inputRefs.size() > 1) { + throw new Util.FoundOne(inputRef); + } + } else { + throw new Util.FoundOne(inputRef); + } + return null; } }; try { expr.accept(visitor); } catch (Util.FoundOne e) { - deterministicFuncOnLiterals = false; + deterministicFuncWithSingleInputRef = false; } - return deterministicFuncOnLiterals; + return deterministicFuncWithSingleInputRef; } public static <T> ImmutableMap<Integer, T> getColInfoMap(List<T> hiveCols, http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java index 1e947c3..d43c2c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java @@ -17,34 +17,54 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories.FilterFactory; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.tools.RelBuilder; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule { + public static final HiveFilterProjectTransposeRule INSTANCE_DETERMINISTIC_WINDOWING = + new HiveFilterProjectTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true, true); + public static final HiveFilterProjectTransposeRule INSTANCE_DETERMINISTIC = new HiveFilterProjectTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, - HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true); + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true, false); public static final HiveFilterProjectTransposeRule INSTANCE = new HiveFilterProjectTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, - HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, false); + HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, false, false); private final boolean onlyDeterministic; - public HiveFilterProjectTransposeRule(Class<? extends Filter> filterClass, + private final boolean pushThroughWindowing; + + private HiveFilterProjectTransposeRule(Class<? extends Filter> filterClass, FilterFactory filterFactory, Class<? extends Project> projectClass, - ProjectFactory projectFactory, boolean onlyDeterministic) { + ProjectFactory projectFactory, boolean onlyDeterministic,boolean pushThroughWindowing) { super(filterClass, filterFactory, projectClass, projectFactory); this.onlyDeterministic = onlyDeterministic; + this.pushThroughWindowing = pushThroughWindowing; } @Override @@ -58,4 +78,128 @@ public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule { return super.matches(call); } + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + final Project origproject = call.rel(1); + RexNode filterCondToPushBelowProj = filter.getCondition(); + RexNode unPushedFilCondAboveProj = null; + + if (RexUtil.containsCorrelation(filterCondToPushBelowProj)) { + // If there is a correlation condition anywhere in the filter, don't + // push this filter past project since in some cases it can prevent a + // Correlate from being de-correlated. + return; + } + + if (RexOver.containsOver(origproject.getProjects(), null)) { + RexNode origFilterCond = filterCondToPushBelowProj; + filterCondToPushBelowProj = null; + if (pushThroughWindowing) { + Set<Integer> commonPartitionKeys = getCommonPartitionCols(origproject.getProjects()); + List<RexNode> newPartKeyFilConds = new ArrayList<RexNode>(); + List<RexNode> unpushedFilConds = new ArrayList<RexNode>(); + + // TODO: + // 1) Handle compound partition keys (partition by k1+k2) + // 2) When multiple window clauses are present in same select Even if + // Predicate can not pushed past all of them, we might still able to + // push + // it below some of them. + // Ex: select * from (select key, value, avg(c_int) over (partition by + // key), sum(c_float) over(partition by value) from t1)t1 where value < + // 10 + // --> select * from (select key, value, avg(c_int) over (partition by + // key) from (select key, value, sum(c_float) over(partition by value) + // from t1 where value < 10)t1)t2 + if (!commonPartitionKeys.isEmpty()) { + for (RexNode ce : RelOptUtil.conjunctions(origFilterCond)) { + RexNode newCondition = RelOptUtil.pushPastProject(ce, origproject); + if (HiveCalciteUtil.isDeterministicFuncWithSingleInputRef(newCondition, + commonPartitionKeys)) { + newPartKeyFilConds.add(newCondition); + } else { + unpushedFilConds.add(ce); + } + } + + if (!newPartKeyFilConds.isEmpty()) { + filterCondToPushBelowProj = RexUtil.composeConjunction(filter.getCluster().getRexBuilder(), + newPartKeyFilConds, true); + } + if (!unpushedFilConds.isEmpty()) { + unPushedFilCondAboveProj = RexUtil.composeConjunction(filter.getCluster().getRexBuilder(), + unpushedFilConds, true); + } + } + } + } + + if (filterCondToPushBelowProj != null) { + RelNode newProjRel = getNewProject(filterCondToPushBelowProj, unPushedFilCondAboveProj, origproject, filter.getCluster() + .getTypeFactory(), call.builder()); + call.transformTo(newProjRel); + } + } + + private static RelNode getNewProject(RexNode filterCondToPushBelowProj, RexNode unPushedFilCondAboveProj, Project oldProj, + RelDataTypeFactory typeFactory, RelBuilder relBuilder) { + + // convert the filter to one that references the child of the project + RexNode newPushedCondition = RelOptUtil.pushPastProject(filterCondToPushBelowProj, oldProj); + + // Remove cast of BOOLEAN NOT NULL to BOOLEAN or vice versa. Filter accepts + // nullable and not-nullable conditions, but a CAST might get in the way of + // other rewrites. + if (RexUtil.isNullabilityCast(typeFactory, newPushedCondition)) { + newPushedCondition = ((RexCall) newPushedCondition).getOperands().get(0); + } + + RelNode newPushedFilterRel = relBuilder.push(oldProj.getInput()).filter(newPushedCondition).build(); + + RelNode newProjRel = relBuilder.push(newPushedFilterRel) + .project(oldProj.getProjects(), oldProj.getRowType().getFieldNames()).build(); + + if (unPushedFilCondAboveProj != null) { + // Remove cast of BOOLEAN NOT NULL to BOOLEAN or vice versa. Filter accepts + // nullable and not-nullable conditions, but a CAST might get in the way of + // other rewrites. + if (RexUtil.isNullabilityCast(typeFactory, newPushedCondition)) { + unPushedFilCondAboveProj = ((RexCall) unPushedFilCondAboveProj).getOperands().get(0); + } + newProjRel = relBuilder.push(newProjRel).filter(unPushedFilCondAboveProj).build(); + } + + return newProjRel; + } + + private static Set<Integer> getCommonPartitionCols(List<RexNode> projections) { + RexOver overClause; + boolean firstOverClause = true; + Set<Integer> commonPartitionKeys = new HashSet<Integer>(); + + for (RexNode expr : projections) { + if (expr instanceof RexOver) { + overClause = (RexOver) expr; + + if (firstOverClause) { + firstOverClause = false; + commonPartitionKeys.addAll(getPartitionCols(overClause.getWindow().partitionKeys)); + } else { + commonPartitionKeys.retainAll(getPartitionCols(overClause.getWindow().partitionKeys)); + } + } + } + + return commonPartitionKeys; + } + + private static List<Integer> getPartitionCols(List<RexNode> partitionKeys) { + List<Integer> pCols = new ArrayList<Integer>(); + for (RexNode key : partitionKeys) { + if (key instanceof RexInputRef) { + pCols.add(((RexInputRef) key).getIndex()); + } + } + return pCols; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 809affb..f928a58 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1123,7 +1123,8 @@ public class CalcitePlanner extends SemanticAnalyzer { // constant propagation, constant folding perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, - HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING + : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/queries/clientpositive/ppd_windowing1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/ppd_windowing1.q b/ql/src/test/queries/clientpositive/ppd_windowing1.q new file mode 100644 index 0000000..f380eab --- /dev/null +++ b/ql/src/test/queries/clientpositive/ppd_windowing1.q @@ -0,0 +1,46 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=false; + +-- Test simple PPD through Windowing +EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where key > '2'; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where key > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where cast(key as int) > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where (cast(key as int) + 1) > 2; +EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key) as c1 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys +EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where key > '2'; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where key > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where cast(key as int) > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where (cast(key as int) + 1) > 2; +EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key, value) as c1 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys, multiple windows are involved and UDAF is same +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where key > '2'; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where key > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; +EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys, multiple windows are involved and UDAF has different args +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where key > '2'; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where key > 2; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; +EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test predicate is not getting pushed down when multiple windows are involved and they don't have common partition keys +EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as c1, avg(value) over(partition by value) as c2 from src)r1 where key > '2'; + + +-- Test predicate is not getting pushed down when window has compound partition key +EXPLAIN select * from (SELECT key, sum(key) over(partition by key + 2) as c1 from src)r1 where key > '2'; +EXPLAIN select * from (SELECT key, sum(key) over(partition by key + value) as c1 from src)r1 where key > '2'; + +-- Test predicate is not getting pushed down when predicate involves more than one col +EXPLAIN select * from (SELECT key, value, sum(key) over(partition by key, value) as c1 from src)r1 where (key + value) > '2'; +EXPLAIN select * from (SELECT key, value, sum(key) over(partition by key + value) as c1 from src)r1 where (key + value) > '2'; +EXPLAIN select * from (SELECT (cast(key as int))+(cast(value as int)) as key, sum(key) over(partition by key) as c1 from src)r1 where key > 2; http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/queries/clientpositive/ppd_windowing2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/ppd_windowing2.q b/ql/src/test/queries/clientpositive/ppd_windowing2.q new file mode 100644 index 0000000..abfe89e --- /dev/null +++ b/ql/src/test/queries/clientpositive/ppd_windowing2.q @@ -0,0 +1,49 @@ +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=false; + + +-- NOTE: This is a correctness test. If you regen q.out, regen it with optimization turned off + +-- Test simple PPD through Windowing +select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where key > '2'; +select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where key > 2; +select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where cast(key as int) > 2; +select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 where (cast(key as int) + 1) > 2; +select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key) as c1 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys +select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where key > '2'; +select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where key > 2; +select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where cast(key as int) > 2; +select * from (SELECT key, sum(key) over(partition by key, value) as c1 from src)r1 where (cast(key as int) + 1) > 2; +select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key, value) as c1 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys, multiple windows are involved and UDAF is same +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where key > '2'; +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where key > 2; +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; +select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key,value) as c1, sum(key) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test PPD through Windowing where predicate is a subset of partition keys, multiple windows are involved and UDAF has different args +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where key > '2'; +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where key > 2; +select * from (SELECT key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; +select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by key,value) as c1, sum(value) over(partition by key) as c2 from src)r1 where (cast(key as int) + 1) > 2; + + +-- Test predicate is not getting pushed down when multiple windows are involved and they don't have common partition keys +select * from (SELECT key, sum(key) over(partition by key,value) as c1, avg(value) over(partition by value) as c2 from src)r1 where key > '2'; + + +-- Test predicate is not getting pushed down when window has compound partition key +select * from (SELECT key, sum(key) over(partition by key + 2) as c1 from src)r1 where key > '2'; +select * from (SELECT key, sum(key) over(partition by key + value) as c1 from src)r1 where key > '2'; + +-- Test predicate is not getting pushed down when predicate involves more than one col +select * from (SELECT key, value, sum(key) over(partition by key, value) as c1 from src)r1 where (key + value) > '2'; +select * from (SELECT key, value, sum(key) over(partition by key + value) as c1 from src)r1 where (key + value) > '2'; +select * from (SELECT (cast(key as int))+(cast(value as int)) as key, sum(key) over(partition by key) as c1 from src)r1 where key > 2; http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/results/clientpositive/correlationoptimizer12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out index 8b8952d..615e47a 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -27,12 +27,15 @@ STAGE PLANS: TableScan alias: x Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -58,19 +61,16 @@ STAGE PLANS: window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -112,12 +112,15 @@ STAGE PLANS: TableScan alias: y Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) @@ -143,19 +146,16 @@ STAGE PLANS: window function: GenericUDAFCountEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string), count_window_0 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), count_window_0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/results/clientpositive/perf/query70.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out b/ql/src/test/results/clientpositive/perf/query70.q.out index 2521e0e..b921346 100644 --- a/ql/src/test/results/clientpositive/perf/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/query70.q.out @@ -95,7 +95,7 @@ Stage-0 Select Operator [SEL_32] (rows=6696 width=1119) Output:["_col0"] Filter Operator [FIL_83] (rows=6696 width=1119) - predicate:((rank_window_0 <= 5) and _col0 is not null) + predicate:(rank_window_0 <= 5) PTF Operator [PTF_31] (rows=20088 width=1119) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1(DESC)","partition by:":"_col0"}] Select Operator [SEL_30] (rows=20088 width=1119) @@ -143,7 +143,7 @@ Stage-0 Select Operator [SEL_14] (rows=1704 width=1910) Output:["_col0","_col1"] Filter Operator [FIL_85] (rows=1704 width=1910) - predicate:s_store_sk is not null + predicate:(s_store_sk is not null and s_state is not null) TableScan [TS_12] (rows=1704 width=1910) default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]
