[14/27] hive git commit: HIVE-12808: Logical PPD: Push filter clauses through PTF(Windowing) into TS (Laljo John Pullokkaran via Ashutosh Chauhan)

jdere Fri, 26 Feb 2016 15:16:43 -0800

HIVE-12808: Logical PPD: Push filter clauses through PTF(Windowing) into TS 
(Laljo John Pullokkaran via Ashutosh Chauhan)


Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e470c7b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e470c7b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e470c7b1

Branch: refs/heads/llap
Commit: e470c7b16ccba8c42ea9efda29b2198d4ef3870d
Parents: 2f73233
Author: jpullokk <[email protected]>
Authored: Wed Feb 24 11:50:05 2016 -0800
Committer: Ashutosh Chauhan <[email protected]>
Committed: Wed Feb 24 11:51:32 2016 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    2 +
 .../ql/optimizer/calcite/HiveCalciteUtil.java   |  108 +-
 .../rules/HiveFilterProjectTransposeRule.java   |  152 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |    3 +-
 .../queries/clientpositive/ppd_windowing1.q     |   46 +
 .../queries/clientpositive/ppd_windowing2.q     |   49 +
 .../clientpositive/correlationoptimizer12.q.out |   68 +-
 .../results/clientpositive/perf/query70.q.out   |    4 +-
 .../results/clientpositive/ppd_windowing1.q.out | 2029 ++++
 .../results/clientpositive/ppd_windowing2.q.out | 9841 ++++++++++++++++++
 .../results/clientpositive/ptfgroupbyjoin.q.out |   66 +-
 .../clientpositive/spark/subquery_in.q.out      |   15 +-
 .../results/clientpositive/subquery_in.q.out    |   15 +-
 .../subquery_unqualcolumnrefs.q.out             |   30 +-
 .../clientpositive/tez/subquery_in.q.out        |   15 +-
 15 files changed, 12305 insertions(+), 138 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 3a2e0c7..e868500 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1354,6 +1354,8 @@ public class HiveConf extends Configuration {
         "Whether to update stale indexes automatically"),
     HIVEOPTPPD("hive.optimize.ppd", true,
         "Whether to enable predicate pushdown"),
+    HIVEOPTPPD_WINDOWING("hive.optimize.ppd.windowing", true,
+        "Whether to enable predicate pushdown through windowing"),
     HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true,
         "Whether to transitively replicate predicate filters over equijoin 
conditions."),
     HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true,

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 58a7cff..fcdafdd 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -720,56 +720,108 @@ public class HiveCalciteUtil {
     return deterministic;
   }
 
-  public static boolean isDeterministicFuncOnLiterals(RexNode expr) {
-    boolean deterministicFuncOnLiterals = true;
+  private static class DeterMinisticFuncVisitorImpl extends 
RexVisitorImpl<Void> {
+    protected DeterMinisticFuncVisitorImpl() {
+      super(true);
+    }
 
-    RexVisitor<Void> visitor = new RexVisitorImpl<Void>(true) {
-      @Override
-      public Void visitCall(org.apache.calcite.rex.RexCall call) {
-        if (!call.getOperator().isDeterministic()) {
-          throw new Util.FoundOne(call);
-        }
-        return super.visitCall(call);
+    @Override
+    public Void visitCall(org.apache.calcite.rex.RexCall call) {
+      if (!call.getOperator().isDeterministic()) {
+        throw new Util.FoundOne(call);
       }
+      return super.visitCall(call);
+    }
+
+    @Override
+    public Void visitCorrelVariable(RexCorrelVariable correlVariable) {
+      throw new Util.FoundOne(correlVariable);
+    }
+
+    @Override
+    public Void visitLocalRef(RexLocalRef localRef) {
+      throw new Util.FoundOne(localRef);
+    }
+
+    @Override
+    public Void visitOver(RexOver over) {
+      throw new Util.FoundOne(over);
+    }
+
+    @Override
+    public Void visitDynamicParam(RexDynamicParam dynamicParam) {
+      throw new Util.FoundOne(dynamicParam);
+    }
+
+    @Override
+    public Void visitRangeRef(RexRangeRef rangeRef) {
+      throw new Util.FoundOne(rangeRef);
+    }
+
+    @Override
+    public Void visitFieldAccess(RexFieldAccess fieldAccess) {
+      throw new Util.FoundOne(fieldAccess);
+    }
+  }
+
+  public static boolean isDeterministicFuncOnLiterals(RexNode expr) {
+    boolean deterministicFuncOnLiterals = true;
 
+    RexVisitor<Void> visitor = new DeterMinisticFuncVisitorImpl() {
       @Override
       public Void visitInputRef(RexInputRef inputRef) {
         throw new Util.FoundOne(inputRef);
       }
+    };
 
-      @Override
-      public Void visitLocalRef(RexLocalRef localRef) {
-        throw new Util.FoundOne(localRef);
-      }
+    try {
+      expr.accept(visitor);
+    } catch (Util.FoundOne e) {
+      deterministicFuncOnLiterals = false;
+    }
 
-      @Override
-      public Void visitOver(RexOver over) {
-        throw new Util.FoundOne(over);
-      }
+    return deterministicFuncOnLiterals;
+  }
 
-      @Override
-      public Void visitDynamicParam(RexDynamicParam dynamicParam) {
-        throw new Util.FoundOne(dynamicParam);
+  public List<RexNode> getDeterministicFuncWithSingleInputRef(List<RexNode> 
exprs,
+      final Set<Integer> validInputRefs) {
+    List<RexNode> determExprsWithSingleRef = new ArrayList<RexNode>();
+    for (RexNode e : exprs) {
+      if (isDeterministicFuncWithSingleInputRef(e, validInputRefs)) {
+        determExprsWithSingleRef.add(e);
       }
+    }
+    return determExprsWithSingleRef;
+  }
 
-      @Override
-      public Void visitRangeRef(RexRangeRef rangeRef) {
-        throw new Util.FoundOne(rangeRef);
-      }
+  public static boolean isDeterministicFuncWithSingleInputRef(RexNode expr,
+      final Set<Integer> validInputRefs) {
+    boolean deterministicFuncWithSingleInputRef = true;
+
+    RexVisitor<Void> visitor = new DeterMinisticFuncVisitorImpl() {
+      Set<Integer> inputRefs = new HashSet<Integer>();
 
       @Override
-      public Void visitFieldAccess(RexFieldAccess fieldAccess) {
-        throw new Util.FoundOne(fieldAccess);
+      public Void visitInputRef(RexInputRef inputRef) {
+        if (validInputRefs.contains(inputRef.getIndex())) {
+          inputRefs.add(inputRef.getIndex());
+          if (inputRefs.size() > 1) {
+            throw new Util.FoundOne(inputRef);
+          }
+        } else {
+          throw new Util.FoundOne(inputRef);
+        }
+        return null;
       }
     };
 
     try {
       expr.accept(visitor);
     } catch (Util.FoundOne e) {
-      deterministicFuncOnLiterals = false;
+      deterministicFuncWithSingleInputRef = false;
     }
 
-    return deterministicFuncOnLiterals;
+    return deterministicFuncWithSingleInputRef;
   }
 
   public static <T> ImmutableMap<Integer, T> getColInfoMap(List<T> hiveCols,

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
index 1e947c3..d43c2c6 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFilterProjectTransposeRule.java
@@ -17,34 +17,54 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rel.core.RelFactories.FilterFactory;
 import org.apache.calcite.rel.core.RelFactories.ProjectFactory;
 import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.tools.RelBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 
 public class HiveFilterProjectTransposeRule extends FilterProjectTransposeRule 
{
 
+  public static final HiveFilterProjectTransposeRule 
INSTANCE_DETERMINISTIC_WINDOWING =
+          new HiveFilterProjectTransposeRule(Filter.class, 
HiveRelFactories.HIVE_FILTER_FACTORY,
+          HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true, 
true);
+
   public static final HiveFilterProjectTransposeRule INSTANCE_DETERMINISTIC =
           new HiveFilterProjectTransposeRule(Filter.class, 
HiveRelFactories.HIVE_FILTER_FACTORY,
-          HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true);
+          HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, true, 
false);
 
   public static final HiveFilterProjectTransposeRule INSTANCE =
           new HiveFilterProjectTransposeRule(Filter.class, 
HiveRelFactories.HIVE_FILTER_FACTORY,
-          HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, false);
+          HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, false, 
false);
 
   private final boolean onlyDeterministic;
 
-  public HiveFilterProjectTransposeRule(Class<? extends Filter> filterClass,
+  private final boolean pushThroughWindowing;
+
+  private HiveFilterProjectTransposeRule(Class<? extends Filter> filterClass,
       FilterFactory filterFactory, Class<? extends Project> projectClass,
-      ProjectFactory projectFactory, boolean onlyDeterministic) {
+      ProjectFactory projectFactory, boolean onlyDeterministic,boolean 
pushThroughWindowing) {
     super(filterClass, filterFactory, projectClass, projectFactory);
     this.onlyDeterministic = onlyDeterministic;
+    this.pushThroughWindowing = pushThroughWindowing;
   }
 
   @Override
@@ -58,4 +78,128 @@ public class HiveFilterProjectTransposeRule extends 
FilterProjectTransposeRule {
     return super.matches(call);
   }
 
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+    final Project origproject = call.rel(1);
+    RexNode filterCondToPushBelowProj = filter.getCondition();
+    RexNode unPushedFilCondAboveProj = null;
+
+    if (RexUtil.containsCorrelation(filterCondToPushBelowProj)) {
+      // If there is a correlation condition anywhere in the filter, don't
+      // push this filter past project since in some cases it can prevent a
+      // Correlate from being de-correlated.
+      return;
+    }
+
+    if (RexOver.containsOver(origproject.getProjects(), null)) {
+      RexNode origFilterCond = filterCondToPushBelowProj;
+      filterCondToPushBelowProj = null;
+      if (pushThroughWindowing) {
+        Set<Integer> commonPartitionKeys = 
getCommonPartitionCols(origproject.getProjects());
+        List<RexNode> newPartKeyFilConds = new ArrayList<RexNode>();
+        List<RexNode> unpushedFilConds = new ArrayList<RexNode>();
+
+        // TODO:
+        // 1) Handle compound partition keys (partition by k1+k2)
+        // 2) When multiple window clauses are present in same select Even if
+        // Predicate can not pushed past all of them, we might still able to
+        // push
+        // it below some of them.
+        // Ex: select * from (select key, value, avg(c_int) over (partition by
+        // key), sum(c_float) over(partition by value) from t1)t1 where value <
+        // 10
+        // --> select * from (select key, value, avg(c_int) over (partition by
+        // key) from (select key, value, sum(c_float) over(partition by value)
+        // from t1 where value < 10)t1)t2
+        if (!commonPartitionKeys.isEmpty()) {
+          for (RexNode ce : RelOptUtil.conjunctions(origFilterCond)) {
+            RexNode newCondition = RelOptUtil.pushPastProject(ce, origproject);
+            if 
(HiveCalciteUtil.isDeterministicFuncWithSingleInputRef(newCondition,
+                commonPartitionKeys)) {
+              newPartKeyFilConds.add(newCondition);
+            } else {
+              unpushedFilConds.add(ce);
+            }
+          }
+
+          if (!newPartKeyFilConds.isEmpty()) {
+            filterCondToPushBelowProj = 
RexUtil.composeConjunction(filter.getCluster().getRexBuilder(),
+                    newPartKeyFilConds, true);
+          }
+          if (!unpushedFilConds.isEmpty()) {
+            unPushedFilCondAboveProj = 
RexUtil.composeConjunction(filter.getCluster().getRexBuilder(),
+                    unpushedFilConds, true);
+          }
+        }
+      }
+    }
+
+    if (filterCondToPushBelowProj != null) {
+      RelNode newProjRel = getNewProject(filterCondToPushBelowProj, 
unPushedFilCondAboveProj, origproject, filter.getCluster()
+          .getTypeFactory(), call.builder());
+      call.transformTo(newProjRel);
+    }
+  }
+
+  private static RelNode getNewProject(RexNode filterCondToPushBelowProj, 
RexNode unPushedFilCondAboveProj, Project oldProj,
+      RelDataTypeFactory typeFactory, RelBuilder relBuilder) {
+
+    // convert the filter to one that references the child of the project
+    RexNode newPushedCondition = 
RelOptUtil.pushPastProject(filterCondToPushBelowProj, oldProj);
+
+    // Remove cast of BOOLEAN NOT NULL to BOOLEAN or vice versa. Filter accepts
+    // nullable and not-nullable conditions, but a CAST might get in the way of
+    // other rewrites.
+    if (RexUtil.isNullabilityCast(typeFactory, newPushedCondition)) {
+      newPushedCondition = ((RexCall) newPushedCondition).getOperands().get(0);
+    }
+
+    RelNode newPushedFilterRel = 
relBuilder.push(oldProj.getInput()).filter(newPushedCondition).build();
+
+    RelNode newProjRel = relBuilder.push(newPushedFilterRel)
+        .project(oldProj.getProjects(), 
oldProj.getRowType().getFieldNames()).build();
+
+    if (unPushedFilCondAboveProj != null) {
+      // Remove cast of BOOLEAN NOT NULL to BOOLEAN or vice versa. Filter 
accepts
+      // nullable and not-nullable conditions, but a CAST might get in the way 
of
+      // other rewrites.
+      if (RexUtil.isNullabilityCast(typeFactory, newPushedCondition)) {
+        unPushedFilCondAboveProj = ((RexCall) 
unPushedFilCondAboveProj).getOperands().get(0);
+      }
+      newProjRel = 
relBuilder.push(newProjRel).filter(unPushedFilCondAboveProj).build();
+    }
+
+    return newProjRel;
+  }
+
+  private static Set<Integer> getCommonPartitionCols(List<RexNode> 
projections) {
+    RexOver overClause;
+    boolean firstOverClause = true;
+    Set<Integer> commonPartitionKeys = new HashSet<Integer>();
+
+    for (RexNode expr : projections) {
+      if (expr instanceof RexOver) {
+        overClause = (RexOver) expr;
+
+        if (firstOverClause) {
+          firstOverClause = false;
+          
commonPartitionKeys.addAll(getPartitionCols(overClause.getWindow().partitionKeys));
+        } else {
+          
commonPartitionKeys.retainAll(getPartitionCols(overClause.getWindow().partitionKeys));
+        }
+      }
+    }
+
+    return commonPartitionKeys;
+  }
+
+  private static List<Integer> getPartitionCols(List<RexNode> partitionKeys) {
+    List<Integer> pCols = new ArrayList<Integer>();
+    for (RexNode key : partitionKeys) {
+      if (key instanceof RexInputRef) {
+        pCols.add(((RexInputRef) key).getIndex());
+      }
+    }
+    return pCols;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 809affb..f928a58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1123,7 +1123,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // constant propagation, constant folding
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, 
HepMatchOrder.BOTTOM_UP,
-          HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC,
+          conf.getBoolVar(HiveConf.ConfVars.HIVEOPTPPD_WINDOWING) ? 
HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC_WINDOWING
+              : HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC,
           HiveFilterSetOpTransposeRule.INSTANCE,
           HiveFilterSortTransposeRule.INSTANCE,
           HiveFilterJoinRule.JOIN,

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/queries/clientpositive/ppd_windowing1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd_windowing1.q 
b/ql/src/test/queries/clientpositive/ppd_windowing1.q
new file mode 100644
index 0000000..f380eab
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ppd_windowing1.q
@@ -0,0 +1,46 @@
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+-- Test simple PPD through Windowing
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from 
src)r1 where key > '2';
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from 
src)r1 where key > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from 
src)r1 where cast(key as int) > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key) as c1 from 
src)r1 where (cast(key as int) + 1) > 2;
+EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) 
over(partition by key) as c1 from src)r1 where (cast(key as int) + 1) > 2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as 
c1  from src)r1 where key > '2';
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as 
c1  from src)r1 where key > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as 
c1  from src)r1 where cast(key as int) > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key, value) as 
c1  from src)r1 where (cast(key as int) + 1) > 2;
+EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) 
over(partition by key, value) as c1  from src)r1 where (cast(key as int) + 1) > 
2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys, 
multiple windows are involved and UDAF is same
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(key) over(partition by key) as c2  from src)r1 where key > '2';
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(key) over(partition by key) as c2  from src)r1 where key > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(key) over(partition by key) as c2  from src)r1 where (cast(key as int) 
+ 1) > 2;
+EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) 
over(partition by key,value) as c1, sum(key) over(partition by key) as c2  from 
src)r1 where (cast(key as int) + 1) > 2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys, 
multiple windows are involved and UDAF has different args 
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(value) over(partition by key) as c2  from src)r1 where key > '2';
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(value) over(partition by key) as c2  from src)r1 where key > 2;
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, sum(value) over(partition by key) as c2  from src)r1 where (cast(key as 
int) + 1) > 2;
+EXPLAIN select * from (SELECT (cast(key as int))+2 as key, sum(key) 
over(partition by key,value) as c1, sum(value) over(partition by key) as c2  
from src)r1 where (cast(key as int) + 1) > 2;
+
+
+-- Test predicate is not getting pushed down when multiple windows are 
involved and they don't have common partition keys 
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key,value) as 
c1, avg(value) over(partition by value) as c2  from src)r1 where key > '2';
+
+
+-- Test predicate is not getting pushed down when window has compound 
partition key
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key + 2) as c1 
from src)r1 where key > '2';
+EXPLAIN select * from (SELECT key, sum(key) over(partition by key + value) as 
c1 from src)r1 where key > '2';
+
+-- Test predicate is not getting pushed down when predicate involves more than 
one col 
+EXPLAIN select * from (SELECT key, value, sum(key) over(partition by key, 
value) as c1 from src)r1 where (key + value) > '2';
+EXPLAIN select * from (SELECT key, value, sum(key) over(partition by key + 
value) as c1 from src)r1 where (key + value) > '2';
+EXPLAIN select * from (SELECT (cast(key as int))+(cast(value as int)) as key, 
sum(key) over(partition by key) as c1 from src)r1 where key > 2;

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/queries/clientpositive/ppd_windowing2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/ppd_windowing2.q 
b/ql/src/test/queries/clientpositive/ppd_windowing2.q
new file mode 100644
index 0000000..abfe89e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/ppd_windowing2.q
@@ -0,0 +1,49 @@
+set hive.mapred.mode=nonstrict;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=false;
+
+
+-- NOTE: This is a correctness test. If you regen q.out, regen it with 
optimization turned off
+
+-- Test simple PPD through Windowing
+select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 
where key > '2';
+select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 
where key > 2;
+select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 
where cast(key as int) > 2;
+select * from (SELECT key, sum(key) over(partition by key) as c1 from src)r1 
where (cast(key as int) + 1) > 2;
+select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by 
key) as c1 from src)r1 where (cast(key as int) + 1) > 2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys
+select * from (SELECT key, sum(key) over(partition by key, value) as c1  from 
src)r1 where key > '2';
+select * from (SELECT key, sum(key) over(partition by key, value) as c1  from 
src)r1 where key > 2;
+select * from (SELECT key, sum(key) over(partition by key, value) as c1  from 
src)r1 where cast(key as int) > 2;
+select * from (SELECT key, sum(key) over(partition by key, value) as c1  from 
src)r1 where (cast(key as int) + 1) > 2;
+select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by 
key, value) as c1  from src)r1 where (cast(key as int) + 1) > 2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys, 
multiple windows are involved and UDAF is same
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(key) over(partition by key) as c2  from src)r1 where key > '2';
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(key) over(partition by key) as c2  from src)r1 where key > 2;
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(key) over(partition by key) as c2  from src)r1 where (cast(key as int) + 1) 
> 2;
+select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by 
key,value) as c1, sum(key) over(partition by key) as c2  from src)r1 where 
(cast(key as int) + 1) > 2;
+
+
+-- Test PPD through Windowing where predicate is a subset of partition keys, 
multiple windows are involved and UDAF has different args 
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(value) over(partition by key) as c2  from src)r1 where key > '2';
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(value) over(partition by key) as c2  from src)r1 where key > 2;
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
sum(value) over(partition by key) as c2  from src)r1 where (cast(key as int) + 
1) > 2;
+select * from (SELECT (cast(key as int))+2 as key, sum(key) over(partition by 
key,value) as c1, sum(value) over(partition by key) as c2  from src)r1 where 
(cast(key as int) + 1) > 2;
+
+
+-- Test predicate is not getting pushed down when multiple windows are 
involved and they don't have common partition keys 
+select * from (SELECT key, sum(key) over(partition by key,value) as c1, 
avg(value) over(partition by value) as c2  from src)r1 where key > '2';
+
+
+-- Test predicate is not getting pushed down when window has compound 
partition key
+select * from (SELECT key, sum(key) over(partition by key + 2) as c1 from 
src)r1 where key > '2';
+select * from (SELECT key, sum(key) over(partition by key + value) as c1 from 
src)r1 where key > '2';
+
+-- Test predicate is not getting pushed down when predicate involves more than 
one col 
+select * from (SELECT key, value, sum(key) over(partition by key, value) as c1 
from src)r1 where (key + value) > '2';
+select * from (SELECT key, value, sum(key) over(partition by key + value) as 
c1 from src)r1 where (key + value) > '2';
+select * from (SELECT (cast(key as int))+(cast(value as int)) as key, sum(key) 
over(partition by key) as c1 from src)r1 where key > 2;

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/results/clientpositive/correlationoptimizer12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out 
b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out
index 8b8952d..615e47a 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out
@@ -27,12 +27,15 @@ STAGE PLANS:
           TableScan
             alias: x
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: string)
-              sort order: +
-              Map-reduce partition columns: key (type: string)
+            Filter Operator
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: value (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
+                value expressions: value (type: string)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string)
@@ -58,19 +61,16 @@ STAGE PLANS:
                         window function: GenericUDAFCountEvaluator
                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-            Filter Operator
-              predicate: _col0 is not null (type: boolean)
+            Select Operator
+              expressions: _col0 (type: string), count_window_0 (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), count_window_0 (type: 
bigint)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-2
     Map Reduce
@@ -112,12 +112,15 @@ STAGE PLANS:
           TableScan
             alias: y
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-            Reduce Output Operator
-              key expressions: key (type: string)
-              sort order: +
-              Map-reduce partition columns: key (type: string)
+            Filter Operator
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-              value expressions: value (type: string)
+              Reduce Output Operator
+                key expressions: key (type: string)
+                sort order: +
+                Map-reduce partition columns: key (type: string)
+                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
+                value expressions: value (type: string)
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string)
@@ -143,19 +146,16 @@ STAGE PLANS:
                         window function: GenericUDAFCountEvaluator
                         window frame: PRECEDING(MAX)~FOLLOWING(MAX)
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-            Filter Operator
-              predicate: _col0 is not null (type: boolean)
+            Select Operator
+              expressions: _col0 (type: string), count_window_0 (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), count_window_0 (type: 
bigint)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  table:
-                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/e470c7b1/ql/src/test/results/clientpositive/perf/query70.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query70.q.out 
b/ql/src/test/results/clientpositive/perf/query70.q.out
index 2521e0e..b921346 100644
--- a/ql/src/test/results/clientpositive/perf/query70.q.out
+++ b/ql/src/test/results/clientpositive/perf/query70.q.out
@@ -95,7 +95,7 @@ Stage-0
                                           Select Operator [SEL_32] (rows=6696 
width=1119)
                                             Output:["_col0"]
                                             Filter Operator [FIL_83] 
(rows=6696 width=1119)
-                                              predicate:((rank_window_0 <= 5) 
and _col0 is not null)
+                                              predicate:(rank_window_0 <= 5)
                                               PTF Operator [PTF_31] 
(rows=20088 width=1119)
                                                 Function 
definitions:[{},{"name:":"windowingtablefunction","order 
by:":"_col1(DESC)","partition by:":"_col0"}]
                                                 Select Operator [SEL_30] 
(rows=20088 width=1119)
@@ -143,7 +143,7 @@ Stage-0
                                                                     Select 
Operator [SEL_14] (rows=1704 width=1910)
                                                                       
Output:["_col0","_col1"]
                                                                       Filter 
Operator [FIL_85] (rows=1704 width=1910)
-                                                                        
predicate:s_store_sk is not null
+                                                                        
predicate:(s_store_sk is not null and s_state is not null)
                                                                         
TableScan [TS_12] (rows=1704 width=1910)
                                                                           
default@store,s,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"]

[14/27] hive git commit: HIVE-12808: Logical PPD: Push filter clauses through PTF(Windowing) into TS (Laljo John Pullokkaran via Ashutosh Chauhan)

Reply via email to