IMPALA-3574: Handle runtime filters with TupleIsNull predicates This commit fixes an issue where an IllegalStateException is thrown while generating runtime filters if a target expr of a join conjunct is wrapped in an IF(TupleIsNull, NULL, e) expr. As this is not a valid expr to be assigned to a scan node (target of a runtime filter), we unwrap these exprs and replace exprs of the form IF(TupleIsNull, NULL, e) with 'e' while producing the target exprs for runtime filters. The original expr of the join conjunct is not modified.
Change-Id: I2e3e207b4c8522283a1cd0d14be83d42eba58f5a Reviewed-on: http://gerrit.cloudera.org:8080/3147 Reviewed-by: Dimitris Tsirogiannis <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/fa30a0c8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/fa30a0c8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/fa30a0c8 Branch: refs/heads/master Commit: fa30a0c818c0a9a6dbacb25d9e55bd24fb0c9b93 Parents: 49610e2 Author: Dimitris Tsirogiannis <[email protected]> Authored: Thu May 19 15:20:51 2016 -0700 Committer: Tim Armstrong <[email protected]> Committed: Mon May 23 08:40:20 2016 -0700 ---------------------------------------------------------------------- .../impala/analysis/TupleIsNullPredicate.java | 20 ++++++++++++ .../impala/planner/RuntimeFilterGenerator.java | 4 +++ .../PlannerTest/runtime-filter-propagation.test | 33 ++++++++++++++++++++ 3 files changed, 57 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fa30a0c8/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java index 2267324..a7832a2 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TupleIsNullPredicate.java @@ -157,6 +157,26 @@ public class TupleIsNullPredicate extends Predicate { return analyzer.isTrueWithNullSlots(isNotNullLiteralPred); } + /** + * Recursive function that replaces all 'IF(TupleIsNull(), NULL, e)' exprs in + * 'expr' with e and returns the modified expr. 
+ */ + public static Expr unwrapExpr(Expr expr) { + if (expr instanceof FunctionCallExpr) { + FunctionCallExpr fnCallExpr = (FunctionCallExpr) expr; + List<Expr> params = fnCallExpr.getParams().exprs(); + if (fnCallExpr.getFnName().getFunction().equals("if") && + params.get(0) instanceof TupleIsNullPredicate && + params.get(1) instanceof NullLiteral) { + return unwrapExpr(params.get(2)); + } + } + for (int i = 0; i < expr.getChildren().size(); ++i) { + expr.setChild(i, unwrapExpr(expr.getChild(i))); + } + return expr; + } + @Override public Expr clone() { return new TupleIsNullPredicate(this); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fa30a0c8/fe/src/main/java/com/cloudera/impala/planner/RuntimeFilterGenerator.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/com/cloudera/impala/planner/RuntimeFilterGenerator.java b/fe/src/main/java/com/cloudera/impala/planner/RuntimeFilterGenerator.java index 8bdd126..d090b08 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/RuntimeFilterGenerator.java +++ b/fe/src/main/java/com/cloudera/impala/planner/RuntimeFilterGenerator.java @@ -31,6 +31,7 @@ import com.cloudera.impala.analysis.SlotId; import com.cloudera.impala.analysis.SlotRef; import com.cloudera.impala.analysis.TupleDescriptor; import com.cloudera.impala.analysis.TupleId; +import com.cloudera.impala.analysis.TupleIsNullPredicate; import com.cloudera.impala.catalog.Table; import com.cloudera.impala.common.IdGenerator; import com.cloudera.impala.planner.PlanNode; @@ -238,6 +239,9 @@ public final class RuntimeFilterGenerator { Preconditions.checkNotNull(targetSlots); if (targetSlots.isEmpty()) return null; + // Ensure that the targer expr does not contain TupleIsNull predicates as these + // can't be evaluated at a scan node. 
+ targetExpr = TupleIsNullPredicate.unwrapExpr(targetExpr.clone()); LOG.trace("Generating runtime filter from predicate " + joinPredicate); return new RuntimeFilter(idGen.getNextId(), filterSrcNode, srcExpr, targetExpr, targetSlots); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/fa30a0c8/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test index 43dee70..8e2e142 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/runtime-filter-propagation.test @@ -1085,3 +1085,36 @@ select straight_join 1 from functional.alltypestiny a1 partitions=4/4 files=4 size=460B runtime filters: RF001 -> a1.int_col ==== +# IMPALA-3574: Runtime filter generated from a targer expr that contains a TupleIsNull +# predicate. 
+select distinct t1.int_col +from functional.alltypestiny t1 left join + (select coalesce(t3.int_col, 384) as int_col + from (select int_col from functional.alltypes) t1 + right semi join (select int_col from functional.alltypesagg) t3 + on t1.int_col = t3.int_col) t2 + on t2.int_col = t1.month +where t1.month is not null +---- PLAN +05:AGGREGATE [FINALIZE] +| group by: t1.int_col +| +04:HASH JOIN [RIGHT OUTER JOIN] +| hash predicates: if(TupleIsNull(), NULL, coalesce(int_col, 384)) = t1.month +| runtime filters: RF000 <- t1.month +| +|--00:SCAN HDFS [functional.alltypestiny t1] +| partitions=4/4 files=4 size=460B +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF001 <- int_col +| +|--01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| runtime filters: RF000 -> coalesce(functional.alltypes.int_col, 384) +| +02:SCAN HDFS [functional.alltypesagg] + partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> coalesce(int_col, 384), RF001 -> int_col +====
