IMPALA-5016: Simplify COALESCE() in SimplifyConditionalsRule. Simplify COALESCE by skipping leading nulls and applying the following transformations: COALESCE(null, a, b) -> COALESCE(a, b); COALESCE(<literal>, a, b) -> <literal>, when literal is not NullLiteral; COALESCE(<partition-slotref>, a, b) -> <partition-slotref>, when the partition column does not contain NULL.
Testing: added unit tests in ExprRewriteRulesTest Change-Id: I0325a9e437261b15313bbdf2f22d83376a84b575 Reviewed-on: http://gerrit.cloudera.org:8080/7015 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/61696f3b Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/61696f3b Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/61696f3b Branch: refs/heads/master Commit: 61696f3ba81ba27d830bf262c6e56b0712f19e53 Parents: de9f523 Author: hzfengyu <[email protected]> Authored: Sun May 28 23:58:18 2017 +0800 Committer: Impala Public Jenkins <[email protected]> Committed: Mon Jun 12 20:31:03 2017 +0000 ---------------------------------------------------------------------- .../rewrite/SimplifyConditionalsRule.java | 94 +++++++++++++++++--- .../impala/analysis/ExprRewriteRulesTest.java | 49 +++++++++- 2 files changed, 126 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61696f3b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java index 176b89e..287b4be 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java +++ b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java @@ -30,8 +30,13 @@ import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.FunctionCallExpr; import org.apache.impala.analysis.FunctionName; import org.apache.impala.analysis.NullLiteral; +import org.apache.impala.analysis.SlotDescriptor; +import org.apache.impala.analysis.SlotRef; +import org.apache.impala.catalog.HdfsTable; import org.apache.impala.common.AnalysisException; + import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; /*** * This rule simplifies conditional functions with constant conditions. It relies on @@ -43,6 +48,7 @@ import com.google.common.base.Preconditions; * id = 0 OR false -> id = 0 * false AND id = 1 -> false * case when false then 0 when true then 1 end -> 1 + * coalesce(1, 0) -> 1 */ public class SimplifyConditionalsRule implements ExprRewriteRule { public static ExprRewriteRule INSTANCE = new SimplifyConditionalsRule(); @@ -76,24 +82,84 @@ public class SimplifyConditionalsRule implements ExprRewriteRule { * Simplifies IF by returning the corresponding child if the condition has a constant * TRUE, FALSE, or NULL (equivalent to FALSE) value. */ + private Expr simplifyIfFunctionCallExpr(FunctionCallExpr expr) { + Preconditions.checkState(expr.getChildren().size() == 3); + if (expr.getChild(0) instanceof BoolLiteral) { + if (((BoolLiteral) expr.getChild(0)).getValue()) { + // IF(TRUE) + return expr.getChild(1); + } else { + // IF(FALSE) + return expr.getChild(2); + } + } else if (expr.getChild(0) instanceof NullLiteral) { + // IF(NULL) + return expr.getChild(2); + } + return expr; + } + + /** + * Simplify COALESCE by skipping leading nulls and applying the following transformations: + * COALESCE(null, a, b) -> COALESCE(a, b); + * COALESCE(<literal>, a, b) -> <literal>, when literal is not NullLiteral; + * COALESCE(<partition-slotref>, a, b) -> <partition-slotref>, + * when the partition column does not contain NULL. + */ + private Expr simplifyCoalesceFunctionCallExpr(FunctionCallExpr expr) { + int numChildren = expr.getChildren().size(); + Expr result = NullLiteral.create(expr.getType()); + for (int i = 0; i < numChildren; ++i) { + Expr childExpr = expr.getChildren().get(i); + // Skip leading nulls. + if (childExpr.isNullLiteral()) continue; + if ((i == numChildren - 1) || canSimplifyCoalesceUsingChild(childExpr)) { + result = childExpr; + } else if (i == 0) { + result = expr; + } else { + List<Expr> newChildren = Lists.newArrayList(expr.getChildren().subList(i, numChildren)); + result = new FunctionCallExpr(expr.getFnName(), newChildren); + } + break; + } + return result; + } + + /** + * Checks if the given child expr is nullable. Returns true if one of the following holds: + * child is a non-NULL literal; + * child is a possibly cast SlotRef against a non-nullable slot; + * child is a possible cast SlotRef against a partition column that does not contain NULL. + */ + private boolean canSimplifyCoalesceUsingChild(Expr child) { + if (child.isLiteral() && !child.isNullLiteral()) return true; + + SlotRef slotRef = child.unwrapSlotRef(false); + if (slotRef == null) return false; + SlotDescriptor slotDesc = slotRef.getDesc(); + if (!slotDesc.getIsNullable()) return true; + // Check partition column using partition metadata. + if (slotDesc.getParent().getTable() instanceof HdfsTable + && slotDesc.getColumn() != null + && slotDesc.getParent().getTable().isClusteringColumn(slotDesc.getColumn())) { + HdfsTable table = (HdfsTable) slotDesc.getParent().getTable(); + // Return true if the partition column does not have a NULL value. + if (table.getNullPartitionIds(slotDesc.getColumn().getPosition()).isEmpty()) { + return true; + } + } + return false; + } + private Expr simplifyFunctionCallExpr(FunctionCallExpr expr) { FunctionName fnName = expr.getFnName(); // TODO: Add the other conditional functions, eg. ifnull, istrue, etc. if (fnName.getFunction().equals("if")) { - Preconditions.checkState(expr.getChildren().size() == 3); - if (expr.getChild(0) instanceof BoolLiteral) { - if (((BoolLiteral) expr.getChild(0)).getValue()) { - // IF(TRUE) - return expr.getChild(1); - } else { - // IF(FALSE) - return expr.getChild(2); - } - } else if (expr.getChild(0) instanceof NullLiteral) { - // IF(NULL) - return expr.getChild(2); - } + return simplifyIfFunctionCallExpr(expr); + } else if (fnName.getFunction().equals("coalesce")) { + return simplifyCoalesceFunctionCallExpr(expr); } return expr; } @@ -209,4 +275,4 @@ public class SimplifyConditionalsRule implements ExprRewriteRule { } return new CaseExpr(caseExpr, newWhenClauses, elseExpr); } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61696f3b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java index 3ee4141..9650f76 100644 --- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java @@ -42,12 +42,22 @@ public class ExprRewriteRulesTest extends FrontendTestBase { public Expr RewritesOk(String expr, ExprRewriteRule rule, String expectedExpr) throws AnalysisException { - return RewritesOk(expr, Lists.newArrayList(rule), expectedExpr); + return RewritesOk("functional.alltypessmall", expr, rule, expectedExpr); + } + + public Expr RewritesOk(String tableName, String expr, ExprRewriteRule rule, String expectedExpr) + throws AnalysisException { + return RewritesOk(tableName, expr, Lists.newArrayList(rule), expectedExpr); } public Expr RewritesOk(String expr, List<ExprRewriteRule> rules, String expectedExpr) throws AnalysisException { - String stmtStr = "select " + expr + " from functional.alltypessmall"; + return RewritesOk("functional.alltypessmall", expr, rules, expectedExpr); + } + + public Expr RewritesOk(String tableName, String expr, List<ExprRewriteRule> rules, + String expectedExpr) throws AnalysisException { + String stmtStr = "select " + expr + " from " + tableName; SelectStmt stmt = (SelectStmt) ParsesOk(stmtStr); Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB); stmt.analyze(analyzer); @@ -333,6 +343,39 @@ public class ExprRewriteRulesTest extends FrontendTestBase { RewritesOk("case when true then 0 when false then sum(id) end", rule, null); RewritesOk( "case when true then count(id) when false then sum(id) end", rule, "count(id)"); + + // IMPALA-5016: Simplify COALESCE function + // Test skipping leading nulls. + RewritesOk("coalesce(null, id, year)", rule, "coalesce(id, year)"); + RewritesOk("coalesce(null, 1, id)", rule, "1"); + RewritesOk("coalesce(null, null, id)", rule, "id"); + // If the leading parameter is a non-NULL constant, rewrite to that constant. + RewritesOk("coalesce(1, id, year)", rule, "1"); + // If COALESCE has only one parameter, rewrite to the parameter. + RewritesOk("coalesce(id)", rule, "id"); + // If all parameters are NULL, rewrite to NULL. + RewritesOk("coalesce(null, null)", rule, "NULL"); + // Do not rewrite non-literal constant exprs, rely on constant folding. + RewritesOk("coalesce(null is null, id)", rule, null); + RewritesOk("coalesce(10 + null, id)", rule, null); + // Combine COALESCE rule with FoldConstantsRule. + RewritesOk("coalesce(1 + 2, id, year)", rules, "3"); + RewritesOk("coalesce(null is null, bool_col)", rules, "TRUE"); + RewritesOk("coalesce(10 + null, id, year)", rules, "coalesce(id, year)"); + // If the leading parameter is partition column, try to rewrite with partition metadata. + RewritesOk("coalesce(year, id)", rule, "year"); + RewritesOk("coalesce(year, bigint_col)", rule, "year"); + RewritesOk("coalesce(cast(year as string), string_col)", rule, "CAST(year AS STRING)"); + RewritesOk("coalesce(id, year)", rule, null); + RewritesOk("coalesce(null, year, id)", rule, "year"); + // If the leading partition column has NULL value, do not rewrite. + RewritesOk("functional.alltypesagg", "coalesce(year, id)", rule, "year"); + RewritesOk("functional.alltypesagg", "coalesce(day, id)", rule, null); + // If the leading column is not nullable, rewrite to the column. + RewritesOk("functional_kudu.alltypessmall", "coalesce(id, year)", rule, "id"); + RewritesOk("functional_kudu.alltypessmall", "coalesce(cast(id as string), string_col)", rule, + "CAST(id AS STRING)"); + RewritesOk("functional_kudu.alltypessmall", "coalesce(null, id, year)", rule, "id"); } @Test @@ -364,4 +407,4 @@ public class ExprRewriteRulesTest extends FrontendTestBase { RewritesOk("int_col = tinyint_col", rule, null); RewritesOk("tinyint_col = int_col", rule, null); } -} +} \ No newline at end of file
