IMPALA-5016: Simplify COALESCE() in SimplifyConditionalsRule.

Simplify COALESCE by skipping leading nulls and applying the following
transformations:
COALESCE(null, a, b) -> COALESCE(a, b);
COALESCE(<literal>, a, b) -> <literal>, when literal is not NullLiteral;
COALESCE(<partition-slotref>, a, b) -> <partition-slotref>,
when the partition column does not contain NULL.

Testing:
added unit tests in ExprRewriteRulesTest

Change-Id: I0325a9e437261b15313bbdf2f22d83376a84b575
Reviewed-on: http://gerrit.cloudera.org:8080/7015
Reviewed-by: Alex Behm <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/61696f3b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/61696f3b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/61696f3b

Branch: refs/heads/master
Commit: 61696f3ba81ba27d830bf262c6e56b0712f19e53
Parents: de9f523
Author: hzfengyu <[email protected]>
Authored: Sun May 28 23:58:18 2017 +0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Mon Jun 12 20:31:03 2017 +0000

----------------------------------------------------------------------
 .../rewrite/SimplifyConditionalsRule.java       | 94 +++++++++++++++++---
 .../impala/analysis/ExprRewriteRulesTest.java   | 49 +++++++++-
 2 files changed, 126 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61696f3b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
----------------------------------------------------------------------
diff --git 
a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java 
b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
index 176b89e..287b4be 100644
--- a/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
+++ b/fe/src/main/java/org/apache/impala/rewrite/SimplifyConditionalsRule.java
@@ -30,8 +30,13 @@ import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.FunctionCallExpr;
 import org.apache.impala.analysis.FunctionName;
 import org.apache.impala.analysis.NullLiteral;
+import org.apache.impala.analysis.SlotDescriptor;
+import org.apache.impala.analysis.SlotRef;
+import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.common.AnalysisException;
+
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 
 /***
  * This rule simplifies conditional functions with constant conditions. It 
relies on
@@ -43,6 +48,7 @@ import com.google.common.base.Preconditions;
  * id = 0 OR false -> id = 0
  * false AND id = 1 -> false
  * case when false then 0 when true then 1 end -> 1
+ * coalesce(1, 0) -> 1
  */
 public class SimplifyConditionalsRule implements ExprRewriteRule {
   public static ExprRewriteRule INSTANCE = new SimplifyConditionalsRule();
@@ -76,24 +82,84 @@ public class SimplifyConditionalsRule implements 
ExprRewriteRule {
    * Simplifies IF by returning the corresponding child if the condition has a 
constant
    * TRUE, FALSE, or NULL (equivalent to FALSE) value.
    */
+  private Expr simplifyIfFunctionCallExpr(FunctionCallExpr expr) {
+    Preconditions.checkState(expr.getChildren().size() == 3);
+    if (expr.getChild(0) instanceof BoolLiteral) {
+      if (((BoolLiteral) expr.getChild(0)).getValue()) {
+        // IF(TRUE)
+        return expr.getChild(1);
+      } else {
+        // IF(FALSE)
+        return expr.getChild(2);
+      }
+    } else if (expr.getChild(0) instanceof NullLiteral) {
+      // IF(NULL)
+      return expr.getChild(2);
+    }
+    return expr;
+  }
+
+  /**
+   * Simplify COALESCE by skipping leading nulls and applying the following 
transformations:
+   * COALESCE(null, a, b) -> COALESCE(a, b);
+   * COALESCE(<literal>, a, b) -> <literal>, when literal is not NullLiteral;
+   * COALESCE(<partition-slotref>, a, b) -> <partition-slotref>,
+   * when the partition column does not contain NULL.
+   */
+  private Expr simplifyCoalesceFunctionCallExpr(FunctionCallExpr expr) {
+    int numChildren = expr.getChildren().size();
+    Expr result = NullLiteral.create(expr.getType());
+    for (int i = 0; i < numChildren; ++i) {
+      Expr childExpr = expr.getChildren().get(i);
+      // Skip leading nulls.
+      if (childExpr.isNullLiteral()) continue;
+      if ((i == numChildren - 1) || canSimplifyCoalesceUsingChild(childExpr)) {
+        result = childExpr;
+      } else if (i == 0) {
+        result = expr;
+      } else {
+        List<Expr> newChildren = 
Lists.newArrayList(expr.getChildren().subList(i, numChildren));
+        result = new FunctionCallExpr(expr.getFnName(), newChildren);
+      }
+      break;
+    }
+    return result;
+  }
+
+  /**
+   * Checks if the given child expr is nullable. Returns true if one of the 
following holds:
+   * child is a non-NULL literal;
+   * child is a possibly cast SlotRef against a non-nullable slot;
+   * child is a possible cast SlotRef against a partition column that does not 
contain NULL.
+   */
+  private boolean canSimplifyCoalesceUsingChild(Expr child) {
+    if (child.isLiteral() && !child.isNullLiteral()) return true;
+
+    SlotRef slotRef = child.unwrapSlotRef(false);
+    if (slotRef == null) return false;
+    SlotDescriptor slotDesc = slotRef.getDesc();
+    if (!slotDesc.getIsNullable()) return true;
+    // Check partition column using partition metadata.
+    if (slotDesc.getParent().getTable() instanceof HdfsTable
+        && slotDesc.getColumn() != null
+        && 
slotDesc.getParent().getTable().isClusteringColumn(slotDesc.getColumn())) {
+      HdfsTable table = (HdfsTable) slotDesc.getParent().getTable();
+      // Return true if the partition column does not have a NULL value.
+      if 
(table.getNullPartitionIds(slotDesc.getColumn().getPosition()).isEmpty()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   private Expr simplifyFunctionCallExpr(FunctionCallExpr expr) {
     FunctionName fnName = expr.getFnName();
 
     // TODO: Add the other conditional functions, eg. ifnull, istrue, etc.
     if (fnName.getFunction().equals("if")) {
-      Preconditions.checkState(expr.getChildren().size() == 3);
-      if (expr.getChild(0) instanceof BoolLiteral) {
-        if (((BoolLiteral) expr.getChild(0)).getValue()) {
-          // IF(TRUE)
-          return expr.getChild(1);
-        } else {
-          // IF(FALSE)
-          return expr.getChild(2);
-        }
-      } else if (expr.getChild(0) instanceof NullLiteral) {
-        // IF(NULL)
-        return expr.getChild(2);
-      }
+      return simplifyIfFunctionCallExpr(expr);
+    } else if (fnName.getFunction().equals("coalesce")) {
+      return simplifyCoalesceFunctionCallExpr(expr);
     }
     return expr;
   }
@@ -209,4 +275,4 @@ public class SimplifyConditionalsRule implements 
ExprRewriteRule {
     }
     return new CaseExpr(caseExpr, newWhenClauses, elseExpr);
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/61696f3b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
----------------------------------------------------------------------
diff --git 
a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java 
b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
index 3ee4141..9650f76 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ExprRewriteRulesTest.java
@@ -42,12 +42,22 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
 
   public Expr RewritesOk(String expr, ExprRewriteRule rule, String 
expectedExpr)
       throws AnalysisException {
-    return RewritesOk(expr, Lists.newArrayList(rule), expectedExpr);
+    return RewritesOk("functional.alltypessmall", expr, rule, expectedExpr);
+  }
+
+  public Expr RewritesOk(String tableName, String expr, ExprRewriteRule rule, 
String expectedExpr)
+      throws AnalysisException {
+    return RewritesOk(tableName, expr, Lists.newArrayList(rule), expectedExpr);
   }
 
   public Expr RewritesOk(String expr, List<ExprRewriteRule> rules, String 
expectedExpr)
       throws AnalysisException {
-    String stmtStr = "select " + expr + " from functional.alltypessmall";
+    return RewritesOk("functional.alltypessmall", expr, rules, expectedExpr);
+  }
+
+  public Expr RewritesOk(String tableName, String expr, List<ExprRewriteRule> 
rules,
+      String expectedExpr) throws AnalysisException {
+    String stmtStr = "select " + expr + " from " + tableName;
     SelectStmt stmt = (SelectStmt) ParsesOk(stmtStr);
     Analyzer analyzer = createAnalyzer(Catalog.DEFAULT_DB);
     stmt.analyze(analyzer);
@@ -333,6 +343,39 @@ public class ExprRewriteRulesTest extends FrontendTestBase 
{
     RewritesOk("case when true then 0 when false then sum(id) end", rule, 
null);
     RewritesOk(
         "case when true then count(id) when false then sum(id) end", rule, 
"count(id)");
+
+    // IMPALA-5016: Simplify COALESCE function
+    // Test skipping leading nulls.
+    RewritesOk("coalesce(null, id, year)", rule, "coalesce(id, year)");
+    RewritesOk("coalesce(null, 1, id)", rule, "1");
+    RewritesOk("coalesce(null, null, id)", rule, "id");
+    // If the leading parameter is a non-NULL constant, rewrite to that 
constant.
+    RewritesOk("coalesce(1, id, year)", rule, "1");
+    // If COALESCE has only one parameter, rewrite to the parameter.
+    RewritesOk("coalesce(id)", rule, "id");
+    // If all parameters are NULL, rewrite to NULL.
+    RewritesOk("coalesce(null, null)", rule, "NULL");
+    // Do not rewrite non-literal constant exprs, rely on constant folding.
+    RewritesOk("coalesce(null is null, id)", rule, null);
+    RewritesOk("coalesce(10 + null, id)", rule, null);
+    // Combine COALESCE rule with FoldConstantsRule.
+    RewritesOk("coalesce(1 + 2, id, year)", rules, "3");
+    RewritesOk("coalesce(null is null, bool_col)", rules, "TRUE");
+    RewritesOk("coalesce(10 + null, id, year)", rules, "coalesce(id, year)");
+    // If the leading parameter is partition column, try to rewrite with 
partition metadata.
+    RewritesOk("coalesce(year, id)", rule, "year");
+    RewritesOk("coalesce(year, bigint_col)", rule, "year");
+    RewritesOk("coalesce(cast(year as string), string_col)", rule, "CAST(year 
AS STRING)");
+    RewritesOk("coalesce(id, year)", rule, null);
+    RewritesOk("coalesce(null, year, id)", rule, "year");
+    // If the leading partition column has NULL value, do not rewrite.
+    RewritesOk("functional.alltypesagg", "coalesce(year, id)", rule, "year");
+    RewritesOk("functional.alltypesagg", "coalesce(day, id)", rule, null);
+    // If the leading column is not nullable, rewrite to the column.
+    RewritesOk("functional_kudu.alltypessmall", "coalesce(id, year)", rule, 
"id");
+    RewritesOk("functional_kudu.alltypessmall", "coalesce(cast(id as string), 
string_col)", rule,
+        "CAST(id AS STRING)");
+    RewritesOk("functional_kudu.alltypessmall", "coalesce(null, id, year)", 
rule, "id");
   }
 
   @Test
@@ -364,4 +407,4 @@ public class ExprRewriteRulesTest extends FrontendTestBase {
     RewritesOk("int_col = tinyint_col", rule, null);
     RewritesOk("tinyint_col = int_col", rule, null);
   }
-}
+}
\ No newline at end of file

Reply via email to