Repository: incubator-impala
Updated Branches:
  refs/heads/master 878fcf5a7 -> 874d20d0f


IMPALA-4859: Push down IS NULL / IS NOT NULL to Kudu

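This detects IS NULL / IS NOT NULL predicates applied directly to a
Kudu column and converts them into Kudu predicates so that they are
pushed down to the Kudu scan. Predicates on anything other than a
bare column reference (e.g. a cast) are not pushed down.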

For testing, there are planner tests to verify that the
predicate is pushed to Kudu. There are also end-to-end
tests for correctness.

Change-Id: I9c96fec8d41f77222879c0ffdd6940b168e47e65
Reviewed-on: http://gerrit.cloudera.org:8080/5958
Reviewed-by: Marcel Kornacker <[email protected]>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/077c07ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/077c07ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/077c07ee

Branch: refs/heads/master
Commit: 077c07eec77c05f97d5e45704454b788ed1b45e0
Parents: 878fcf5
Author: Joe McDonnell <[email protected]>
Authored: Wed Feb 8 10:48:55 2017 -0800
Committer: Impala Public Jenkins <[email protected]>
Committed: Sat Mar 25 04:51:36 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/planner/KuduScanNode.java | 33 +++++++++++++++++++-
 .../queries/PlannerTest/kudu-selectivity.test   | 14 +++++++++
 .../queries/PlannerTest/kudu.test               |  6 ++--
 .../queries/QueryTest/kudu-scan-node.test       | 30 ++++++++++++++++++
 4 files changed, 80 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
index 7cd6e7c..64e5fde 100644
--- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java
@@ -27,6 +27,7 @@ import org.apache.impala.analysis.BinaryPredicate;
 import org.apache.impala.analysis.BoolLiteral;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.InPredicate;
+import org.apache.impala.analysis.IsNullPredicate;
 import org.apache.impala.analysis.LiteralExpr;
 import org.apache.impala.analysis.NullLiteral;
 import org.apache.impala.analysis.NumericLiteral;
@@ -322,7 +323,8 @@ public class KuduScanNode extends ScanNode {
     while (it.hasNext()) {
       Expr predicate = it.next();
       if (tryConvertBinaryKuduPredicate(analyzer, rpcTable, predicate) ||
-          tryConvertInListKuduPredicate(analyzer, rpcTable, predicate)) {
+          tryConvertInListKuduPredicate(analyzer, rpcTable, predicate) ||
+          tryConvertIsNullKuduPredicate(analyzer, rpcTable, predicate)) {
         it.remove();
       }
     }
@@ -436,6 +438,35 @@ public class KuduScanNode extends ScanNode {
   }
 
   /**
+   * If 'expr' is an IS NULL/IS NOT NULL predicate on a bare SlotRef, returns true and
+   * adds it to kuduConjuncts_ and its KuduPredicate to kuduPredicates_; else false.
+   */
+  private boolean tryConvertIsNullKuduPredicate(Analyzer analyzer,
+      org.apache.kudu.client.KuduTable table, Expr expr) {
+    if (!(expr instanceof IsNullPredicate)) return false;
+    IsNullPredicate predicate = (IsNullPredicate) expr;
+
+    // Only convert when the operand is a bare SlotRef. Any wrapping expression,
+    // including a cast, is rejected because certain casts can take a non-NULL
+    // value and produce a NULL. For example, CAST('test' AS TINYINT) is NULL,
+    // so the nullness of the column itself does not decide such a predicate.
+    if (!(predicate.getChild(0) instanceof SlotRef)) return false;
+    SlotRef ref = (SlotRef) predicate.getChild(0);
+
+    String colName = ref.getDesc().getColumn().getName();
+    ColumnSchema column = table.getSchema().getColumn(colName);
+    KuduPredicate kuduPredicate = null;
+    if (predicate.isNotNull()) {
+      kuduPredicate = KuduPredicate.newIsNotNullPredicate(column);
+    } else {
+      kuduPredicate = KuduPredicate.newIsNullPredicate(column);
+    }
+    kuduConjuncts_.add(predicate);
+    kuduPredicates_.add(kuduPredicate);
+    return true;
+  }
+
+  /**
    * Return the value of the InList child expression 'e' as an Object that can be
    * added to a KuduPredicate. If the Expr is not supported by Kudu or the type doesn't
    * match the expected PrimitiveType 'type', null is returned.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
index eba7741..b064d2b 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test
@@ -130,3 +130,17 @@ F00:PLAN FRAGMENT [UNPARTITIONED]
      hosts=3 per-host-mem=unavailable
      tuple-ids=0 row-size=126B cardinality=4
 ====
+select * from functional_kudu.alltypes where
+tinyint_col is not null and
+smallint_col is null and
+cast(date_string_col as tinyint) is null
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED]
+  PLAN-ROOT SINK
+  |
+  00:SCAN KUDU [functional_kudu.alltypes]
+     predicates: CAST(date_string_col AS TINYINT) IS NULL
+     kudu predicates: smallint_col IS NULL, tinyint_col IS NOT NULL
+     hosts=3 per-host-mem=unavailable
+     tuple-ids=0 row-size=126B cardinality=730
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 63db8ba..c99de37 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -166,7 +166,8 @@ where cast(sin(id) as boolean) = true and name is null
 PLAN-ROOT SINK
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   predicates: name IS NULL, CAST(sin(id) AS BOOLEAN) = TRUE
+   predicates: CAST(sin(id) AS BOOLEAN) = TRUE
+   kudu predicates: name IS NULL
 ---- SCANRANGELOCATIONS
 NODE 0:
   ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))}
@@ -178,7 +179,8 @@ PLAN-ROOT SINK
 01:EXCHANGE [UNPARTITIONED]
 |
 00:SCAN KUDU [functional_kudu.testtbl]
-   predicates: name IS NULL, CAST(sin(id) AS BOOLEAN) = TRUE
+   predicates: CAST(sin(id) AS BOOLEAN) = TRUE
+   kudu predicates: name IS NULL
 ====
 # IMPALA-3856: KuduScanNode crash when pushing predicates including a cast
 select o_orderkey from tpch_kudu.orders where o_orderkey < 10.0 order by 1

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
index 0e7bf03..243b7e5 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test
@@ -55,3 +55,33 @@ select count(int_col) from functional_kudu.tinyinttable
 ---- TYPES
 BIGINT
 ====
+---- QUERY
+# IMPALA-4859: Test Kudu IS NULL/IS NOT NULL pushdown
+select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is null;
+---- RESULTS
+2
+---- TYPES
+BIGINT
+====
+---- QUERY
+select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is not null;
+---- RESULTS
+9
+---- TYPES
+BIGINT
+====
+---- QUERY
+# alltypes.id is primary key/not nullable, verify IS NOT NULL/IS NULL pushdown works
+select count(*) from functional_kudu.alltypes where id is not null;
+---- RESULTS
+7300
+---- TYPES
+BIGINT
+====
+---- QUERY
+select count(*) from functional_kudu.alltypes where id is null;
+---- RESULTS
+0
+---- TYPES
+BIGINT
+====
\ No newline at end of file

Reply via email to