Repository: incubator-impala Updated Branches: refs/heads/master 878fcf5a7 -> 874d20d0f
IMPALA-4859: Push down IS NULL / IS NOT NULL to Kudu This detects IS NULL / IS NOT NULL and creates a Kudu predicate to push this to Kudu. For testing, there are planner tests to verify that the predicate is pushed to Kudu. There are also end-to-end tests for correctness. Change-Id: I9c96fec8d41f77222879c0ffdd6940b168e47e65 Reviewed-on: http://gerrit.cloudera.org:8080/5958 Reviewed-by: Marcel Kornacker <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/077c07ee Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/077c07ee Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/077c07ee Branch: refs/heads/master Commit: 077c07eec77c05f97d5e45704454b788ed1b45e0 Parents: 878fcf5 Author: Joe McDonnell <[email protected]> Authored: Wed Feb 8 10:48:55 2017 -0800 Committer: Impala Public Jenkins <[email protected]> Committed: Sat Mar 25 04:51:36 2017 +0000 ---------------------------------------------------------------------- .../org/apache/impala/planner/KuduScanNode.java | 33 +++++++++++++++++++- .../queries/PlannerTest/kudu-selectivity.test | 14 +++++++++ .../queries/PlannerTest/kudu.test | 6 ++-- .../queries/QueryTest/kudu-scan-node.test | 30 ++++++++++++++++++ 4 files changed, 80 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java index 7cd6e7c..64e5fde 100644 --- a/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/KuduScanNode.java @@ -27,6 +27,7 @@ import org.apache.impala.analysis.BinaryPredicate; import org.apache.impala.analysis.BoolLiteral; import org.apache.impala.analysis.Expr; import org.apache.impala.analysis.InPredicate; +import org.apache.impala.analysis.IsNullPredicate; import org.apache.impala.analysis.LiteralExpr; import org.apache.impala.analysis.NullLiteral; import org.apache.impala.analysis.NumericLiteral; @@ -322,7 +323,8 @@ public class KuduScanNode extends ScanNode { while (it.hasNext()) { Expr predicate = it.next(); if (tryConvertBinaryKuduPredicate(analyzer, rpcTable, predicate) || - tryConvertInListKuduPredicate(analyzer, rpcTable, predicate)) { + tryConvertInListKuduPredicate(analyzer, rpcTable, predicate) || + tryConvertIsNullKuduPredicate(analyzer, rpcTable, predicate)) { it.remove(); } } @@ -436,6 +438,35 @@ public class KuduScanNode extends ScanNode { } /** + * If IS NULL/IS NOT NULL 'expr' can be converted to a KuduPredicate, + * returns true and updates kuduPredicates_ and kuduConjuncts_. + */ + private boolean tryConvertIsNullKuduPredicate(Analyzer analyzer, + org.apache.kudu.client.KuduTable table, Expr expr) { + if (!(expr instanceof IsNullPredicate)) return false; + IsNullPredicate predicate = (IsNullPredicate) expr; + + // Do not convert if expression is more than a SlotRef + // This is true even for casts, as certain casts can take a non-NULL + // value and produce a NULL. For example, CAST('test' as tinyint) + // is NULL. + if (!(predicate.getChild(0) instanceof SlotRef)) return false; + SlotRef ref = (SlotRef) predicate.getChild(0); + + String colName = ref.getDesc().getColumn().getName(); + ColumnSchema column = table.getSchema().getColumn(colName); + KuduPredicate kuduPredicate = null; + if (predicate.isNotNull()) { + kuduPredicate = KuduPredicate.newIsNotNullPredicate(column); + } else { + kuduPredicate = KuduPredicate.newIsNullPredicate(column); + } + kuduConjuncts_.add(predicate); + kuduPredicates_.add(kuduPredicate); + return true; + } + + /** * Return the value of the InList child expression 'e' as an Object that can be * added to a KuduPredicate. If the Expr is not supported by Kudu or the type doesn't * match the expected PrimitiveType 'type', null is returned. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test index eba7741..b064d2b 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu-selectivity.test @@ -130,3 +130,17 @@ F00:PLAN FRAGMENT [UNPARTITIONED] hosts=3 per-host-mem=unavailable tuple-ids=0 row-size=126B cardinality=4 ==== +select * from functional_kudu.alltypes where +tinyint_col is not null and +smallint_col is null and +cast(date_string_col as tinyint) is null +---- PLAN +F00:PLAN FRAGMENT [UNPARTITIONED] + PLAN-ROOT SINK + | + 00:SCAN KUDU [functional_kudu.alltypes] + predicates: CAST(date_string_col AS TINYINT) IS NULL + kudu predicates: smallint_col IS NULL, tinyint_col IS NOT NULL + hosts=3 per-host-mem=unavailable + tuple-ids=0 row-size=126B cardinality=730 +==== http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test index 63db8ba..c99de37 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test @@ -166,7 +166,8 @@ where cast(sin(id) as boolean) = true and name is null PLAN-ROOT SINK | 00:SCAN KUDU [functional_kudu.testtbl] - predicates: name IS NULL, CAST(sin(id) AS BOOLEAN) = TRUE + predicates: CAST(sin(id) AS BOOLEAN) = TRUE + kudu predicates: name IS NULL ---- SCANRANGELOCATIONS NODE 0: ScanToken{table=impala::functional_kudu.testtbl, range-partition: [(int64 id=1004), (int64 id=1008))} @@ -178,7 +179,8 @@ PLAN-ROOT SINK 01:EXCHANGE [UNPARTITIONED] | 00:SCAN KUDU [functional_kudu.testtbl] - predicates: name IS NULL, CAST(sin(id) AS BOOLEAN) = TRUE + predicates: CAST(sin(id) AS BOOLEAN) = TRUE + kudu predicates: name IS NULL ==== # IMPALA-3856: KuduScanNode crash when pushing predicates including a cast select o_orderkey from tpch_kudu.orders where o_orderkey < 10.0 order by 1 http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/077c07ee/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test index 0e7bf03..243b7e5 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu-scan-node.test @@ -55,3 +55,33 @@ select count(int_col) from functional_kudu.tinyinttable ---- TYPES BIGINT ==== +---- QUERY +# IMPALA-4859: Test Kudu IS NULL/IS NOT NULL pushdown +select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is null; +---- RESULTS +2 +---- TYPES +BIGINT +==== +---- QUERY +select count(*) from functional_kudu.alltypesagg where id < 10 and float_col is not null; +---- RESULTS +9 +---- TYPES +BIGINT +==== +---- QUERY +# alltypes.id is primary key/not nullable, verify IS NOT NULL/IS NULL pushdown works +select count(*) from functional_kudu.alltypes where id is not null; +---- RESULTS +7300 +---- TYPES +BIGINT +==== +---- QUERY +select count(*) from functional_kudu.alltypes where id is null; +---- RESULTS +0 +---- TYPES +BIGINT +==== \ No newline at end of file
