This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 5baadd1  IMPALA-10406: Query with analytic functions doesn't need to 
materialize the predicates bounded to kudu
5baadd1 is described below

commit 5baadd1da7d554ea3446e2a025afe8e991339765
Author: xqhe <[email protected]>
AuthorDate: Thu Dec 24 17:04:37 2020 +0800

    IMPALA-10406: Query with analytic functions doesn't need to materialize the predicates bounded to kudu
    
    Previously, a query with analytic functions would materialize all of
    the unassigned conjuncts.
    However, predicates that can be evaluated by Kudu do not need to be
    materialized.
    
    This optimization can reduce the amount of data to exchange and sort.
    
    Testing:
     - Add planner test in analytic-fns.test
    
    Change-Id: Iba8371eff6ae1bcffd51b44843175c52f2127e46
    Reviewed-on: http://gerrit.cloudera.org:8080/16905
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Tim Armstrong <[email protected]>
---
 .../org/apache/impala/analysis/SelectStmt.java     |  19 ++++
 .../queries/PlannerTest/analytic-fns.test          | 116 +++++++++++++++++++++
 2 files changed, 135 insertions(+)

diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index be1d8c1..49330e1 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -19,12 +19,15 @@ package org.apache.impala.analysis;
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
 import org.apache.impala.analysis.Path.PathType;
 import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.Column;
+import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.FeView;
 import org.apache.impala.catalog.StructField;
@@ -1124,6 +1127,22 @@ public class SelectStmt extends QueryStmt {
       List<TupleId> tids = new ArrayList<>();
       getMaterializedTupleIds(tids); // includes the analytic tuple
       List<Expr> conjuncts = analyzer.getUnassignedConjuncts(tids, false);
+      // The predicates that can be bounded to KuduScanNode don't need to materialize
+      // here. Because we don't need to materialize the predicates that can be evaluated
+      // by Kudu.
+      for (TupleId tid : tids) {
+        if (analyzer.getTupleDesc(tid).getTable() instanceof FeKuduTable) {
+          Iterator<Expr> iterator = conjuncts.iterator();
+          while (iterator.hasNext()) {
+            Expr e = iterator.next();
+            List<TupleId> etids = new ArrayList<>();
+            e.getIds(etids, null);
+            if (1 == etids.size() && etids.get(0) == tid) {
+              iterator.remove();
+            }
+          }
+        }
+      }
       materializeSlots(analyzer, conjuncts);
       analyticInfo_.materializeRequiredSlots(analyzer, baseTblSmap_);
     }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
index 1b27eec..845b876 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/analytic-fns.test
@@ -3090,3 +3090,119 @@ PLAN-ROOT SINK
    HDFS partitions=1/1 files=4 size=289.02MB
    row-size=38B cardinality=150.00K
 ====
+# IMPALA-10406: Query with analytic functions doesn't need to materialize the predicates bounded to kudu
+SELECT MIN(n_nationkey) OVER (PARTITION BY n_regionkey)
+FROM tpch_kudu.nation t1
+WHERE t1.n_name IN ('ALGERIA', 'ARGENTINA')
+---- PLAN
+PLAN-ROOT SINK
+|
+02:ANALYTIC
+|  functions: min(n_nationkey)
+|  partition by: n_regionkey
+|  row-size=6B cardinality=2
+|
+01:SORT
+|  order by: n_regionkey ASC NULLS LAST
+|  row-size=4B cardinality=2
+|
+00:SCAN KUDU [tpch_kudu.nation t1]
+   kudu predicates: t1.n_name IN ('ALGERIA', 'ARGENTINA')
+   row-size=4B cardinality=2
+====
+# IMPALA-10406: Query with analytic functions doesn't need to materialize the predicates bounded to kudu
+SELECT MIN(n_nationkey) OVER (PARTITION BY n_regionkey)
+FROM (
+    SELECT *
+    FROM tpch_kudu.nation
+    UNION ALL
+    SELECT *
+    FROM tpch_parquet.nation
+) t1
+WHERE t1.n_name IN ('ALGERIA', 'ARGENTINA')
+---- PLAN
+PLAN-ROOT SINK
+|
+04:ANALYTIC
+|  functions: min(n_nationkey)
+|  partition by: n_regionkey
+|  row-size=6B cardinality=4
+|
+03:SORT
+|  order by: n_regionkey ASC NULLS LAST
+|  row-size=4B cardinality=4
+|
+00:UNION
+|  row-size=4B cardinality=4
+|
+|--02:SCAN HDFS [tpch_parquet.nation]
+|     HDFS partitions=1/1 files=1 size=3.04KB
+|     predicates: tpch_parquet.nation.n_name IN ('ALGERIA', 'ARGENTINA')
+|     row-size=23B cardinality=2
+|
+01:SCAN KUDU [tpch_kudu.nation]
+   kudu predicates: tpch_kudu.nation.n_name IN ('ALGERIA', 'ARGENTINA')
+   row-size=4B cardinality=2
+====
+# IMPALA-10406: Query with analytic functions doesn't need to materialize the predicates bounded to kudu
+SELECT MIN(n_nationkey) OVER (PARTITION BY n_regionkey)
+FROM functional.alltypes t1
+    JOIN tpch_kudu.nation t2 ON t1.id = t2.n_nationkey
+WHERE t2.n_name IN ('ALGERIA', 'ARGENTINA');
+---- PLAN
+PLAN-ROOT SINK
+|
+04:ANALYTIC
+|  functions: min(n_nationkey)
+|  partition by: n_regionkey
+|  row-size=10B cardinality=2
+|
+03:SORT
+|  order by: n_regionkey ASC NULLS LAST
+|  row-size=8B cardinality=2
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t1.id = t2.n_nationkey
+|  runtime filters: RF000 <- t2.n_nationkey
+|  row-size=8B cardinality=2
+|
+|--01:SCAN KUDU [tpch_kudu.nation t2]
+|     kudu predicates: t2.n_name IN ('ALGERIA', 'ARGENTINA')
+|     row-size=4B cardinality=2
+|
+00:SCAN HDFS [functional.alltypes t1]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   runtime filters: RF000 -> t1.id
+   row-size=4B cardinality=7.30K
+====
+# IMPALA-10406: Query with analytic functions doesn't need to materialize the predicates bounded to kudu
+# This materializes 't2.n_name' because the predicate is also evaluated by the outer hash join as an 'other predicate'.
+SELECT MIN(n_nationkey) OVER (PARTITION BY n_regionkey)
+FROM functional.alltypes t1
+    LEFT JOIN tpch_kudu.nation t2 ON t1.id = t2.n_nationkey
+WHERE t2.n_name IN ('ALGERIA', 'ARGENTINA')
+---- PLAN
+PLAN-ROOT SINK
+|
+04:ANALYTIC
+|  functions: min(n_nationkey)
+|  partition by: n_regionkey
+|  row-size=29B cardinality=7.30K
+|
+03:SORT
+|  order by: n_regionkey ASC NULLS LAST
+|  row-size=27B cardinality=7.30K
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t1.id = t2.n_nationkey
+|  other predicates: t2.n_name IN ('ALGERIA', 'ARGENTINA')
+|  row-size=31B cardinality=7.30K
+|
+|--01:SCAN KUDU [tpch_kudu.nation t2]
+|     kudu predicates: t2.n_name IN ('ALGERIA', 'ARGENTINA')
+|     row-size=27B cardinality=2
+|
+00:SCAN HDFS [functional.alltypes t1]
+   HDFS partitions=24/24 files=24 size=478.45KB
+   row-size=4B cardinality=7.30K
+====

Reply via email to