This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch branch-4.1.2 in repository https://gitbox.apache.org/repos/asf/impala.git
commit 4037f43f2e6a33706e2c5a18e7fdba95b69f8601 Author: stiga-huang <[email protected]> AuthorDate: Mon Jan 16 19:34:13 2023 +0800 IMPALA-11843: Fix IndexOutOfBoundsException in analytic limit pushdown When finding analytic conjuncts for analytic limit pushdown, the following conditions are checked: - The conjunct should be a binary predicate - Left hand side is a SlotRef referencing the analytic expression, e.g. "rn" of "row_number() as rn" - The underlying analytic function is rank(), dense_rank() or row_number() - The window frame is UNBOUNDED PRECEDING to CURRENT ROW - Right hand side is a valid numeric limit - The op is =, <, or <= See more details in AnalyticPlanner.inferPartitionLimits(). While checking the 2nd and 3rd condition, we get the source exprs of the SlotRef. The source exprs could be empty if the SlotRef is actually referencing a column of the table, i.e. a column materialized by the scan node. Currently, we check the first source expr directly regardless whether the list is empty, which causes the IndexOutOfBoundsException. This patch fixes it by augmenting the check to consider an empty list. Also fixes a similar code in AnalyticEvalNode. Tests: - Add FE and e2e regression tests Change-Id: I26d6bd58be58d09a29b8b81972e76665f41cf103 Reviewed-on: http://gerrit.cloudera.org:8080/19422 Reviewed-by: Aman Sinha <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/planner/AnalyticEvalNode.java | 2 +- .../org/apache/impala/planner/AnalyticPlanner.java | 2 +- .../limit-pushdown-partitioned-top-n.test | 32 ++++++++++++++++++++++ .../queries/QueryTest/partitioned-top-n.test | 12 ++++++++ 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java index b0dd6987d..d4fb6abf4 100644 --- a/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java +++ b/fe/src/main/java/org/apache/impala/planner/AnalyticEvalNode.java @@ -587,7 +587,7 @@ public class AnalyticEvalNode extends PlanNode { return falseStatus; } List<Expr> lhsSourceExprs = ((SlotRef) lhs).getDesc().getSourceExprs(); - if (lhsSourceExprs.size() > 1 || + if (lhsSourceExprs.size() != 1 || !(lhsSourceExprs.get(0) instanceof AnalyticExpr)) { return falseStatus; } diff --git a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java index c070ea69f..0fc8b333f 100644 --- a/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/AnalyticPlanner.java @@ -910,7 +910,7 @@ public class AnalyticPlanner { if (!(lhs instanceof SlotRef)) continue; List<Expr> lhsSourceExprs = ((SlotRef) lhs).getDesc().getSourceExprs(); - if (lhsSourceExprs.size() > 1 || + if (lhsSourceExprs.size() != 1 || !(lhsSourceExprs.get(0) instanceof AnalyticExpr)) { continue; } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test index 24c59d02f..3b9c65263 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/limit-pushdown-partitioned-top-n.test @@ -891,3 +891,35 @@ PLAN-ROOT SINK HDFS partitions=1/1 files=1 size=38B row-size=24B cardinality=unavailable ==== +# Regression test for IMPALA-11843. Several conjuncts to consider pushing down. +select id, rn from ( + select id, + row_number() over (order by id desc) rn, + max(id) over () max_id + from functional.alltypesagg) t +where id = max_id and rn < 10 +---- PLAN +PLAN-ROOT SINK +| +04:SELECT +| predicates: id = max(id), id = max(id), row_number() < 10 +| row-size=16B cardinality=1.10K +| +03:ANALYTIC +| functions: row_number() +| order by: id DESC +| window: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| row-size=16B cardinality=11.00K +| +02:ANALYTIC +| functions: max(id) +| row-size=8B cardinality=11.00K +| +01:SORT +| order by: id DESC +| row-size=4B cardinality=11.00K +| +00:SCAN HDFS [functional.alltypesagg] + HDFS partitions=11/11 files=11 size=814.73KB + row-size=4B cardinality=11.00K +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test index 7cfff846f..538d289f8 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test +++ b/testdata/workloads/functional-query/queries/QueryTest/partitioned-top-n.test @@ -120,3 +120,15 @@ where rnk < 10 ---- TYPES BIGINT ==== +---- QUERY +select id, rn from ( + select id, + row_number() over (order by id desc) rn, + max(id) over () max_id + from functional.alltypesagg) t +where id = max_id and rn < 10 +---- RESULTS +9999,1 +---- TYPES +INT,BIGINT +====
