This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0853ceec4a7c3bfbc51cd24bb95d77f314104360
Author: Peter Rozsa <[email protected]>
AuthorDate: Tue Nov 28 14:08:29 2023 +0100

    IMPALA-12580: Fix Iceberg predicate filtering skippedExpression calculation
    
    This patch changes the collection method of skipped expressions from
    toSet to toList. toSet uses HashSet, which calls hashCode for
    expression nodes, which is not always applicable because expressions
    recreated by constant propagation have no id (null) and the default
    hashCode implementation for Expr requires it.
    
    Change-Id: I692d3b186e5e73caf9c66ada4afbe36e49641952
    Reviewed-on: http://gerrit.cloudera.org:8080/20735
    Reviewed-by: Gabor Kaszab <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../apache/impala/planner/IcebergScanPlanner.java  | 17 ++++----
 .../queries/PlannerTest/iceberg-predicates.test    | 49 ++++++++++++++++++++++
 .../queries/QueryTest/iceberg-query.test           | 17 ++++++++
 3 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java 
b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
index 419adf207..7baccb2ac 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
@@ -120,10 +120,9 @@ public class IcebergScanPlanner {
   // Residual expressions after Iceberg planning
   private final Set<Expression> residualExpressions_ =
       new TreeSet<>(Comparator.comparing(ExpressionUtil::toSanitizedString));
-  // Expressions filtered by Iceberg's planFiles, subset of 
'translatedExpressions_''s
-  // values
-  private final Set<Expr> skippedExpressions_ =
-      new TreeSet<>(Comparator.comparingInt(System::identityHashCode));
+  // Expressions filtered by Iceberg's planFiles, subset of
+  // 'impalaIcebergPredicateMapping_''s values
+  private final List<Expr> skippedExpressions_ = new ArrayList<>();
   // Impala expressions that can't be translated into Iceberg expressions
   private final List<Expr> untranslatedExpressions_ = new ArrayList<>();
   // Conjuncts on columns not involved in IDENTITY-partitioning.
@@ -433,15 +432,15 @@ public class IcebergScanPlanner {
           new IcebergExpressionCollector());
       for (Expression located : locatedExpressions) {
         Expr expr = impalaIcebergPredicateMapping_.get(located);
-        /* If we fail to locate any of the Iceberg residual expressions then 
we skip
-         filtering the predicates to be pushed down to Impala scanner.*/
+        // If we fail to locate any of the Iceberg residual expressions then 
we skip
+        // filtering the predicates to be pushed down to Impala scanner.
         if (expr == null) return false;
         expressionsToRetain.add(expr);
       }
     }
     skippedExpressions_.addAll(
         conjuncts_.stream().filter(expr -> 
!expressionsToRetain.contains(expr)).collect(
-            Collectors.toSet()));
+            Collectors.toList()));
     conjuncts_ = expressionsToRetain;
     LOG.debug("Iceberg predicate pushdown subsetting took {} ms",
         (System.currentTimeMillis() - startTime));
@@ -449,9 +448,7 @@ public class IcebergScanPlanner {
   }
 
   private List<Expr> getSkippedConjuncts() {
-    if (!residualExpressions_.isEmpty()) {
-      return new ArrayList<>(skippedExpressions_);
-    }
+    if (!residualExpressions_.isEmpty()) return skippedExpressions_;
     return new ArrayList<>(impalaIcebergPredicateMapping_.values());
   }
 
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test
index f3892e23f..8d22e0b32 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test
@@ -166,4 +166,53 @@ PLAN-ROOT SINK
    predicates: id < 10
    skipped Iceberg predicates: action IS NOT NULL
    row-size=44B cardinality=1
+====
+---- QUERY
+# Regression test for IMPALA-12580: predicate filtering could throw 
UnsupportedOperationException
+select p_int from functional_parquet.iceberg_alltypes_part where i = 1 and 
p_int = i;
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.iceberg_alltypes_part]
+   HDFS partitions=1/1 files=1 size=433B
+   predicates: i = 1
+   skipped Iceberg predicates: functional_parquet.iceberg_alltypes_part.p_int 
= 1
+   row-size=8B cardinality=1
+====
+---- QUERY
+# Regression test for IMPALA-12580: predicate filtering could throw 
UnsupportedOperationException
+select * from functional_parquet.iceberg_alltypes_part
+where i = 1 and p_bigint = 10 + i and p_int = p_bigint - 10;
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.iceberg_alltypes_part]
+   HDFS partitions=1/1 files=1 size=433B
+   predicates: i = 1
+   skipped Iceberg predicates: p_bigint = 11, p_int = 1
+   row-size=49B cardinality=1
+====
+---- QUERY
+select * from functional_parquet.iceberg_alltypes_part
+where p_string = concat(chr(cast(p_int + 104 as int)), "mpala") and
+p_string = concat("i", "mpala") and p_decimal between 122 and 124 and i = 
floor(p_float)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.iceberg_alltypes_part]
+   HDFS partitions=1/1 files=1 size=433B
+   predicates: i = floor(p_float), concat(chr(CAST(p_int + 104 AS INT)), 
'mpala') = 'impala'
+   skipped Iceberg predicates: p_string = 'impala', p_decimal >= 122, 
p_decimal <= 124
+   row-size=49B cardinality=1
+====
+---- QUERY
+select * from functional_parquet.iceberg_alltypes_part where i in (1,2,3) and 
i = p_int;
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional_parquet.iceberg_alltypes_part]
+   HDFS partitions=1/1 files=1 size=433B
+   predicates: i = p_int, i IN (1, 2, 3)
+   skipped Iceberg predicates: functional_parquet.iceberg_alltypes_part.p_int 
IN (1, 2, 3)
+   row-size=49B cardinality=1
 ====
\ No newline at end of file
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
index 8f131c864..cd43496c2 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test
@@ -1169,3 +1169,20 @@ select * from iceberg_v2_delete_equality for 
system_version as of 32076731677950
 ---- TYPES
 BIGINT,STRING
 ====
+---- QUERY
+# Regression test for IMPALA-12580: predicate filtering could throw 
UnsupportedOperationException
+select i from functional_parquet.iceberg_alltypes_part
+where i = 1 and p_bigint = 10 + i and p_int = p_bigint - 10;
+---- RESULTS
+1
+---- TYPES
+INT
+====
+---- QUERY
+# Regression test for IMPALA-12580: predicate filtering could throw 
UnsupportedOperationException
+select p_int from functional_parquet.iceberg_alltypes_part where i = 1 and 
p_int = i;
+---- RESULTS
+1
+---- TYPES
+INT
+====
\ No newline at end of file

Reply via email to