This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0853ceec4a7c3bfbc51cd24bb95d77f314104360 Author: Peter Rozsa <[email protected]> AuthorDate: Tue Nov 28 14:08:29 2023 +0100 IMPALA-12580: Fix Iceberg predicate filtering skippedExpression calculation This patch changes the collection method of skipped expressions from toSet to toList. toSet uses HashSet, which calls hashCode for expression nodes, which is not always applicable because expressions recreated by constant propagation have no id (null) and the default hashCode implementation for Expr requires it. Change-Id: I692d3b186e5e73caf9c66ada4afbe36e49641952 Reviewed-on: http://gerrit.cloudera.org:8080/20735 Reviewed-by: Gabor Kaszab <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../apache/impala/planner/IcebergScanPlanner.java | 17 ++++---- .../queries/PlannerTest/iceberg-predicates.test | 49 ++++++++++++++++++++++ .../queries/QueryTest/iceberg-query.test | 17 ++++++++ 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index 419adf207..7baccb2ac 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -120,10 +120,9 @@ public class IcebergScanPlanner { // Residual expressions after Iceberg planning private final Set<Expression> residualExpressions_ = new TreeSet<>(Comparator.comparing(ExpressionUtil::toSanitizedString)); - // Expressions filtered by Iceberg's planFiles, subset of 'translatedExpressions_''s - // values - private final Set<Expr> skippedExpressions_ = - new TreeSet<>(Comparator.comparingInt(System::identityHashCode)); + // Expressions filtered by Iceberg's planFiles, subset of + // 'impalaIcebergPredicateMapping_''s values + private final List<Expr> skippedExpressions_ = new ArrayList<>(); // Impala expressions that can't be translated into Iceberg expressions private final List<Expr> untranslatedExpressions_ = new ArrayList<>(); // Conjuncts on columns not involved in IDENTITY-partitioning. @@ -433,15 +432,15 @@ public class IcebergScanPlanner { new IcebergExpressionCollector()); for (Expression located : locatedExpressions) { Expr expr = impalaIcebergPredicateMapping_.get(located); - /* If we fail to locate any of the Iceberg residual expressions then we skip - filtering the predicates to be pushed down to Impala scanner.*/ + // If we fail to locate any of the Iceberg residual expressions then we skip + // filtering the predicates to be pushed down to Impala scanner. if (expr == null) return false; expressionsToRetain.add(expr); } } skippedExpressions_.addAll( conjuncts_.stream().filter(expr -> !expressionsToRetain.contains(expr)).collect( - Collectors.toSet())); + Collectors.toList())); conjuncts_ = expressionsToRetain; LOG.debug("Iceberg predicate pushdown subsetting took {} ms", (System.currentTimeMillis() - startTime)); @@ -449,9 +448,7 @@ public class IcebergScanPlanner { } private List<Expr> getSkippedConjuncts() { - if (!residualExpressions_.isEmpty()) { - return new ArrayList<>(skippedExpressions_); - } + if (!residualExpressions_.isEmpty()) return skippedExpressions_; return new ArrayList<>(impalaIcebergPredicateMapping_.values()); } diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test index f3892e23f..8d22e0b32 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-predicates.test @@ -166,4 +166,53 @@ PLAN-ROOT SINK predicates: id < 10 skipped Iceberg predicates: action IS NOT NULL row-size=44B cardinality=1 +==== +---- QUERY +# Regression test for IMPALA-12580: predicate filtering could throw UnsupportedOperationException +select p_int from functional_parquet.iceberg_alltypes_part where i = 1 and p_int = i; +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [functional_parquet.iceberg_alltypes_part] + HDFS partitions=1/1 files=1 size=433B + predicates: i = 1 + skipped Iceberg predicates: functional_parquet.iceberg_alltypes_part.p_int = 1 + row-size=8B cardinality=1 +==== +---- QUERY +# Regression test for IMPALA-12580: predicate filtering could throw UnsupportedOperationException +select * from functional_parquet.iceberg_alltypes_part +where i = 1 and p_bigint = 10 + i and p_int = p_bigint - 10; +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [functional_parquet.iceberg_alltypes_part] + HDFS partitions=1/1 files=1 size=433B + predicates: i = 1 + skipped Iceberg predicates: p_bigint = 11, p_int = 1 + row-size=49B cardinality=1 +==== +---- QUERY +select * from functional_parquet.iceberg_alltypes_part +where p_string = concat(chr(cast(p_int + 104 as int)), "mpala") and +p_string = concat("i", "mpala") and p_decimal between 122 and 124 and i = floor(p_float) +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [functional_parquet.iceberg_alltypes_part] + HDFS partitions=1/1 files=1 size=433B + predicates: i = floor(p_float), concat(chr(CAST(p_int + 104 AS INT)), 'mpala') = 'impala' + skipped Iceberg predicates: p_string = 'impala', p_decimal >= 122, p_decimal <= 124 + row-size=49B cardinality=1 +==== +---- QUERY +select * from functional_parquet.iceberg_alltypes_part where i in (1,2,3) and i = p_int; +---- PLAN +PLAN-ROOT SINK +| +00:SCAN HDFS [functional_parquet.iceberg_alltypes_part] + HDFS partitions=1/1 files=1 size=433B + predicates: i = p_int, i IN (1, 2, 3) + skipped Iceberg predicates: functional_parquet.iceberg_alltypes_part.p_int IN (1, 2, 3) + row-size=49B cardinality=1 ==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test index 8f131c864..cd43496c2 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test @@ -1169,3 +1169,20 @@ select * from iceberg_v2_delete_equality for system_version as of 32076731677950 ---- TYPES BIGINT,STRING ==== +---- QUERY +# Regression test for IMPALA-12580: predicate filtering could throw UnsupportedOperationException +select i from functional_parquet.iceberg_alltypes_part +where i = 1 and p_bigint = 10 + i and p_int = p_bigint - 10; +---- RESULTS +1 +---- TYPES +INT +==== +---- QUERY +# Regression test for IMPALA-12580: predicate filtering could throw UnsupportedOperationException +select p_int from functional_parquet.iceberg_alltypes_part where i = 1 and p_int = i; +---- RESULTS +1 +---- TYPES +INT +==== \ No newline at end of file
