This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit a6ad1392da0ed7f43195b578f0433cda84b4bdc5 Author: Daniel Becker <dbec...@apache.org> AuthorDate: Tue Jul 8 17:55:15 2025 +0200 IMPALA-13888: LEFT ANTI JOIN is not working with Iceberg V2 tables on the right side If an Iceberg table contains delete files, queries where it is on the right side of a left anti-join fail: select * from alltypes a LEFT ANTI JOIN iceberg_v2_positional_update_all_rows b ON a.id = b.i; AnalysisException: Illegal column/field reference 'b.input__file__name' of semi-/anti-joined table 'b' This is because semi-joined tuples need to be made visible explicitly in order for paths pointing inside them to be resolvable, see Analyzer::resolvePaths(). This commit adds code to IcebergScanPlanner to make the tuple containing the virtual fields visible if it is semi-joined. Testing: - Added regression tests in iceberg-v2-read-position-deletes.test. Change-Id: I19de9c7c7ed1d61cde281d270c4cc3ce0b7c582d Reviewed-on: http://gerrit.cloudera.org:8080/23147 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- .../apache/impala/planner/IcebergScanPlanner.java | 9 ++++++ .../iceberg-v2-read-position-deletes.test | 32 ++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index ef739f4d7..b55b6eaf2 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -359,6 +359,12 @@ public class IcebergScanPlanner { private void addDataVirtualPositionSlots(TableRef tblRef) throws AnalysisException { List<String> rawPath = Lists.newArrayList( tblRef.getUniqueAlias(), VirtualColumn.INPUT_FILE_NAME.getName()); + + // If we are inside a semi-join, make the tuple visible so that paths in the tuple can + // be resolved in it. 
See IMPALA-13888. + boolean isSemiJoined = analyzer_.isSemiJoined(tblRef.getId()); + if (isSemiJoined) analyzer_.setVisibleSemiJoinedTuple(tblRef.getId()); + SlotDescriptor fileNameSlotDesc = SingleNodePlanner.addSlotRefToDesc(analyzer_, rawPath); fileNameSlotDesc.setStats(virtualInputFileNameStats()); @@ -367,6 +373,9 @@ public class IcebergScanPlanner { tblRef.getUniqueAlias(), VirtualColumn.FILE_POSITION.getName()); SlotDescriptor filePosSlotDesc = SingleNodePlanner.addSlotRefToDesc(analyzer_, rawPath); + + if (isSemiJoined) analyzer_.setVisibleSemiJoinedTuple(null); + filePosSlotDesc.setStats(virtualFilePositionStats()); } diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test index 067eddbaf..bd3e4d372 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test @@ -596,3 +596,35 @@ where `data` not in (select min(`data`) from functional_parquet.iceberg_v2_delet ---- TYPES STRING ==== +---- QUERY +# Regression test for IMPALA-13888. +select a.id +from alltypestiny a LEFT ANTI JOIN iceberg_v2_positional_update_all_rows b +ON a.id = b.i; +---- RESULTS +4 +5 +0 +6 +7 +---- TYPES +INT +==== +---- QUERY +# Regression test for IMPALA-13888 with multiple joins. +select a.id from alltypestiny a +LEFT ANTI JOIN iceberg_v2_positional_update_all_rows b +ON a.id = b.i +LEFT ANTI JOIN iceberg_v2_positional_update_all_rows c +ON a.id = c.i +LEFT ANTI JOIN iceberg_v2_positional_update_all_rows d +ON a.id = d.i; +---- RESULTS +4 +5 +0 +6 +7 +---- TYPES +INT +====