This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit a6ad1392da0ed7f43195b578f0433cda84b4bdc5
Author: Daniel Becker <dbec...@apache.org>
AuthorDate: Tue Jul 8 17:55:15 2025 +0200

    IMPALA-13888: LEFT ANTI JOIN is not working with Iceberg V2 tables on the right side
    
    If an Iceberg table contains delete files, queries where it is on the
    right side of a left anti-join fail:
      select *
      from alltypes a
        LEFT ANTI JOIN
        iceberg_v2_positional_update_all_rows b
      ON a.id = b.i;
    
      AnalysisException: Illegal column/field reference
      'b.input__file__name' of semi-/anti-joined table 'b'
    
    This is because semi-joined tuples need to be made visible explicitly in
    order for paths pointing inside them to be resolvable, see
    Analyzer::resolvePaths().
    
    This commit adds code to IcebergScanPlanner to make the tuple containing
    the virtual fields visible if it is semi-joined.
    
    Testing:
      - Added regression tests in iceberg-v2-read-position-deletes.test.
    
    Change-Id: I19de9c7c7ed1d61cde281d270c4cc3ce0b7c582d
    Reviewed-on: http://gerrit.cloudera.org:8080/23147
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 .../apache/impala/planner/IcebergScanPlanner.java  |  9 ++++++
 .../iceberg-v2-read-position-deletes.test          | 32 ++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java 
b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
index ef739f4d7..b55b6eaf2 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java
@@ -359,6 +359,12 @@ public class IcebergScanPlanner {
   private void addDataVirtualPositionSlots(TableRef tblRef) throws AnalysisException {
     List<String> rawPath = Lists.newArrayList(
         tblRef.getUniqueAlias(), VirtualColumn.INPUT_FILE_NAME.getName());
+
+    // If we are inside a semi-join, make the tuple visible so that paths in the tuple can
+    // be resolved in it. See IMPALA-13888.
+    boolean isSemiJoined = analyzer_.isSemiJoined(tblRef.getId());
+    if (isSemiJoined) analyzer_.setVisibleSemiJoinedTuple(tblRef.getId());
+
     SlotDescriptor fileNameSlotDesc =
         SingleNodePlanner.addSlotRefToDesc(analyzer_, rawPath);
     fileNameSlotDesc.setStats(virtualInputFileNameStats());
@@ -367,6 +373,9 @@ public class IcebergScanPlanner {
         tblRef.getUniqueAlias(), VirtualColumn.FILE_POSITION.getName());
     SlotDescriptor filePosSlotDesc =
         SingleNodePlanner.addSlotRefToDesc(analyzer_, rawPath);
+
+    if (isSemiJoined) analyzer_.setVisibleSemiJoinedTuple(null);
+
     filePosSlotDesc.setStats(virtualFilePositionStats());
   }
 
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
index 067eddbaf..bd3e4d372 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test
@@ -596,3 +596,35 @@ where `data` not in (select min(`data`) from 
functional_parquet.iceberg_v2_delet
 ---- TYPES
 STRING
 ====
+---- QUERY
+# Regression test for IMPALA-13888.
+select a.id
+from alltypestiny a LEFT ANTI JOIN iceberg_v2_positional_update_all_rows b
+ON a.id = b.i;
+---- RESULTS
+4
+5
+0
+6
+7
+---- TYPES
+INT
+====
+---- QUERY
+# Regression test for IMPALA-13888 with multiple joins.
+select a.id from alltypestiny a
+LEFT ANTI JOIN iceberg_v2_positional_update_all_rows b
+ON a.id = b.i
+LEFT ANTI JOIN iceberg_v2_positional_update_all_rows c
+ON a.id = c.i
+LEFT ANTI JOIN iceberg_v2_positional_update_all_rows d
+ON a.id = d.i;
+---- RESULTS
+4
+5
+0
+6
+7
+---- TYPES
+INT
+====

Reply via email to