This is an automated email from the ASF dual-hosted git repository.

etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/main by this push:
     new c05f6c98d6 API: required nested fields within optional structs can 
produce null (#13804)
c05f6c98d6 is described below

commit c05f6c98d66a5a05ec5dca0b7a1c28036187317d
Author: Dejan Gvozdenac <d.gvozdena...@gmail.com>
AuthorDate: Fri Sep 19 02:53:17 2025 -0400

    API: required nested fields within optional structs can produce null 
(#13804)
---
 api/src/main/java/org/apache/iceberg/Accessor.java |   5 +
 .../main/java/org/apache/iceberg/Accessors.java    |  50 +++++++---
 .../apache/iceberg/expressions/BoundReference.java |   4 +-
 .../iceberg/expressions/TestBoundReference.java    | 108 +++++++++++++++++++++
 .../iceberg/parquet/TestBloomRowGroupFilter.java   |   8 +-
 .../org/apache/iceberg/spark/sql/TestSelect.java   |  17 ++++
 .../org/apache/iceberg/spark/sql/TestSelect.java   |  17 ++++
 .../org/apache/iceberg/spark/sql/TestSelect.java   |  17 ++++
 8 files changed, 209 insertions(+), 17 deletions(-)

diff --git a/api/src/main/java/org/apache/iceberg/Accessor.java 
b/api/src/main/java/org/apache/iceberg/Accessor.java
index 2a20a04df9..20b09bf910 100644
--- a/api/src/main/java/org/apache/iceberg/Accessor.java
+++ b/api/src/main/java/org/apache/iceberg/Accessor.java
@@ -25,4 +25,9 @@ public interface Accessor<T> extends Serializable {
   Object get(T container);
 
   Type type();
+
+  /** Returns true if the current field or any ancestor in the access path is 
optional. */
+  default boolean hasOptionalFieldInPath() {
+    return false;
+  }
 }
diff --git a/api/src/main/java/org/apache/iceberg/Accessors.java 
b/api/src/main/java/org/apache/iceberg/Accessors.java
index 0b36730fbb..06ee0a916c 100644
--- a/api/src/main/java/org/apache/iceberg/Accessors.java
+++ b/api/src/main/java/org/apache/iceberg/Accessors.java
@@ -59,11 +59,13 @@ public class Accessors {
     private final int position;
     private final Type type;
     private final Class<?> javaClass;
+    private final boolean hasOptionalFieldInPath;
 
-    PositionAccessor(int pos, Type type) {
+    PositionAccessor(int pos, Type type, boolean isOptional) {
       this.position = pos;
       this.type = type;
       this.javaClass = type.typeId().javaClass();
+      this.hasOptionalFieldInPath = isOptional;
     }
 
     @Override
@@ -84,6 +86,11 @@ public class Accessors {
       return javaClass;
     }
 
+    @Override
+    public boolean hasOptionalFieldInPath() {
+      return hasOptionalFieldInPath;
+    }
+
     @Override
     public String toString() {
       return "Accessor(positions=[" + position + "], type=" + type + ")";
@@ -95,12 +102,14 @@ public class Accessors {
     private final int p1;
     private final Type type;
     private final Class<?> javaClass;
+    private final boolean hasOptionalFieldInPath;
 
-    Position2Accessor(int pos, PositionAccessor wrapped) {
+    Position2Accessor(int pos, PositionAccessor wrapped, boolean isOptional) {
       this.p0 = pos;
       this.p1 = wrapped.position();
       this.type = wrapped.type();
       this.javaClass = wrapped.javaClass();
+      this.hasOptionalFieldInPath = isOptional || 
wrapped.hasOptionalFieldInPath();
     }
 
     @Override
@@ -117,6 +126,11 @@ public class Accessors {
       return javaClass;
     }
 
+    @Override
+    public boolean hasOptionalFieldInPath() {
+      return hasOptionalFieldInPath;
+    }
+
     @Override
     public String toString() {
       return "Accessor(positions=[" + p0 + ", " + p1 + "], type=" + type + ")";
@@ -129,13 +143,15 @@ public class Accessors {
     private final int p2;
     private final Type type;
     private final Class<?> javaClass;
+    private final boolean hasOptionalFieldInPath;
 
-    Position3Accessor(int pos, Position2Accessor wrapped) {
+    Position3Accessor(int pos, Position2Accessor wrapped, boolean isOptional) {
       this.p0 = pos;
       this.p1 = wrapped.p0;
       this.p2 = wrapped.p1;
       this.type = wrapped.type();
       this.javaClass = wrapped.javaClass();
+      this.hasOptionalFieldInPath = isOptional || 
wrapped.hasOptionalFieldInPath();
     }
 
     @Override
@@ -148,6 +164,11 @@ public class Accessors {
       return type;
     }
 
+    @Override
+    public boolean hasOptionalFieldInPath() {
+      return hasOptionalFieldInPath;
+    }
+
     @Override
     public String toString() {
       return "Accessor(positions=[" + p0 + ", " + p1 + ", " + p2 + "], type=" 
+ type + ")";
@@ -157,10 +178,12 @@ public class Accessors {
   private static class WrappedPositionAccessor implements Accessor<StructLike> 
{
     private final int position;
     private final Accessor<StructLike> accessor;
+    private final boolean hasOptionalFieldInPath;
 
-    WrappedPositionAccessor(int pos, Accessor<StructLike> accessor) {
+    WrappedPositionAccessor(int pos, Accessor<StructLike> accessor, boolean 
isOptional) {
       this.position = pos;
       this.accessor = accessor;
+      this.hasOptionalFieldInPath = isOptional || 
accessor.hasOptionalFieldInPath();
     }
 
     @Override
@@ -177,27 +200,32 @@ public class Accessors {
       return accessor.type();
     }
 
+    @Override
+    public boolean hasOptionalFieldInPath() {
+      return hasOptionalFieldInPath;
+    }
+
     @Override
     public String toString() {
       return "WrappedAccessor(position=" + position + ", wrapped=" + accessor 
+ ")";
     }
   }
 
-  private static Accessor<StructLike> newAccessor(int pos, Type type) {
-    return new PositionAccessor(pos, type);
+  private static Accessor<StructLike> newAccessor(int pos, boolean isOptional, 
Type type) {
+    return new PositionAccessor(pos, type, isOptional);
   }
 
   private static Accessor<StructLike> newAccessor(
       int pos, boolean isOptional, Accessor<StructLike> accessor) {
     if (isOptional) {
       // the wrapped position handles null layers
-      return new WrappedPositionAccessor(pos, accessor);
+      return new WrappedPositionAccessor(pos, accessor, isOptional);
     } else if (accessor.getClass() == PositionAccessor.class) {
-      return new Position2Accessor(pos, (PositionAccessor) accessor);
+      return new Position2Accessor(pos, (PositionAccessor) accessor, 
isOptional);
     } else if (accessor instanceof Position2Accessor) {
-      return new Position3Accessor(pos, (Position2Accessor) accessor);
+      return new Position3Accessor(pos, (Position2Accessor) accessor, 
isOptional);
     } else {
-      return new WrappedPositionAccessor(pos, accessor);
+      return new WrappedPositionAccessor(pos, accessor, isOptional);
     }
   }
 
@@ -226,7 +254,7 @@ public class Accessors {
         }
 
         // Add an accessor for this field as an Object (may or may not be 
primitive).
-        accessors.put(field.fieldId(), newAccessor(i, field.type()));
+        accessors.put(field.fieldId(), newAccessor(i, field.isOptional(), 
field.type()));
       }
 
       return accessors;
diff --git 
a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java 
b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
index 0295fe518f..decda85f2e 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/BoundReference.java
@@ -57,7 +57,9 @@ public class BoundReference<T> implements BoundTerm<T>, 
Reference<T> {
 
   @Override
   public boolean producesNull() {
-    return field.isOptional();
+    // A leaf required field can evaluate to null if it is optional itself or 
any
+    // ancestor on the path is optional.
+    return accessor.hasOptionalFieldInPath();
   }
 
   @Override
diff --git 
a/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java 
b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
new file mode 100644
index 0000000000..ed921b248f
--- /dev/null
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestBoundReference.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.expressions;
+
+import static org.apache.iceberg.types.Types.NestedField.optional;
+import static org.apache.iceberg.types.Types.NestedField.required;
+import static org.assertj.core.api.Assertions.assertThat;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+import org.apache.iceberg.Accessor;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.types.Types;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+public class TestBoundReference {
+  // Build a schema with a single nested struct with optionalList.size() 
levels with the following
+  // structure:
+  // s1: struct(s2: struct(s3: struct(..., sn: struct(leaf: int))))
+  // where each s{i} is an optional struct if optionalList.get(i) is true and 
a required struct if
+  // false
+  private static Schema buildSchemaFromOptionalList(List<Boolean> 
optionalList, String leafName) {
+    Preconditions.checkArgument(
+        optionalList != null && !optionalList.isEmpty(), "optionalList must 
not be null or empty");
+    Types.NestedField leaf =
+        optionalList.get(optionalList.size() - 1)
+            ? optional(optionalList.size(), leafName, Types.IntegerType.get())
+            : required(optionalList.size(), leafName, Types.IntegerType.get());
+
+    Types.StructType current = Types.StructType.of(leaf);
+
+    for (int i = optionalList.size() - 2; i >= 0; i--) {
+      int id = i + 1;
+      String name = "s" + (i + 1);
+      current =
+          Types.StructType.of(
+              optionalList.get(i) ? optional(id, name, current) : required(id, 
name, current));
+    }
+
+    return new Schema(current.fields());
+  }
+
+  private static Stream<Arguments> producesNullCases() {
+    // the test cases specify two arguments:
+    // - the first is a list of booleans that indicate whether fields in the 
nested sequence of
+    //   structs are optional or required. For example, [false, true, false] 
will construct a
+    //   struct like s1.s2.s3 with s1 being required, s2 being optional, and 
s3 being required.
+    // - the second is a boolean that indicates whether calling producesNull() 
on the BoundReference
+    //   of the leaf field should return true or false.
+    return Stream.of(
+        // basic fields, no struct levels
+        Arguments.of(Arrays.asList(false), false),
+        Arguments.of(Arrays.asList(true), true),
+        // one level
+        Arguments.of(Arrays.asList(false, false), false),
+        Arguments.of(Arrays.asList(false, true), true),
+        Arguments.of(Arrays.asList(true, false), true),
+        // two levels
+        Arguments.of(Arrays.asList(false, false, false), false),
+        Arguments.of(Arrays.asList(false, false, true), true),
+        Arguments.of(Arrays.asList(true, false, false), true),
+        Arguments.of(Arrays.asList(false, true, false), true),
+        // three levels
+        Arguments.of(Arrays.asList(false, false, false, false), false),
+        Arguments.of(Arrays.asList(false, false, false, true), true),
+        Arguments.of(Arrays.asList(true, false, false, false), true),
+        Arguments.of(Arrays.asList(false, true, false, false), true),
+        // four levels
+        Arguments.of(Arrays.asList(false, false, false, false, false), false),
+        Arguments.of(Arrays.asList(false, false, false, false, true), true),
+        Arguments.of(Arrays.asList(true, false, false, false, false), true),
+        Arguments.of(Arrays.asList(false, true, true, true, false), true));
+  }
+
+  @ParameterizedTest
+  @MethodSource("producesNullCases")
+  public void testProducesNull(List<Boolean> optionalList, boolean 
expectedProducesNull) {
+    String leafName = "leaf";
+    Schema schema = buildSchemaFromOptionalList(optionalList, leafName);
+    int leafId = optionalList.size();
+    Types.NestedField leafField = schema.findField(leafId);
+    Accessor<StructLike> accessor = schema.accessorForField(leafId);
+
+    BoundReference<Integer> ref = new BoundReference<>(leafField, accessor, 
leafName);
+    assertThat(ref.producesNull()).isEqualTo(expectedProducesNull);
+  }
+}
diff --git 
a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java 
b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
index f54ddfcc5a..694c72876c 100644
--- 
a/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
+++ 
b/parquet/src/test/java/org/apache/iceberg/parquet/TestBloomRowGroupFilter.java
@@ -297,9 +297,7 @@ public class TestBloomRowGroupFilter {
     shouldRead =
         new ParquetBloomRowGroupFilter(SCHEMA, 
notNull("struct_not_null.int_field"))
             .shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
-    assertThat(shouldRead)
-        .as("Should read: this field is required and are always not-null")
-        .isTrue();
+    assertThat(shouldRead).as("Should read: bloom filter doesn't 
help").isTrue();
   }
 
   @Test
@@ -323,8 +321,8 @@ public class TestBloomRowGroupFilter {
         new ParquetBloomRowGroupFilter(SCHEMA, 
isNull("struct_not_null.int_field"))
             .shouldRead(parquetSchema, rowGroupMetadata, bloomStore);
     assertThat(shouldRead)
-        .as("Should skip: this field is required and are always not-null")
-        .isFalse();
+        .as("Should read: required nested field can still be null if any 
ancestor is optional")
+        .isTrue();
   }
 
   @Test
diff --git 
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java 
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index ab15139820..a21dfd388d 100644
--- 
a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++ 
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
     assertEquals("Should return all expected rows", ImmutableList.of(row(1)), 
result);
     sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
   }
+
+  @TestTemplate
+  public void testRequiredNestedFieldInOptionalStructFilter() {
+    String nestedStructTable = tableName("nested_struct_table");
+    sql(
+        "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT 
NULL>) "
+            + "USING iceberg",
+        nestedStructTable);
+    sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+    sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+    List<Object[]> result =
+        sql("SELECT id FROM %s WHERE address.street IS NULL", 
nestedStructTable);
+
+    assertEquals("Should return all expected rows", ImmutableList.of(row(0)), 
result);
+    sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+  }
 }
diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index 240622750f..68b93be479 100644
--- 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++ 
b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
     assertEquals("Should return all expected rows", ImmutableList.of(row(1)), 
result);
     sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
   }
+
+  @TestTemplate
+  public void testRequiredNestedFieldInOptionalStructFilter() {
+    String nestedStructTable = tableName("nested_struct_table");
+    sql(
+        "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT 
NULL>) "
+            + "USING iceberg",
+        nestedStructTable);
+    sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+    sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+    List<Object[]> result =
+        sql("SELECT id FROM %s WHERE address.street IS NULL", 
nestedStructTable);
+
+    assertEquals("Should return all expected rows", ImmutableList.of(row(0)), 
result);
+    sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+  }
 }
diff --git 
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java 
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
index 240622750f..68b93be479 100644
--- 
a/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
+++ 
b/spark/v4.0/spark/src/test/java/org/apache/iceberg/spark/sql/TestSelect.java
@@ -613,4 +613,21 @@ public class TestSelect extends CatalogTestBase {
     assertEquals("Should return all expected rows", ImmutableList.of(row(1)), 
result);
     sql("DROP TABLE IF EXISTS %s", complexTypeTableName);
   }
+
+  @TestTemplate
+  public void testRequiredNestedFieldInOptionalStructFilter() {
+    String nestedStructTable = tableName("nested_struct_table");
+    sql(
+        "CREATE TABLE %s (id INT NOT NULL, address STRUCT<street: STRING NOT 
NULL>) "
+            + "USING iceberg",
+        nestedStructTable);
+    sql("INSERT INTO %s VALUES (0, NULL)", nestedStructTable);
+    sql("INSERT INTO %s VALUES (1, STRUCT('123 Main St'))", nestedStructTable);
+
+    List<Object[]> result =
+        sql("SELECT id FROM %s WHERE address.street IS NULL", 
nestedStructTable);
+
+    assertEquals("Should return all expected rows", ImmutableList.of(row(0)), 
result);
+    sql("DROP TABLE IF EXISTS %s", nestedStructTable);
+  }
 }

Reply via email to