This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 40c46f9396 [iceberg] Fix contains_nan in partition summary for 
float/double (#7788)
40c46f9396 is described below

commit 40c46f939602189edc03798c96f2405912834af2
Author: Arnav Balyan <[email protected]>
AuthorDate: Fri May 8 20:13:49 2026 +0530

    [iceberg] Fix contains_nan in partition summary for float/double (#7788)
    
    - IcebergManifestFile hardcodes contains_nan = false in
    `IcebergPartitionSummary`.
    - Readers use this value for predicate pruning; hardcoding it to false
    causes files that contain NaN matches to be skipped.
    - Detect NaN via the existing min/max stats and pass the result through.
    The issue only affects float/double partition values.
---
 .../iceberg/manifest/IcebergManifestFile.java      | 21 +++++++++-
 .../paimon/iceberg/IcebergCompatibilityTest.java   | 45 ++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

diff --git 
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
 
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index a717ca0da0..46fd143906 100644
--- 
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++ 
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -255,10 +255,19 @@ public class IcebergManifestFile extends 
ObjectsFile<IcebergManifestEntry> {
             for (int i = 0; i < stats.length; i++) {
                 SimpleColStats fieldStats = stats[i];
                 DataType type = partitionType.getTypeAt(i);
+                boolean containsNan = false;
+                switch (type.getTypeRoot()) {
+                    case FLOAT:
+                    case DOUBLE:
+                        containsNan = isNaN(fieldStats.min()) || 
isNaN(fieldStats.max());
+                        break;
+                    default:
+                        // contains_nan is only meaningful for FLOAT/DOUBLE 
per the Iceberg spec
+                }
                 partitionSummaries.add(
                         new IcebergPartitionSummary(
                                 Objects.requireNonNull(fieldStats.nullCount()) 
> 0,
-                                false, // TODO correct it?
+                                containsNan,
                                 toByteBuffer(type, fieldStats.min()).array(),
                                 toByteBuffer(type, fieldStats.max()).array()));
             }
@@ -278,5 +287,15 @@ public class IcebergManifestFile extends 
ObjectsFile<IcebergManifestEntry> {
                     deletedRowsCount,
                     partitionSummaries);
         }
+
+        private boolean isNaN(@Nullable Object value) {
+            if (value instanceof Float) {
+                return Float.isNaN((Float) value);
+            }
+            if (value instanceof Double) {
+                return Double.isNaN((Double) value);
+            }
+            return false;
+        }
     }
 }
diff --git 
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
 
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
index 2b5e794a57..2e09a61df1 100644
--- 
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
+++ 
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
@@ -752,6 +752,51 @@ public class IcebergCompatibilityTest {
                         "Record(2, {20=[Record(cherry, 200), Record(pear, 
201)]})");
     }
 
+    @Test
+    public void testDoublePartitionContainsNan() throws Exception {
+        RowType rowType =
+                RowType.of(
+                        new DataType[] {DataTypes.DOUBLE(), DataTypes.INT()},
+                        new String[] {"value", "id"});
+        FileStoreTable table =
+                createPaimonTable(
+                        rowType, Collections.singletonList("value"), 
Collections.emptyList(), -1);
+
+        String commitUser = UUID.randomUUID().toString();
+        TableWriteImpl<?> write = table.newWrite(commitUser);
+        TableCommitImpl commit = table.newCommit(commitUser);
+
+        write.write(GenericRow.of(1.0, 100), 1);
+        write.write(GenericRow.of(2.0, 200), 1);
+        write.write(GenericRow.of(Double.NaN, 300), 1);
+        commit.commit(1, write.prepareCommit(false, 1));
+        write.close();
+        commit.close();
+
+        FileIO fileIO = table.fileIO();
+        IcebergMetadata metadata =
+                IcebergMetadata.fromPath(
+                        fileIO, new Path(table.location(), 
"metadata/v1.metadata.json"));
+
+        String currentSnapshotManifest = 
metadata.currentSnapshot().manifestList();
+        File snapShotAvroFile = new File(currentSnapshotManifest);
+
+        boolean sawNanPartitionSummary = false;
+        try (DataFileReader<GenericRecord> dataFileReader =
+                new DataFileReader<>(
+                        new SeekableFileInput(snapShotAvroFile), new 
GenericDatumReader<>())) {
+            while (dataFileReader.hasNext()) {
+                GenericRecord record = dataFileReader.next();
+                String partitionSummary = record.get("partitions").toString();
+                if (partitionSummary.contains("contains_nan\": true")) {
+                    sawNanPartitionSummary = true;
+                }
+            }
+        }
+
+        assertThat(sawNanPartitionSummary).isTrue();
+    }
+
     @Test
     public void testStringPartitionNullPadding() throws Exception {
         RowType rowType =

Reply via email to