This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 40c46f9396 [iceberg] Fix contains_nan in partition summary for
float/double (#7788)
40c46f9396 is described below
commit 40c46f939602189edc03798c96f2405912834af2
Author: Arnav Balyan <[email protected]>
AuthorDate: Fri May 8 20:13:49 2026 +0530
[iceberg] Fix contains_nan in partition summary for float/double (#7788)
- IcebergManifestFile hardcodes contains_nan = false when building each
`IcebergPartitionSummary`.
- Readers use this value for predicate pruning, so hardcoding it to false
causes files that contain matching NaN values to be skipped.
- Detect NaN from the existing min/max stats and pass the result through.
The issue only affects float/double partition values.
---
.../iceberg/manifest/IcebergManifestFile.java | 21 +++++++++-
.../paimon/iceberg/IcebergCompatibilityTest.java | 45 ++++++++++++++++++++++
2 files changed, 65 insertions(+), 1 deletion(-)
diff --git
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
index a717ca0da0..46fd143906 100644
---
a/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
+++
b/paimon-core/src/main/java/org/apache/paimon/iceberg/manifest/IcebergManifestFile.java
@@ -255,10 +255,19 @@ public class IcebergManifestFile extends
ObjectsFile<IcebergManifestEntry> {
for (int i = 0; i < stats.length; i++) {
SimpleColStats fieldStats = stats[i];
DataType type = partitionType.getTypeAt(i);
+ boolean containsNan = false;
+ switch (type.getTypeRoot()) {
+ case FLOAT:
+ case DOUBLE:
+ containsNan = isNaN(fieldStats.min()) ||
isNaN(fieldStats.max());
+ break;
+ default:
+ // contains_nan is only meaningful for FLOAT/DOUBLE
per the Iceberg spec
+ }
partitionSummaries.add(
new IcebergPartitionSummary(
Objects.requireNonNull(fieldStats.nullCount())
> 0,
- false, // TODO correct it?
+ containsNan,
toByteBuffer(type, fieldStats.min()).array(),
toByteBuffer(type, fieldStats.max()).array()));
}
@@ -278,5 +287,15 @@ public class IcebergManifestFile extends
ObjectsFile<IcebergManifestEntry> {
deletedRowsCount,
partitionSummaries);
}
+
+ private boolean isNaN(@Nullable Object value) {
+ if (value instanceof Float) {
+ return Float.isNaN((Float) value);
+ }
+ if (value instanceof Double) {
+ return Double.isNaN((Double) value);
+ }
+ return false;
+ }
}
}
diff --git
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
index 2b5e794a57..2e09a61df1 100644
---
a/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
+++
b/paimon-core/src/test/java/org/apache/paimon/iceberg/IcebergCompatibilityTest.java
@@ -752,6 +752,51 @@ public class IcebergCompatibilityTest {
"Record(2, {20=[Record(cherry, 200), Record(pear,
201)]})");
}
+ @Test
+ public void testDoublePartitionContainsNan() throws Exception {
+ RowType rowType =
+ RowType.of(
+ new DataType[] {DataTypes.DOUBLE(), DataTypes.INT()},
+ new String[] {"value", "id"});
+ FileStoreTable table =
+ createPaimonTable(
+ rowType, Collections.singletonList("value"),
Collections.emptyList(), -1);
+
+ String commitUser = UUID.randomUUID().toString();
+ TableWriteImpl<?> write = table.newWrite(commitUser);
+ TableCommitImpl commit = table.newCommit(commitUser);
+
+ write.write(GenericRow.of(1.0, 100), 1);
+ write.write(GenericRow.of(2.0, 200), 1);
+ write.write(GenericRow.of(Double.NaN, 300), 1);
+ commit.commit(1, write.prepareCommit(false, 1));
+ write.close();
+ commit.close();
+
+ FileIO fileIO = table.fileIO();
+ IcebergMetadata metadata =
+ IcebergMetadata.fromPath(
+ fileIO, new Path(table.location(),
"metadata/v1.metadata.json"));
+
+ String currentSnapshotManifest =
metadata.currentSnapshot().manifestList();
+ File snapShotAvroFile = new File(currentSnapshotManifest);
+
+ boolean sawNanPartitionSummary = false;
+ try (DataFileReader<GenericRecord> dataFileReader =
+ new DataFileReader<>(
+ new SeekableFileInput(snapShotAvroFile), new
GenericDatumReader<>())) {
+ while (dataFileReader.hasNext()) {
+ GenericRecord record = dataFileReader.next();
+ String partitionSummary = record.get("partitions").toString();
+ if (partitionSummary.contains("contains_nan\": true")) {
+ sawNanPartitionSummary = true;
+ }
+ }
+ }
+
+ assertThat(sawNanPartitionSummary).isTrue();
+ }
+
@Test
public void testStringPartitionNullPadding() throws Exception {
RowType rowType =