This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new b35c7ec1b0 Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721)
b35c7ec1b0 is described below
commit b35c7ec1b03e3897da68960cd556d635b2f5ae54
Author: Huaxin Gao <[email protected]>
AuthorDate: Mon Dec 1 00:45:09 2025 -0800
Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721)
---
.../java/org/apache/iceberg/ContentFileParser.java | 2 +-
.../org/apache/iceberg/TestContentFileParser.java | 39 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
index b48334d822..3d0deba3df 100644
--- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java
+++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
@@ -228,7 +228,7 @@ public class ContentFileParser {
DataFile.NULL_VALUE_COUNTS.type(), contentFile.nullValueCounts(), generator);
}
- if (contentFile.nullValueCounts() != null) {
+ if (contentFile.nanValueCounts() != null) {
generator.writeFieldName(NAN_VALUE_COUNTS);
SingleValueParser.toJson(
DataFile.NAN_VALUE_COUNTS.type(), contentFile.nanValueCounts(), generator);
diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
index 3f463f722e..f8f0d77b0f 100644
--- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
@@ -64,6 +64,21 @@ public class TestContentFileParser {
.hasMessage("Invalid partition spec: null");
}
+ @Test
+ public void testNanCountsOnlyWritesNanValueCounts() throws Exception {
+ PartitionSpec spec = PartitionSpec.unpartitioned();
+ DataFile dataFile = dataFileWithOnlyNanCounts(spec);
+ String jsonStr = ContentFileParser.toJson(dataFile, spec);
+ // ensure nan counts are present and null counts are not emitted
+ assertThat(jsonStr).contains("\"nan-value-counts\"");
+ assertThat(jsonStr).doesNotContain("\"null-value-counts\"");
+ JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr);
+ ContentFile<?> deserialized =
+ ContentFileParser.fromJson(jsonNode, Map.of(TestBase.SPEC.specId(), spec));
+ assertThat(deserialized).isInstanceOf(DataFile.class);
+ assertContentFileEquals(dataFile, deserialized, spec);
+ }
+
@ParameterizedTest
@MethodSource("provideSpecAndDataFile")
public void testDataFile(PartitionSpec spec, DataFile dataFile, String expectedJson)
@@ -125,6 +140,30 @@ public class TestContentFileParser {
return builder.build();
}
+ private static DataFile dataFileWithOnlyNanCounts(PartitionSpec spec) {
+ DataFiles.Builder builder =
+ DataFiles.builder(spec)
+ .withPath("/path/to/data-nan-only.parquet")
+ .withMetrics(
+ new Metrics(
+ 1L, // record count
+ null, // column sizes
+ null, // value counts
+ null, // null value counts (intentionally null)
+ ImmutableMap.of(3, 0L), // nan value counts present
+ null, // lower bounds
+ null // upper bounds
+ ))
+ .withFileSizeInBytes(10)
+ .withRecordCount(1);
+
+ if (spec.isPartitioned()) {
+ builder.withPartitionPath("data_bucket=1");
+ }
+
+ return builder.build();
+ }
+
private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) {
if (spec.isUnpartitioned()) {
return
"{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\","