This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch 1.10.x
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/1.10.x by this push:
new 53b252ad60 Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721) (#14775)
53b252ad60 is described below
commit 53b252ad60f3ff57e1c646b09e0e8bf8dfa9b923
Author: Huaxin Gao <[email protected]>
AuthorDate: Fri Dec 5 17:54:44 2025 -0800
Core: Fix NAN_VALUE_COUNTS serialization for ContentFile (#14721) (#14775)
(cherry picked from commit b35c7ec1b03e3897da68960cd556d635b2f5ae54)
---
.../java/org/apache/iceberg/ContentFileParser.java | 2 +-
.../org/apache/iceberg/TestContentFileParser.java | 39 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/iceberg/ContentFileParser.java b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
index b48334d822..3d0deba3df 100644
--- a/core/src/main/java/org/apache/iceberg/ContentFileParser.java
+++ b/core/src/main/java/org/apache/iceberg/ContentFileParser.java
@@ -228,7 +228,7 @@ public class ContentFileParser {
DataFile.NULL_VALUE_COUNTS.type(), contentFile.nullValueCounts(), generator);
}
- if (contentFile.nullValueCounts() != null) {
+ if (contentFile.nanValueCounts() != null) {
generator.writeFieldName(NAN_VALUE_COUNTS);
SingleValueParser.toJson(
DataFile.NAN_VALUE_COUNTS.type(), contentFile.nanValueCounts(), generator);
diff --git a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
index 3f463f722e..f8f0d77b0f 100644
--- a/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
+++ b/core/src/test/java/org/apache/iceberg/TestContentFileParser.java
@@ -64,6 +64,21 @@ public class TestContentFileParser {
.hasMessage("Invalid partition spec: null");
}
+ @Test
+ public void testNanCountsOnlyWritesNanValueCounts() throws Exception {
+ PartitionSpec spec = PartitionSpec.unpartitioned();
+ DataFile dataFile = dataFileWithOnlyNanCounts(spec);
+ String jsonStr = ContentFileParser.toJson(dataFile, spec);
+ // ensure nan counts are present and null counts are not emitted
+ assertThat(jsonStr).contains("\"nan-value-counts\"");
+ assertThat(jsonStr).doesNotContain("\"null-value-counts\"");
+ JsonNode jsonNode = JsonUtil.mapper().readTree(jsonStr);
+ ContentFile<?> deserialized =
+ ContentFileParser.fromJson(jsonNode, Map.of(TestBase.SPEC.specId(), spec));
+ assertThat(deserialized).isInstanceOf(DataFile.class);
+ assertContentFileEquals(dataFile, deserialized, spec);
+ }
+
@ParameterizedTest
@MethodSource("provideSpecAndDataFile")
public void testDataFile(PartitionSpec spec, DataFile dataFile, String expectedJson)
@@ -125,6 +140,30 @@ public class TestContentFileParser {
return builder.build();
}
+ private static DataFile dataFileWithOnlyNanCounts(PartitionSpec spec) {
+ DataFiles.Builder builder =
+ DataFiles.builder(spec)
+ .withPath("/path/to/data-nan-only.parquet")
+ .withMetrics(
+ new Metrics(
+ 1L, // record count
+ null, // column sizes
+ null, // value counts
+ null, // null value counts (intentionally null)
+ ImmutableMap.of(3, 0L), // nan value counts present
+ null, // lower bounds
+ null // upper bounds
+ ))
+ .withFileSizeInBytes(10)
+ .withRecordCount(1);
+
+ if (spec.isPartitioned()) {
+ builder.withPartitionPath("data_bucket=1");
+ }
+
+ return builder.build();
+ }
+
private static String dataFileJsonWithRequiredOnly(PartitionSpec spec) {
if (spec.isUnpartitioned()) {
return
"{\"spec-id\":0,\"content\":\"DATA\",\"file-path\":\"/path/to/data-a.parquet\",\"file-format\":\"PARQUET\","