This is an automated email from the ASF dual-hosted git repository.

wgtmac pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 63aebcc0c GH-3574: Statistics.toParquetStatistics always set null_count(#3575)
63aebcc0c is described below

commit 63aebcc0cf3684ba6c9dc2b7243c6007d3a1269f
Author: Mahdi Dibaiee <[email protected]>
AuthorDate: Tue Jun 16 15:42:11 2026 +0100

    GH-3574: Statistics.toParquetStatistics always set null_count(#3575)
---
 .../apache/parquet/format/converter/ParquetMetadataConverter.java  | 4 +++-
 .../parquet/format/converter/TestParquetMetadataConverter.java     | 7 ++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
index 3597898c3..89e39c3b6 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
@@ -800,12 +800,14 @@ public class ParquetMetadataConverter {
   public static Statistics toParquetStatistics(
       org.apache.parquet.column.statistics.Statistics stats, int 
truncateLength) {
     Statistics formatStats = new Statistics();
+    if (!stats.isEmpty()) {
+      formatStats.setNull_count(stats.getNumNulls());
+    }
     // Don't write stats larger than the max size rather than truncating. The
     // rationale is that some engines may use the minimum value in the page as
     // the true minimum for aggregations and there is no way to mark that a
     // value has been truncated and is a lower bound and not in the page.
     if (!stats.isEmpty() && withinLimit(stats, truncateLength)) {
-      formatStats.setNull_count(stats.getNumNulls());
       if (stats.hasNonNullValue()) {
         byte[] min;
         byte[] max;
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
index e4851d8ec..7972a75ba 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/format/converter/TestParquetMetadataConverter.java
@@ -807,7 +807,7 @@ public class TestParquetMetadataConverter {
     }
     Assert.assertEquals("Num nulls should match", 3004, 
formatStats.getNull_count());
 
-    // convert to empty stats because the values are too large
+    // min/max are not written because the values are too large, but null 
count is always written
     stats.setMinMaxFromBytes(max, max);
 
     formatStats = helper.toParquetStatistics(stats);
@@ -816,7 +816,7 @@ public class TestParquetMetadataConverter {
     Assert.assertFalse("Max should not be set", formatStats.isSetMax());
     Assert.assertFalse("Min_value should not be set", 
formatStats.isSetMin_value());
     Assert.assertFalse("Max_value should not be set", 
formatStats.isSetMax_value());
-    Assert.assertFalse("Num nulls should not be set", 
formatStats.isSetNull_count());
+    Assert.assertEquals("Num nulls should match", 3004, 
formatStats.getNull_count());
 
     Statistics roundTripStats = 
ParquetMetadataConverter.fromParquetStatisticsInternal(
         Version.FULL_VERSION,
@@ -824,7 +824,8 @@ public class TestParquetMetadataConverter {
         new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, ""),
         ParquetMetadataConverter.SortOrder.SIGNED);
 
-    Assert.assertTrue(roundTripStats.isEmpty());
+    Assert.assertFalse("Round-trip stats should not be empty (null count is 
set)", roundTripStats.isEmpty());
+    Assert.assertEquals("Round-trip null count should match", 3004, 
roundTripStats.getNumNulls());
   }
 
   @Test

Reply via email to