This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 00b6bab59 GH-3188: Set the global configured column stats enable flag
to default (#3189)
00b6bab59 is described below
commit 00b6bab597d55e5fc7af7e19f951794c31d87577
Author: huaxiangsun <[email protected]>
AuthorDate: Fri Apr 11 18:00:49 2025 -0700
GH-3188: Set the global configured column stats enable flag to default
(#3189)
---
.../src/main/java/org/apache/parquet/column/ParquetProperties.java | 1 +
.../src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java | 7 ++++++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
index 9aaef4b3c..cb5931581 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/ParquetProperties.java
@@ -708,6 +708,7 @@ public class ParquetProperties {
}
public Builder withStatisticsEnabled(boolean enabled) {
+ this.statistics.withDefaultValue(enabled);
this.statisticsEnabled = enabled;
return this;
}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
index c8e8f71a9..739aa85d2 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
@@ -543,7 +543,7 @@ public class TestParquetWriter {
}
@Test
- public void testSizeStatisticsControl() throws Exception {
+ public void testSizeStatisticsAndStatisticsControl() throws Exception {
MessageType schema = Types.buildMessage()
.required(BINARY)
.named("string_field")
@@ -568,6 +568,7 @@ public class TestParquetWriter {
try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
.withType(schema)
.withSizeStatisticsEnabled(false)
+ .withStatisticsEnabled(false) // Disable column statistics globally
.build()) {
writer.write(group);
}
@@ -576,6 +577,7 @@ public class TestParquetWriter {
// Verify size statistics are disabled globally
for (BlockMetaData block : reader.getFooter().getBlocks()) {
for (ColumnChunkMetaData column : block.getColumns()) {
+ assertTrue(column.getStatistics().isEmpty()); // Make sure there is no column statistics
assertNull(column.getSizeStatistics());
}
}
@@ -589,6 +591,7 @@ public class TestParquetWriter {
.withType(schema)
.withSizeStatisticsEnabled(true) // enable globally
.withSizeStatisticsEnabled("boolean_field", false) // disable for specific column
+ .withStatisticsEnabled("boolean_field", false) // disable column statistics
.build()) {
writer.write(group);
}
@@ -599,8 +602,10 @@ public class TestParquetWriter {
for (ColumnChunkMetaData column : block.getColumns()) {
if (column.getPath().toDotString().equals("boolean_field")) {
assertNull(column.getSizeStatistics());
+ assertTrue(column.getStatistics().isEmpty());
} else {
assertTrue(column.getSizeStatistics().isValid());
+ assertFalse(column.getStatistics().isEmpty());
}
}
}