This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new dfc025e17 GH-3213: Add the configuration for ByteStreamSplit encoding (#3214)
dfc025e17 is described below
commit dfc025e17e21a326addaf0e43c493e085cbac8f4
Author: Joey Tong <[email protected]>
AuthorDate: Sun Nov 9 21:22:31 2025 +0800
GH-3213: Add the configuration for ByteStreamSplit encoding (#3214)
---
.../org/apache/parquet/hadoop/ParquetWriter.java | 5 +++
.../apache/parquet/hadoop/TestParquetWriter.java | 38 ++++++++++++++++++++++
2 files changed, 43 insertions(+)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index 05f0e2e3a..8eb5f7f17 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -700,6 +700,11 @@ public class ParquetWriter<T> implements Closeable {
return self();
}
+  public SELF withByteStreamSplitEncoding(String columnPath, boolean enableByteStreamSplit) {
+    encodingPropsBuilder.withByteStreamSplitEncoding(columnPath, enableByteStreamSplit);
+    return self();
+  }
+
/**
   * Enable or disable dictionary encoding of the specified column for the constructed writer.
*
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
index 9a69ee478..03cd98ac6 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestParquetWriter.java
@@ -626,6 +626,44 @@ public class TestParquetWriter {
}
}
+ @Test
+ public void testByteStreamSplitEncodingControl() throws Exception {
+ MessageType schema = Types.buildMessage()
+ .required(FLOAT)
+ .named("float_field")
+ .required(INT32)
+ .named("int32_field")
+ .named("test_schema");
+
+ File file = temp.newFile();
+ temp.delete();
+
+ Path path = new Path(file.getAbsolutePath());
+ SimpleGroupFactory factory = new SimpleGroupFactory(schema);
+ try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
+ .withType(schema)
+ .withByteStreamSplitEncoding(true)
+ .withByteStreamSplitEncoding("int32_field", true)
+ .build()) {
+      writer.write(factory.newGroup().append("float_field", 0.3f).append("int32_field", 42));
+ }
+
+    try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
+ for (BlockMetaData block : reader.getFooter().getBlocks()) {
+ for (ColumnChunkMetaData column : block.getColumns()) {
+          assertTrue(column.getEncodings().contains(Encoding.BYTE_STREAM_SPLIT));
+ }
+ }
+ }
+
+    try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), path).build()) {
+ Group group = reader.read();
+ assertEquals(0.3f, group.getFloat("float_field", 0), 0.0);
+ assertEquals(42, group.getInteger("int32_field", 0));
+ }
+ }
+
@Test
public void testV2WriteAllNullValues() throws Exception {
testV2WriteAllNullValues(null, null);