(cloudberry-pxf) branch main updated: Bump Parquet version from 1.12.3 to 1.15.2

djwang Mon, 23 Feb 2026 18:45:35 -0800

This is an automated email from the ASF dual-hosted git repository.

djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git



The following commit(s) were added to refs/heads/main by this push:
     new c3793999 Bump Parquet version from 1.12.3 to 1.15.2
c3793999 is described below

commit c379399922ae851021ea363072b1b37a6e51a029
Author: Nikolay Antonov <[email protected]>
AuthorDate: Tue Feb 24 07:45:22 2026 +0500

    Bump Parquet version from 1.12.3 to 1.15.2
    
    ### Useful changes in Parquet 1.12.x -> 1.15.x
    * Support LZ4_RAW codec
    * Implement vectored IO in Parquet file format
    * More efficient memory usage in compression codecs
    
    Dependency tree changes are small:
    ```
    +--- org.apache.parquet:parquet-column:1.15.1
    |    +--- org.apache.parquet:parquet-common:1.15.1
    |    |    +--- org.apache.parquet:parquet-format-structures:1.15.1
    |    +--- org.apache.parquet:parquet-encoding:1.15.1
    |    |    +--- org.apache.parquet:parquet-common:1.15.1 (*)
    +--- org.apache.parquet:parquet-hadoop:1.15.1
    |    +--- org.apache.parquet:parquet-column:1.15.1 (*)
    |    +--- org.apache.parquet:parquet-format-structures:1.15.1 (*)
    |    +--- org.apache.parquet:parquet-common:1.15.1 (*)
    |    +--- org.xerial.snappy:snappy-java:1.1.10.7
    |    +--- io.airlift:aircompressor:2.0.2
    |    +--- commons-pool:commons-pool:1.6
    |    +--- com.github.luben:zstd-jni:1.5.6-6
    +--- org.apache.parquet:parquet-jackson:1.15.1
    +--- org.apache.parquet:parquet-generator:1.15.1
    +--- org.apache.parquet:parquet-pig:1.15.1
    |    +--- org.apache.parquet:parquet-column:1.15.1 (*)
    |    +--- org.apache.parquet:parquet-hadoop:1.15.1 (*)
    |    +--- org.apache.parquet:parquet-common:1.15.1 (*)
    \--- org.apache.parquet:parquet-format:2.10.0
    ```
    
    Parquet ships its own shaded Thrift library and does not depend on protobuf.
    `parquet-hadoop` expects `hadoop-client`, `hadoop-common`,
    `hadoop-annotations` and `hadoop-mapreduce-client-core` to be provided.
---
 .../pxf/automation/features/parquet/ParquetWriteTest.java           | 5 +++++
 docs/content/hdfs_parquet.html.md.erb                               | 4 ++--
 server/build.gradle                                                 | 6 +++---
 server/gradle.properties                                            | 2 +-
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git 
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
 
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
index 18218eac..0bd9b611 100644
--- 
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
+++ 
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
@@ -210,6 +210,11 @@ public class ParquetWriteTest extends BaseWritableFeature {
         runWritePrimitivesScenario("pxf_parquet_write_primitives_zstd", 
"pxf_parquet_read_primitives_zstd", "parquet_write_primitives_zstd", new 
String[]{"COMPRESSION_CODEC=zstd"});
     }
 
+    @Test(groups = {"features", "gpdb", "security", "hcfs"})
+    public void parquetWritePrimitivesLZ4_RAW() throws Exception {
+        runWritePrimitivesScenario("pxf_parquet_write_primitives_lz4_raw", 
"pxf_parquet_read_primitives_lz4_raw", "parquet_write_primitives_lz4_raw", new 
String[]{"COMPRESSION_CODEC=lz4_raw"});
+    }
+
     // Numeric precision not defined, test writing data precision in [1, 38]. 
All the data should be written correctly.
     @Test(groups = {"features", "gpdb", "security", "hcfs"})
     public void parquetWriteUndefinedPrecisionNumeric() throws Exception {
diff --git a/docs/content/hdfs_parquet.html.md.erb 
b/docs/content/hdfs_parquet.html.md.erb
index 2646f00e..86bed745 100644
--- a/docs/content/hdfs_parquet.html.md.erb
+++ b/docs/content/hdfs_parquet.html.md.erb
@@ -23,7 +23,7 @@ under the License.
 
 Use the PXF HDFS connector to read and write Parquet-format data. This section 
describes how to read and write HDFS files that are stored in Parquet format, 
including how to create, query, and insert into external tables that reference 
files in the HDFS data store.
 
-PXF supports reading or writing Parquet files compressed with these codecs: 
`snappy`, `gzip`, and `zstd`.
+PXF supports reading or writing Parquet files compressed with these codecs: 
`snappy`, `gzip`, `lz4_raw`, and `zstd`.
 
 PXF currently supports reading and writing primitive Parquet data types only.
 
@@ -208,7 +208,7 @@ The PXF `hdfs:parquet` profile supports encoding- and 
compression-related write
 
 | Write Option  | Value Description |
 |-------|-------------------------------------|
-| COMPRESSION_CODEC    | The compression codec alias. Supported compression 
codecs for writing Parquet data include: `snappy`, `gzip`, `zstd`, and 
`uncompressed` . If this option is not provided, PXF compresses the data using 
`snappy` compression. |
+| COMPRESSION_CODEC    | The compression codec alias. Supported compression 
codecs for writing Parquet data include: `snappy`, `gzip`, `lz4_raw`, `zstd`, 
and `uncompressed` . If this option is not provided, PXF compresses the data 
using `snappy` compression. |
 | ROWGROUP_SIZE | A Parquet file consists of one or more row groups, a logical 
partitioning of the data into rows. `ROWGROUP_SIZE` identifies the size (in 
bytes) of the row group. The default row group size is `8 * 1024 * 1024` bytes. 
|
 | PAGE_SIZE | A row group consists of column chunks that are divided up into 
pages. `PAGE_SIZE` is the size (in bytes) of such a page. The default page size 
is `1 * 1024 * 1024` bytes. |
 | ENABLE\_DICTIONARY | A boolean value that specifies whether or not to enable 
dictionary encoding. The default value is `true`; dictionary encoding is 
enabled when PXF writes Parquet files. |
diff --git a/server/build.gradle b/server/build.gradle
index 410ec1f2..3eb6b087 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -92,7 +92,7 @@ configure(javaProjects) {
             dependency("commons-lang:commons-lang:2.6")
             dependency("commons-lang:commons-lang3:3.9")
             dependency("commons-logging:commons-logging:1.1.3")
-            dependency("io.airlift:aircompressor:0.27")
+            dependency("io.airlift:aircompressor:2.0.2")
             dependency("javax.jdo:jdo-api:3.0.1")
             dependency("joda-time:joda-time:2.8.1")
             dependency("net.sf.opencsv:opencsv:2.3")
@@ -122,7 +122,7 @@ configure(javaProjects) {
             dependency("org.threeten:threeten-extra:1.5.0")
             dependency("org.tukaani:xz:1.8")
             dependency("org.wildfly.openssl:wildfly-openssl:1.0.7.Final")
-            dependency("org.xerial.snappy:snappy-java:1.1.10.4")
+            dependency("org.xerial.snappy:snappy-java:1.1.10.7")
 
             // Hadoop dependencies
             dependencySet(group:"org.apache.hadoop", 
version:"${hadoopVersion}") {
@@ -212,7 +212,7 @@ configure(javaProjects) {
                 entry("avro")
                 entry("avro-mapred")
             }
-            // Zstd support for Avro
+            // Zstd support for Avro/Parquet
             dependency("com.github.luben:zstd-jni:1.5.7-6")
 
             // Jackson 1.x dependencies
diff --git a/server/gradle.properties b/server/gradle.properties
index ea8b0c66..e0f41634 100644
--- a/server/gradle.properties
+++ b/server/gradle.properties
@@ -23,7 +23,7 @@ hiveVersion=2.3.8
 hiveStorageApiVersion=2.7.3
 hbaseVersion=2.3.7
 junitVersion=4.11
-parquetVersion=1.12.3
+parquetVersion=1.15.2
 awsJavaSdk=1.12.261
 springBootVersion=2.7.18
 org.gradle.daemon=true


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry-pxf) branch main updated: Bump Parquet version from 1.12.3 to 1.15.2

Reply via email to