(cloudberry-pxf) branch main updated: Update parquet to 1.12.3 in PXF

djwang Thu, 05 Feb 2026 19:27:10 -0800

This is an automated email from the ASF dual-hosted git repository.

djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git



The following commit(s) were added to refs/heads/main by this push:
     new ab7f9026 Update parquet to 1.12.3 in PXF
ab7f9026 is described below

commit ab7f902672ec6a579657e157d210cd912b5c210f
Author: Nikolay Antonov <[email protected]>
AuthorDate: Fri Feb 6 08:26:56 2026 +0500

    Update parquet to 1.12.3 in PXF
    
    Update parquet to 1.12.3 (latest version with hadoop-client-2.x)
    * Add support for ZSTD compression
    * Temporarily declare LZO as not supported. It causes the following error in 
both current and pre-upgrade builds:
       ```
       ERROR:  PXF server error : Class com.hadoop.compression.lzo.LzoCodec was 
not found  (seg1 10.11.0.131:6000 pid=2567556)
       ```
    * Add tests to cover different types of compression
---
 .../pxf/automation/features/parquet/ParquetWriteTest.java | 15 +++++++++++++++
 docs/content/hdfs_parquet.html.md.erb                     |  4 ++--
 server/build.gradle                                       |  3 ++-
 server/gradle.properties                                  |  2 +-
 server/pxf-hdfs/build.gradle                              |  1 +
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git 
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
 
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
index 358d2233..18218eac 100644
--- 
a/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
+++ 
b/automation/src/test/java/org/apache/cloudberry/pxf/automation/features/parquet/ParquetWriteTest.java
@@ -195,6 +195,21 @@ public class ParquetWriteTest extends BaseWritableFeature {
         
runWritePrimitivesScenario("pxf_parquet_write_primitives_gzip_classname", 
"pxf_parquet_read_primitives_gzip_classname", 
"parquet_write_primitives_gzip_classname", new 
String[]{"COMPRESSION_CODEC=org.apache.hadoop.io.compress.GzipCodec"});
     }
 
+    @Test(groups = {"features", "gpdb", "security", "hcfs"})
+    public void parquetWritePrimitivesSnappy() throws Exception {
+        runWritePrimitivesScenario("pxf_parquet_write_primitives_snappy", 
"pxf_parquet_read_primitives_snappy", "parquet_write_primitives_snappy", new 
String[]{"COMPRESSION_CODEC=snappy"});
+    }
+
+    @Test(groups = {"features", "gpdb", "security", "hcfs"})
+    public void parquetWritePrimitivesUncompressed() throws Exception {
+        
runWritePrimitivesScenario("pxf_parquet_write_primitives_uncompressed", 
"pxf_parquet_read_primitives_uncompressed", 
"parquet_write_primitives_uncompressed", new 
String[]{"COMPRESSION_CODEC=uncompressed"});
+    }
+
+    @Test(groups = {"features", "gpdb", "security", "hcfs"})
+    public void parquetWritePrimitivesZStd() throws Exception {
+        runWritePrimitivesScenario("pxf_parquet_write_primitives_zstd", 
"pxf_parquet_read_primitives_zstd", "parquet_write_primitives_zstd", new 
String[]{"COMPRESSION_CODEC=zstd"});
+    }
+
     // Numeric precision not defined, test writing data precision in [1, 38]. 
All the data should be written correctly.
     @Test(groups = {"features", "gpdb", "security", "hcfs"})
     public void parquetWriteUndefinedPrecisionNumeric() throws Exception {
diff --git a/docs/content/hdfs_parquet.html.md.erb 
b/docs/content/hdfs_parquet.html.md.erb
index 26ee4817..9ad05b78 100644
--- a/docs/content/hdfs_parquet.html.md.erb
+++ b/docs/content/hdfs_parquet.html.md.erb
@@ -23,7 +23,7 @@ under the License.
 
 Use the PXF HDFS connector to read and write Parquet-format data. This section 
describes how to read and write HDFS files that are stored in Parquet format, 
including how to create, query, and insert into external tables that reference 
files in the HDFS data store.
 
-PXF supports reading or writing Parquet files compressed with these codecs: 
`snappy`, `gzip`, and `lzo`.
+PXF supports reading or writing Parquet files compressed with these codecs: 
`snappy`, `gzip`, and `zstd`.
 
 PXF currently supports reading and writing primitive Parquet data types only.
 
@@ -182,7 +182,7 @@ The PXF `hdfs:parquet` profile supports encoding- and 
compression-related write
 
 | Write Option  | Value Description |
 |-------|-------------------------------------|
-| COMPRESSION_CODEC    | The compression codec alias. Supported compression 
codecs for writing Parquet data include: `snappy`, `gzip`, `lzo`, and 
`uncompressed` . If this option is not provided, PXF compresses the data using 
`snappy` compression. |
+| COMPRESSION_CODEC    | The compression codec alias. Supported compression 
codecs for writing Parquet data include: `snappy`, `gzip`, `zstd`, and 
`uncompressed` . If this option is not provided, PXF compresses the data using 
`snappy` compression. |
 | ROWGROUP_SIZE | A Parquet file consists of one or more row groups, a logical 
partitioning of the data into rows. `ROWGROUP_SIZE` identifies the size (in 
bytes) of the row group. The default row group size is `8 * 1024 * 1024` bytes. 
|
 | PAGE_SIZE | A row group consists of column chunks that are divided up into 
pages. `PAGE_SIZE` is the size (in bytes) of such a page. The default page size 
is `1 * 1024 * 1024` bytes. |
 | ENABLE\_DICTIONARY | A boolean value that specifies whether or not to enable 
dictionary encoding. The default value is `true`; dictionary encoding is 
enabled when PXF writes Parquet files. |
diff --git a/server/build.gradle b/server/build.gradle
index 499a0b72..a1b6275c 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -164,7 +164,7 @@ configure(javaProjects) {
             }
 
             // Parquet dependencies
-            dependency("org.apache.parquet:parquet-format:2.7.0")
+            dependency("org.apache.parquet:parquet-format:2.11.0")
             dependencySet(group:"org.apache.parquet", 
version:"${parquetVersion}") {
                 entry("parquet-column")
                 entry("parquet-common")
@@ -173,6 +173,7 @@ configure(javaProjects) {
                 entry("parquet-hadoop")
                 entry("parquet-jackson")
                 entry("parquet-pig")
+                entry("parquet-format-structures")
             }
 
             // Thrift dependencies
diff --git a/server/gradle.properties b/server/gradle.properties
index eb6191df..42da880a 100644
--- a/server/gradle.properties
+++ b/server/gradle.properties
@@ -23,7 +23,7 @@ hiveVersion=2.3.8
 hiveStorageApiVersion=2.7.3
 hbaseVersion=1.3.2
 junitVersion=4.11
-parquetVersion=1.11.1
+parquetVersion=1.12.3
 awsJavaSdk=1.12.261
 springBootVersion=2.7.18
 org.gradle.daemon=true
diff --git a/server/pxf-hdfs/build.gradle b/server/pxf-hdfs/build.gradle
index 673e528e..9705fb6f 100644
--- a/server/pxf-hdfs/build.gradle
+++ b/server/pxf-hdfs/build.gradle
@@ -38,6 +38,7 @@ dependencies {
     implementation("org.apache.hadoop:hadoop-hdfs")                  { 
transitive = false }
     implementation("org.apache.hadoop:hadoop-hdfs-client")           { 
transitive = false }
     implementation("org.apache.parquet:parquet-format")              { 
transitive = false }
+    implementation("org.apache.parquet:parquet-format-structures")   { 
transitive = false }
     implementation("org.apache.parquet:parquet-column")              { 
transitive = false }
     implementation("org.apache.parquet:parquet-common")              { 
transitive = false }
     implementation("org.apache.parquet:parquet-encoding")            { 
transitive = false }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry-pxf) branch main updated: Update parquet to 1.12.3 in PXF

Reply via email to