This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new c12b17470 PARQUET-2282: Don't initialize HadoopCodec (#1071)
c12b17470 is described below

commit c12b174703aa7796b61e952c463a729d2929cba9
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Apr 18 09:40:05 2023 +0200

    PARQUET-2282: Don't initialize HadoopCodec (#1071)
    
    At Iceberg we want to run Apache Flink without Hadoop. Initializing
    HadoopCodec directly in the field declaration gets in the way of that,
    so the codec factory is now created lazily in build(), but only if
    another codec hasn't been provided.
---
 .../src/main/java/org/apache/parquet/ParquetReadOptions.java        | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
index a69ba46be..f20628275 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/ParquetReadOptions.java
@@ -171,7 +171,7 @@ public class ParquetReadOptions {
     protected FilterCompat.Filter recordFilter = null;
     protected ParquetMetadataConverter.MetadataFilter metadataFilter = NO_FILTER;
     // the page size parameter isn't used when only using the codec factory to get decompressors
-    protected CompressionCodecFactory codecFactory = HadoopCodecs.newFactory(0);
+    protected CompressionCodecFactory codecFactory = null;
     protected ByteBufferAllocator allocator = new HeapByteBufferAllocator();
     protected int maxAllocationSize = ALLOCATION_SIZE_DEFAULT;
     protected Map<String, String> properties = new HashMap<>();
@@ -314,6 +314,10 @@ public class ParquetReadOptions {
     }
 
     public ParquetReadOptions build() {
+      if (codecFactory == null) {
+        codecFactory = HadoopCodecs.newFactory(0);
+      }
+
       return new ParquetReadOptions(
         useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter,
         useColumnIndexFilter, usePageChecksumVerification, useBloomFilter, recordFilter, metadataFilter,

Reply via email to