This is an automated email from the ASF dual-hosted git repository.
sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new c86edfc28e [HUDI-4319] Fixed Parquet's `PLAIN_DICTIONARY` encoding not
being applied when bulk-inserting (#5966)
c86edfc28e is described below
commit c86edfc28e811c86d82f98ea07d44dae4f3c5cb1
Author: Alexey Kudinkin <[email protected]>
AuthorDate: Fri Jun 24 20:52:28 2022 -0700
[HUDI-4319] Fixed Parquet's `PLAIN_DICTIONARY` encoding not being applied
when bulk-inserting (#5966)
* Fixed Dictionary encoding config not being properly propagated to the Parquet
writer (leaving the writer unable to apply dictionary encoding and substantially
bloating the storage footprint)
---
.../scala/org/apache/hudi/cli/SparkHelpers.scala | 5 ++-
.../hudi/io/storage/HoodieAvroParquetWriter.java | 4 +--
.../hudi/io/storage/HoodieBaseParquetWriter.java | 2 +-
.../hudi/io/storage/HoodieFileWriterFactory.java | 2 +-
.../hudi/testutils/HoodieWriteableTestTable.java | 19 +++++-----
.../row/HoodieRowDataFileWriterFactory.java | 3 +-
.../io/storage/row/HoodieRowDataParquetConfig.java | 36 -------------------
.../io/storage/row/HoodieRowDataParquetWriter.java | 3 +-
.../row/HoodieInternalRowFileWriterFactory.java | 14 +++++---
.../row/HoodieInternalRowParquetWriter.java | 3 +-
.../io/storage/row/HoodieRowParquetConfig.java | 36 -------------------
.../row/TestHoodieInternalRowParquetWriter.java | 5 +--
.../table/log/block/HoodieParquetDataBlock.java | 20 +++++------
.../hudi/io/storage/HoodieAvroParquetConfig.java | 42 ----------------------
...ParquetConfig.java => HoodieParquetConfig.java} | 10 +++---
.../hudi/io/storage/HoodieParquetStreamWriter.java | 2 +-
16 files changed, 49 insertions(+), 157 deletions(-)
diff --git a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
index fbfc1d8ec9..b9f8df5fc2 100644
--- a/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
+++ b/hudi-cli/src/main/scala/org/apache/hudi/cli/SparkHelpers.scala
@@ -23,12 +23,11 @@ import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hudi.avro.HoodieAvroWriteSupport
import org.apache.hudi.client.SparkTaskContextSupplier
-import org.apache.hudi.common.HoodieJsonPayload
import org.apache.hudi.common.bloom.{BloomFilter, BloomFilterFactory}
import org.apache.hudi.common.model.{HoodieFileFormat, HoodieRecord}
import org.apache.hudi.common.util.BaseFileUtils
import org.apache.hudi.config.{HoodieIndexConfig, HoodieStorageConfig}
-import org.apache.hudi.io.storage.{HoodieAvroParquetConfig,
HoodieAvroParquetWriter}
+import org.apache.hudi.io.storage.{HoodieAvroParquetWriter,
HoodieParquetConfig}
import org.apache.parquet.avro.AvroSchemaConverter
import org.apache.parquet.hadoop.metadata.CompressionCodecName
import org.apache.spark.sql.{DataFrame, SQLContext}
@@ -45,7 +44,7 @@ object SparkHelpers {
val filter: BloomFilter =
BloomFilterFactory.createBloomFilter(HoodieIndexConfig.BLOOM_FILTER_NUM_ENTRIES_VALUE.defaultValue.toInt,
HoodieIndexConfig.BLOOM_FILTER_FPP_VALUE.defaultValue.toDouble,
HoodieIndexConfig.BLOOM_INDEX_FILTER_DYNAMIC_MAX_ENTRIES.defaultValue.toInt,
HoodieIndexConfig.BLOOM_FILTER_TYPE.defaultValue);
val writeSupport: HoodieAvroWriteSupport = new HoodieAvroWriteSupport(new
AvroSchemaConverter(fs.getConf).convert(schema), schema,
org.apache.hudi.common.util.Option.of(filter))
- val parquetConfig: HoodieAvroParquetConfig = new
HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP,
HoodieStorageConfig.PARQUET_BLOCK_SIZE.defaultValue.toInt,
HoodieStorageConfig.PARQUET_PAGE_SIZE.defaultValue.toInt,
HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.defaultValue.toInt, fs.getConf,
HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue.toDouble)
+ val parquetConfig: HoodieParquetConfig[HoodieAvroWriteSupport] = new
HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
HoodieStorageConfig.PARQUET_BLOCK_SIZE.defaultValue.toInt,
HoodieStorageConfig.PARQUET_PAGE_SIZE.defaultValue.toInt,
HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.defaultValue.toInt, fs.getConf,
HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue.toDouble)
// Add current classLoad for config, if not will throw classNotFound of
'HoodieWrapperFileSystem'.
parquetConfig.getHadoopConf().setClassLoader(Thread.currentThread.getContextClassLoader)
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
index 6f7940d04d..06631dc53f 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetWriter.java
@@ -47,11 +47,11 @@ public class HoodieAvroParquetWriter<R extends
IndexedRecord>
@SuppressWarnings({"unchecked", "rawtypes"})
public HoodieAvroParquetWriter(Path file,
- HoodieAvroParquetConfig parquetConfig,
+ HoodieParquetConfig<HoodieAvroWriteSupport>
parquetConfig,
String instantTime,
TaskContextSupplier taskContextSupplier,
boolean populateMetaFields) throws
IOException {
- super(file, (HoodieBaseParquetConfig) parquetConfig);
+ super(file, (HoodieParquetConfig) parquetConfig);
this.fileName = file.getName();
this.writeSupport = parquetConfig.getWriteSupport();
this.instantTime = instantTime;
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java
index b4aa6de1bd..e38b41d422 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetWriter.java
@@ -43,7 +43,7 @@ public abstract class HoodieBaseParquetWriter<R> extends
ParquetWriter<R> {
private long lastCachedDataSize = -1;
public HoodieBaseParquetWriter(Path file,
- HoodieBaseParquetConfig<? extends
WriteSupport<R>> parquetConfig) throws IOException {
+ HoodieParquetConfig<? extends
WriteSupport<R>> parquetConfig) throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file,
parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE,
parquetConfig.getWriteSupport(),
diff --git
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
index ffdff25738..9ee8571ebd 100644
---
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
+++
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java
@@ -77,7 +77,7 @@ public class HoodieFileWriterFactory {
Option<BloomFilter> filter = enableBloomFilter ?
Option.of(createBloomFilter(config)) : Option.empty();
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(new
AvroSchemaConverter(conf).convert(schema), schema, filter);
- HoodieAvroParquetConfig parquetConfig = new
HoodieAvroParquetConfig(writeSupport, config.getParquetCompressionCodec(),
+ HoodieParquetConfig<HoodieAvroWriteSupport> parquetConfig = new
HoodieParquetConfig<>(writeSupport, config.getParquetCompressionCodec(),
config.getParquetBlockSize(), config.getParquetPageSize(),
config.getParquetMaxFileSize(),
conf, config.getParquetCompressionRatio(),
config.parquetDictionaryEnabled());
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
index 6b847d4960..2f00b82772 100644
---
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
+++
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java
@@ -19,6 +19,12 @@
package org.apache.hudi.testutils;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.bloom.BloomFilter;
@@ -38,18 +44,11 @@ import
org.apache.hudi.common.testutils.HoodieMetadataTestTable;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieStorageConfig;
-import org.apache.hudi.io.storage.HoodieAvroParquetConfig;
+import org.apache.hudi.io.storage.HoodieAvroParquetWriter;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.io.storage.HoodieOrcConfig;
import org.apache.hudi.io.storage.HoodieOrcWriter;
-import org.apache.hudi.io.storage.HoodieAvroParquetWriter;
import org.apache.hudi.metadata.HoodieTableMetadataWriter;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.orc.CompressionKind;
@@ -110,7 +109,7 @@ public class HoodieWriteableTestTable extends
HoodieMetadataTestTable {
if
(HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().equals(HoodieFileFormat.PARQUET))
{
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
new AvroSchemaConverter().convert(schema), schema,
Option.of(filter));
- HoodieAvroParquetConfig config = new
HoodieAvroParquetConfig(writeSupport, CompressionCodecName.GZIP,
+ HoodieParquetConfig<HoodieAvroWriteSupport> config = new
HoodieParquetConfig<>(writeSupport, CompressionCodecName.GZIP,
ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.DEFAULT_PAGE_SIZE,
120 * 1024 * 1024,
new Configuration(),
Double.parseDouble(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO_FRACTION.defaultValue()));
try (HoodieAvroParquetWriter writer = new HoodieAvroParquetWriter<>(
diff --git
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java
index 8cd8bc89a7..98d4a866e0 100644
---
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java
+++
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataFileWriterFactory.java
@@ -22,6 +22,7 @@ import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.flink.table.types.logical.RowType;
@@ -67,7 +68,7 @@ public class HoodieRowDataFileWriterFactory {
HoodieRowDataParquetWriteSupport writeSupport =
new HoodieRowDataParquetWriteSupport(table.getHadoopConf(), rowType,
filter);
return new HoodieRowDataParquetWriter(
- path, new HoodieRowDataParquetConfig(
+ path, new HoodieParquetConfig<>(
writeSupport,
writeConfig.getParquetCompressionCodec(),
writeConfig.getParquetBlockSize(),
diff --git
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetConfig.java
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetConfig.java
deleted file mode 100644
index 99b72da221..0000000000
---
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetConfig.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hudi.io.storage.row;
-
-import org.apache.hudi.io.storage.HoodieBaseParquetConfig;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.hadoop.metadata.CompressionCodecName;
-
-/**
- * ParquetConfig for datasource implementation with {@link
org.apache.flink.table.data.RowData}.
- */
-public class HoodieRowDataParquetConfig extends
HoodieBaseParquetConfig<HoodieRowDataParquetWriteSupport> {
-
- public HoodieRowDataParquetConfig(HoodieRowDataParquetWriteSupport
writeSupport, CompressionCodecName compressionCodecName,
- int blockSize, int pageSize, long
maxFileSize, Configuration hadoopConf,
- double compressionRatio) {
- super(writeSupport, compressionCodecName, blockSize, pageSize,
maxFileSize, hadoopConf, compressionRatio);
- }
-}
\ No newline at end of file
diff --git
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriter.java
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriter.java
index 373e6b1f5c..7b2a87512d 100644
---
a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriter.java
+++
b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataParquetWriter.java
@@ -23,6 +23,7 @@ import org.apache.hudi.common.fs.HoodieWrapperFileSystem;
import org.apache.flink.table.data.RowData;
import org.apache.hadoop.fs.Path;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.ParquetWriter;
@@ -39,7 +40,7 @@ public class HoodieRowDataParquetWriter extends
ParquetWriter<RowData>
private final long maxFileSize;
private final HoodieRowDataParquetWriteSupport writeSupport;
- public HoodieRowDataParquetWriter(Path file, HoodieRowDataParquetConfig
parquetConfig)
+ public HoodieRowDataParquetWriter(Path file,
HoodieParquetConfig<HoodieRowDataParquetWriteSupport> parquetConfig)
throws IOException {
super(HoodieWrapperFileSystem.convertToHoodiePath(file,
parquetConfig.getHadoopConf()),
ParquetFileWriter.Mode.CREATE, parquetConfig.getWriteSupport(),
parquetConfig.getCompressionCodecName(),
diff --git
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java
index 8dd19d8883..eb408f81c1 100644
---
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java
+++
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowFileWriterFactory.java
@@ -23,6 +23,7 @@ import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.table.HoodieTable;
import org.apache.hadoop.fs.Path;
@@ -68,14 +69,17 @@ public class HoodieInternalRowFileWriterFactory {
HoodieRowParquetWriteSupport writeSupport =
new HoodieRowParquetWriteSupport(table.getHadoopConf(),
structType, filter, writeConfig);
return new HoodieInternalRowParquetWriter(
- path, new HoodieRowParquetConfig(
+ path,
+ new HoodieParquetConfig<>(
writeSupport,
writeConfig.getParquetCompressionCodec(),
writeConfig.getParquetBlockSize(),
writeConfig.getParquetPageSize(),
writeConfig.getParquetMaxFileSize(),
writeSupport.getHadoopConf(),
- writeConfig.getParquetCompressionRatio()));
+ writeConfig.getParquetCompressionRatio(),
+ writeConfig.parquetDictionaryEnabled()
+ ));
}
public static HoodieInternalRowFileWriter
getInternalRowFileWriterWithoutMetaFields(
@@ -93,13 +97,15 @@ public class HoodieInternalRowFileWriterFactory {
HoodieRowParquetWriteSupport writeSupport =
new HoodieRowParquetWriteSupport(table.getHadoopConf(), structType,
null, writeConfig);
return new HoodieInternalRowParquetWriter(
- path, new HoodieRowParquetConfig(
+ path, new HoodieParquetConfig<>(
writeSupport,
writeConfig.getParquetCompressionCodec(),
writeConfig.getParquetBlockSize(),
writeConfig.getParquetPageSize(),
writeConfig.getParquetMaxFileSize(),
writeSupport.getHadoopConf(),
- writeConfig.getParquetCompressionRatio()));
+ writeConfig.getParquetCompressionRatio(),
+ writeConfig.parquetDictionaryEnabled())
+ );
}
}
diff --git
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java
index 5a0a60ea07..1d11529352 100644
---
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java
+++
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieInternalRowParquetWriter.java
@@ -19,6 +19,7 @@
package org.apache.hudi.io.storage.row;
import org.apache.hadoop.fs.Path;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.io.storage.HoodieBaseParquetWriter;
import org.apache.spark.sql.catalyst.InternalRow;
@@ -32,7 +33,7 @@ public class HoodieInternalRowParquetWriter extends
HoodieBaseParquetWriter<Inte
private final HoodieRowParquetWriteSupport writeSupport;
- public HoodieInternalRowParquetWriter(Path file, HoodieRowParquetConfig
parquetConfig)
+ public HoodieInternalRowParquetWriter(Path file,
HoodieParquetConfig<HoodieRowParquetWriteSupport> parquetConfig)
throws IOException {
super(file, parquetConfig);
diff --git
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetConfig.java
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetConfig.java
deleted file mode 100644
index ac187dcdd9..0000000000
---
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetConfig.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hudi.io.storage.row;
-
-import org.apache.hudi.io.storage.HoodieBaseParquetConfig;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.hadoop.metadata.CompressionCodecName;
-
-/**
- * ParquetConfig for datasource implementation with {@link
org.apache.hudi.client.model.HoodieInternalRow}.
- */
-public class HoodieRowParquetConfig extends
HoodieBaseParquetConfig<HoodieRowParquetWriteSupport> {
-
- public HoodieRowParquetConfig(HoodieRowParquetWriteSupport writeSupport,
CompressionCodecName compressionCodecName,
- int blockSize, int pageSize, long maxFileSize,
Configuration hadoopConf,
- double compressionRatio) {
- super(writeSupport, compressionCodecName, blockSize, pageSize,
maxFileSize, hadoopConf, compressionRatio);
- }
-}
diff --git
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
index dd509a86b4..d6c060c6bd 100644
---
a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
+++
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java
@@ -23,6 +23,7 @@ import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.config.HoodieStorageConfig;
import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.testutils.HoodieClientTestHarness;
import org.apache.hudi.testutils.SparkDatasetTestUtils;
@@ -73,9 +74,9 @@ public class TestHoodieInternalRowParquetWriter extends
HoodieClientTestHarness
// init write support and parquet config
HoodieRowParquetWriteSupport writeSupport =
getWriteSupport(writeConfigBuilder, hadoopConf,
parquetWriteLegacyFormatEnabled);
HoodieWriteConfig cfg = writeConfigBuilder.build();
- HoodieRowParquetConfig parquetConfig = new
HoodieRowParquetConfig(writeSupport,
+ HoodieParquetConfig<HoodieRowParquetWriteSupport> parquetConfig = new
HoodieParquetConfig<>(writeSupport,
CompressionCodecName.SNAPPY, cfg.getParquetBlockSize(),
cfg.getParquetPageSize(), cfg.getParquetMaxFileSize(),
- writeSupport.getHadoopConf(), cfg.getParquetCompressionRatio());
+ writeSupport.getHadoopConf(), cfg.getParquetCompressionRatio(),
cfg.parquetDictionaryEnabled());
// prepare path
String fileId = UUID.randomUUID().toString();
diff --git
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
index 5e7bef90a0..afb448f844 100644
---
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
+++
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java
@@ -18,21 +18,20 @@
package org.apache.hudi.common.table.log.block;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.IndexedRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.fs.inline.InLineFSUtils;
import org.apache.hudi.common.fs.inline.InLineFileSystem;
import org.apache.hudi.common.util.ClosableIterator;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetReaderIterator;
-import org.apache.hudi.io.storage.HoodieAvroParquetConfig;
+import org.apache.hudi.io.storage.HoodieParquetConfig;
import org.apache.hudi.io.storage.HoodieParquetStreamWriter;
-
-import org.apache.avro.Schema;
-import org.apache.avro.generic.IndexedRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.avro.AvroSchemaConverter;
@@ -43,7 +42,6 @@ import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.io.InputFile;
import javax.annotation.Nonnull;
-
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashMap;
@@ -97,8 +95,8 @@ public class HoodieParquetDataBlock extends HoodieDataBlock {
HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
new AvroSchemaConverter().convert(writerSchema), writerSchema,
Option.empty());
- HoodieAvroParquetConfig avroParquetConfig =
- new HoodieAvroParquetConfig(
+ HoodieParquetConfig<HoodieAvroWriteSupport> avroParquetConfig =
+ new HoodieParquetConfig<>(
writeSupport,
compressionCodecName.get(),
ParquetWriter.DEFAULT_BLOCK_SIZE,
diff --git
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
deleted file mode 100644
index 1a10e6a716..0000000000
---
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hudi.io.storage;
-
-import org.apache.hudi.avro.HoodieAvroWriteSupport;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.parquet.hadoop.metadata.CompressionCodecName;
-
-/**
- * ParquetConfig for writing avro records in Parquet files.
- */
-public class HoodieAvroParquetConfig extends
HoodieBaseParquetConfig<HoodieAvroWriteSupport> {
-
- public HoodieAvroParquetConfig(HoodieAvroWriteSupport writeSupport,
CompressionCodecName compressionCodecName,
- int blockSize, int pageSize, long
maxFileSize, Configuration hadoopConf,
- double compressionRatio) {
- super(writeSupport, compressionCodecName, blockSize, pageSize,
maxFileSize, hadoopConf, compressionRatio);
- }
-
- public HoodieAvroParquetConfig(HoodieAvroWriteSupport writeSupport,
CompressionCodecName compressionCodecName,
- int blockSize, int pageSize, long maxFileSize, Configuration hadoopConf,
- double compressionRatio, boolean directoryEnabled) {
- super(writeSupport, compressionCodecName, blockSize, pageSize,
maxFileSize, hadoopConf, compressionRatio, directoryEnabled);
- }
-}
diff --git
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java
similarity index 82%
rename from
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
rename to
hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java
index 6db1de012c..77fea6beee 100644
---
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java
+++
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetConfig.java
@@ -25,7 +25,7 @@ import
org.apache.parquet.hadoop.metadata.CompressionCodecName;
* Base ParquetConfig to hold config params for writing to Parquet.
* @param <T>
*/
-public class HoodieBaseParquetConfig<T> {
+public class HoodieParquetConfig<T> {
private final T writeSupport;
private final CompressionCodecName compressionCodecName;
private final int blockSize;
@@ -35,13 +35,13 @@ public class HoodieBaseParquetConfig<T> {
private final double compressionRatio;
private final boolean dictionaryEnabled;
- public HoodieBaseParquetConfig(T writeSupport, CompressionCodecName
compressionCodecName, int blockSize,
- int pageSize, long maxFileSize, Configuration hadoopConf, double
compressionRatio) {
+ public HoodieParquetConfig(T writeSupport, CompressionCodecName
compressionCodecName, int blockSize,
+ int pageSize, long maxFileSize, Configuration
hadoopConf, double compressionRatio) {
this(writeSupport, compressionCodecName, blockSize, pageSize, maxFileSize,
hadoopConf, compressionRatio, false);
}
- public HoodieBaseParquetConfig(T writeSupport, CompressionCodecName
compressionCodecName, int blockSize,
- int pageSize, long maxFileSize, Configuration
hadoopConf, double compressionRatio, boolean dictionaryEnabled) {
+ public HoodieParquetConfig(T writeSupport, CompressionCodecName
compressionCodecName, int blockSize,
+ int pageSize, long maxFileSize, Configuration
hadoopConf, double compressionRatio, boolean dictionaryEnabled) {
this.writeSupport = writeSupport;
this.compressionCodecName = compressionCodecName;
this.blockSize = blockSize;
diff --git
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
index a273601824..c8f78c3501 100644
---
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
+++
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java
@@ -38,7 +38,7 @@ public class HoodieParquetStreamWriter<R extends
IndexedRecord> implements AutoC
private final HoodieAvroWriteSupport writeSupport;
public HoodieParquetStreamWriter(FSDataOutputStream outputStream,
- HoodieAvroParquetConfig parquetConfig)
throws IOException {
+ HoodieParquetConfig<HoodieAvroWriteSupport>
parquetConfig) throws IOException {
this.writeSupport = parquetConfig.getWriteSupport();
this.writer = new Builder<R>(new
OutputStreamBackedOutputFile(outputStream), writeSupport)
.withWriteMode(ParquetFileWriter.Mode.CREATE)