This is an automated email from the ASF dual-hosted git repository.
vinoth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 48797b1 Add compression codec configurations for HoodieParquetWriter.
48797b1 is described below
commit 48797b1ae1bf3ad08908d43e3fdc5824e3d79eee
Author: kaka11chen <[email protected]>
AuthorDate: Sat Mar 16 01:52:41 2019 +0800
Add compression codec configurations for HoodieParquetWriter.
---
.../main/java/com/uber/hoodie/config/HoodieStorageConfig.java | 10 ++++++++++
.../main/java/com/uber/hoodie/config/HoodieWriteConfig.java | 5 +++++
.../com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java | 3 +--
3 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java
index 0403b62..9a2a3fb 100644
--- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieStorageConfig.java
@@ -44,6 +44,9 @@ public class HoodieStorageConfig extends DefaultHoodieConfig {
public static final String PARQUET_COMPRESSION_RATIO =
"hoodie.parquet.compression.ratio";
// Default compression ratio for parquet
public static final String DEFAULT_STREAM_COMPRESSION_RATIO =
String.valueOf(0.1);
+ public static final String PARQUET_COMPRESSION_CODEC = "hoodie.parquet.compression.codec";
+ // Default compression codec for parquet
+ public static final String DEFAULT_PARQUET_COMPRESSION_CODEC = "gzip";
public static final String LOGFILE_TO_PARQUET_COMPRESSION_RATIO =
"hoodie.logfile.to.parquet.compression.ratio";
// Default compression ratio for log file to parquet, general 3x
public static final String DEFAULT_LOGFILE_TO_PARQUET_COMPRESSION_RATIO =
String.valueOf(0.35);
@@ -105,6 +108,11 @@ public class HoodieStorageConfig extends DefaultHoodieConfig {
return this;
}
+ public Builder parquetCompressionCodec(String parquetCompressionCodec) {
+ props.setProperty(PARQUET_COMPRESSION_CODEC, parquetCompressionCodec);
+ return this;
+ }
+
public Builder logFileToParquetCompressionRatio(double
logFileToParquetCompressionRatio) {
props.setProperty(LOGFILE_TO_PARQUET_COMPRESSION_RATIO,
String.valueOf(logFileToParquetCompressionRatio));
return this;
@@ -124,6 +132,8 @@ public class HoodieStorageConfig extends DefaultHoodieConfig {
LOGFILE_SIZE_MAX_BYTES, DEFAULT_LOGFILE_SIZE_MAX_BYTES);
setDefaultOnCondition(props,
!props.containsKey(PARQUET_COMPRESSION_RATIO),
PARQUET_COMPRESSION_RATIO, DEFAULT_STREAM_COMPRESSION_RATIO);
+ setDefaultOnCondition(props, !props.containsKey(PARQUET_COMPRESSION_CODEC),
+ PARQUET_COMPRESSION_CODEC, DEFAULT_PARQUET_COMPRESSION_CODEC);
setDefaultOnCondition(props,
!props.containsKey(LOGFILE_TO_PARQUET_COMPRESSION_RATIO),
LOGFILE_TO_PARQUET_COMPRESSION_RATIO,
DEFAULT_LOGFILE_TO_PARQUET_COMPRESSION_RATIO);
return config;
diff --git a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java
index 07ddc96..d1a5e46 100644
--- a/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/config/HoodieWriteConfig.java
@@ -30,6 +30,7 @@ import java.io.InputStream;
import java.util.Map;
import java.util.Properties;
import javax.annotation.concurrent.Immutable;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.spark.storage.StorageLevel;
/**
@@ -355,6 +356,10 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
return
Double.valueOf(props.getProperty(HoodieStorageConfig.PARQUET_COMPRESSION_RATIO));
}
+ public CompressionCodecName getParquetCompressionCodec() {
+ return CompressionCodecName.fromConf(props.getProperty(HoodieStorageConfig.PARQUET_COMPRESSION_CODEC));
+ }
+
public double getLogFileToParquetCompressionRatio() {
return
Double.valueOf(props.getProperty(HoodieStorageConfig.LOGFILE_TO_PARQUET_COMPRESSION_RATIO));
}
diff --git a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java
index 2e83108..13874ca 100644
--- a/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java
+++ b/hoodie-client/src/main/java/com/uber/hoodie/io/storage/HoodieStorageWriterFactory.java
@@ -26,7 +26,6 @@ import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroSchemaConverter;
-import org.apache.parquet.hadoop.metadata.CompressionCodecName;
public class HoodieStorageWriterFactory {
@@ -47,7 +46,7 @@ public class HoodieStorageWriterFactory {
new AvroSchemaConverter().convert(schema), schema, filter);
HoodieParquetConfig parquetConfig =
- new HoodieParquetConfig(writeSupport, CompressionCodecName.GZIP,
+ new HoodieParquetConfig(writeSupport, config.getParquetCompressionCodec(),
config.getParquetBlockSize(), config.getParquetPageSize(),
config.getParquetMaxFileSize(), hoodieTable.getHadoopConf(),
config.getParquetCompressionRatio());