Repository: parquet-mr
Updated Branches:
  refs/heads/master 4fd34e651 -> 98c27699c


PARQUET-321: Default maximum block padding to 8MB.

rdblue's change applied to the newest code.

Original pull request: https://github.com/apache/parquet-mr/pull/232/

Author: Zoltan Ivanfi <[email protected]>

Closes #391 from zicl/master and squashes the following commits:

b1c5c1d [Zoltan Ivanfi] PARQUET-321: Default maximum block padding to 8MB.


Project: http://git-wip-us.apache.org/repos/asf/parquet-mr/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-mr/commit/98c27699
Tree: http://git-wip-us.apache.org/repos/asf/parquet-mr/tree/98c27699
Diff: http://git-wip-us.apache.org/repos/asf/parquet-mr/diff/98c27699

Branch: refs/heads/master
Commit: 98c27699cbcf65c3d9d655ecbcd67adcd8b45b05
Parents: 4fd34e6
Author: Zoltan Ivanfi <[email protected]>
Authored: Wed Dec 7 11:07:03 2016 -0800
Committer: Ryan Blue <[email protected]>
Committed: Wed Dec 7 11:07:03 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/parquet/hadoop/ParquetOutputFormat.java    | 6 +-----
 .../src/main/java/org/apache/parquet/hadoop/ParquetWriter.java | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98c27699/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
index bd20360..78af765 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetOutputFormat.java
@@ -145,9 +145,6 @@ public class ParquetOutputFormat<T> extends FileOutputFormat<Void, T> {
   public static final String MAX_ROW_COUNT_FOR_PAGE_SIZE_CHECK = "parquet.page.size.row.check.max";
   public static final String ESTIMATE_PAGE_SIZE_CHECK = "parquet.page.size.check.estimate";
 
-  // default to no padding for now
-  private static final int DEFAULT_MAX_PADDING_SIZE = 0;
-
   public static JobSummaryLevel getJobSummaryLevel(Configuration conf) {
     String level = conf.get(JOB_SUMMARY_LEVEL);
     String deprecatedFlag = conf.get(ENABLE_JOB_SUMMARY);
@@ -313,8 +310,7 @@ public class ParquetOutputFormat<T> extends FileOutputFormat<Void, T> {
   }
 
   private static int getMaxPaddingSize(Configuration conf) {
-    // default to no padding, 0% of the row group size
-    return conf.getInt(MAX_PADDING_BYTES, DEFAULT_MAX_PADDING_SIZE);
+    return conf.getInt(MAX_PADDING_BYTES, ParquetWriter.MAX_PADDING_SIZE_DEFAULT);
   }
 
   private WriteSupport<T> writeSupport;

http://git-wip-us.apache.org/repos/asf/parquet-mr/blob/98c27699/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
----------------------------------------------------------------------
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
index 58cbe95..9512b93 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/ParquetWriter.java
@@ -49,7 +49,7 @@ public class ParquetWriter<T> implements Closeable {
   public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";
 
   // max size (bytes) to write as padding and the min size of a row group
-  public static final int MAX_PADDING_SIZE_DEFAULT = 0;
+  public static final int MAX_PADDING_SIZE_DEFAULT = 8 * 1024 * 1024; // 8MB
 
   private final InternalParquetRecordWriter<T> writer;
   private final CodecFactory codecFactory;
