This is an automated email from the ASF dual-hosted git repository.
zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 4d9a2fd Revert "Experiment."
4d9a2fd is described below
commit 4d9a2fd01f33858bd5eb392a5f7bd0967fbec3f8
Author: Zoltan Ivanfi <[email protected]>
AuthorDate: Wed Nov 21 18:27:42 2018 +0100
Revert "Experiment."
This reverts commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df.
---
parquet-benchmarks/run.sh | 4 +-
.../apache/parquet/benchmarks/WriteBenchmarks.java | 16 +--
.../benchmarks/WriteBenchmarksParquet1.java | 159 ---------------------
.../hadoop/InternalParquetRecordWriter.java | 2 +-
4 files changed, 10 insertions(+), 171 deletions(-)
diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh
index dfb0297..cfaddae 100755
--- a/parquet-benchmarks/run.sh
+++ b/parquet-benchmarks/run.sh
@@ -22,9 +22,7 @@
SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
echo "Starting WRITE benchmarks"
-java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json
-exit 0
-
+java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@"
echo "Generating test data"
java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate
echo "Data generated, starting READ benchmarks"
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 265c5ec..5c26a84 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -55,7 +55,7 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
@Benchmark
@@ -69,7 +69,7 @@ public class WriteBenchmarks {
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
@Benchmark
@@ -83,7 +83,7 @@ public class WriteBenchmarks {
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
@Benchmark
@@ -97,7 +97,7 @@ public class WriteBenchmarks {
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
@Benchmark
@@ -111,7 +111,7 @@ public class WriteBenchmarks {
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
//TODO how to handle lzo jar?
@@ -126,7 +126,7 @@ public class WriteBenchmarks {
// PAGE_SIZE_DEFAULT,
// FIXED_LEN_BYTEARRAY_SIZE,
// LZO,
-// 50 * ONE_MILLION);
+// ONE_MILLION);
// }
@Benchmark
@@ -140,7 +140,7 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
SNAPPY,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
@Benchmark
@@ -154,6 +154,6 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
GZIP,
- 50 * ONE_MILLION);
+ ONE_MILLION);
}
}
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
deleted file mode 100644
index edd87ba..0000000
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.parquet.benchmarks;
-
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.Level;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-
-import static org.openjdk.jmh.annotations.Scope.Thread;
-import static org.apache.parquet.benchmarks.BenchmarkConstants.*;
-import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
-
-import java.io.IOException;
-
-import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
-import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
-
-@State(Thread)
-public class WriteBenchmarksParquet1 {
- private DataGenerator dataGenerator = new DataGenerator();
-
- @Setup(Level.Iteration)
- public void cleanup() {
- //clean existing test data at the beginning of each iteration
- dataGenerator.cleanup();
- }
-
- @Benchmark
- public void write1MRowsDefaultBlockAndPageSizeUncompressed()
- throws IOException
- {
- dataGenerator.generateData(file_1M,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_DEFAULT,
- PAGE_SIZE_DEFAULT,
- FIXED_LEN_BYTEARRAY_SIZE,
- UNCOMPRESSED,
- 50 * ONE_MILLION);
- }
-
- @Benchmark
- public void write1MRowsBS256MPS4MUncompressed()
- throws IOException
- {
- dataGenerator.generateData(file_1M_BS256M_PS4M,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_256M,
- PAGE_SIZE_4M,
- FIXED_LEN_BYTEARRAY_SIZE,
- UNCOMPRESSED,
- 50 * ONE_MILLION);
- }
-
- @Benchmark
- public void write1MRowsBS256MPS8MUncompressed()
- throws IOException
- {
- dataGenerator.generateData(file_1M_BS256M_PS8M,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_256M,
- PAGE_SIZE_8M,
- FIXED_LEN_BYTEARRAY_SIZE,
- UNCOMPRESSED,
- 50 * ONE_MILLION);
- }
-
- @Benchmark
- public void write1MRowsBS512MPS4MUncompressed()
- throws IOException
- {
- dataGenerator.generateData(file_1M_BS512M_PS4M,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_512M,
- PAGE_SIZE_4M,
- FIXED_LEN_BYTEARRAY_SIZE,
- UNCOMPRESSED,
- 50 * ONE_MILLION);
- }
-
- @Benchmark
- public void write1MRowsBS512MPS8MUncompressed()
- throws IOException
- {
- dataGenerator.generateData(file_1M_BS512M_PS8M,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_512M,
- PAGE_SIZE_8M,
- FIXED_LEN_BYTEARRAY_SIZE,
- UNCOMPRESSED,
- 50 * ONE_MILLION);
- }
-
- //TODO how to handle lzo jar?
-// @Benchmark
-// public void write1MRowsDefaultBlockAndPageSizeLZO()
-// throws IOException
-// {
-// dataGenerator.generateData(parquetFile_1M_LZO,
-// configuration,
-// WriterVersion.PARQUET_1_0,
-// BLOCK_SIZE_DEFAULT,
-// PAGE_SIZE_DEFAULT,
-// FIXED_LEN_BYTEARRAY_SIZE,
-// LZO,
-// 50 * ONE_MILLION);
-// }
-
- @Benchmark
- public void write1MRowsDefaultBlockAndPageSizeSNAPPY()
- throws IOException
- {
- dataGenerator.generateData(file_1M_SNAPPY,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_DEFAULT,
- PAGE_SIZE_DEFAULT,
- FIXED_LEN_BYTEARRAY_SIZE,
- SNAPPY,
- 50 * ONE_MILLION);
- }
-
- @Benchmark
- public void write1MRowsDefaultBlockAndPageSizeGZIP()
- throws IOException
- {
- dataGenerator.generateData(file_1M_GZIP,
- configuration,
- PARQUET_1_0,
- BLOCK_SIZE_DEFAULT,
- PAGE_SIZE_DEFAULT,
- FIXED_LEN_BYTEARRAY_SIZE,
- GZIP,
- 50 * ONE_MILLION);
- }
-}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index 8b79dca..d8af379 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -43,7 +43,7 @@ class InternalParquetRecordWriter<T> {
private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class);
private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
- private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100;
+ private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
private final ParquetFileWriter parquetFileWriter;
private final WriteSupport<T> writeSupport;