This is an automated email from the ASF dual-hosted git repository.
zivanfi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 97a880c Experiment.
97a880c is described below
commit 97a880cfc4fc3c2c74ff1302bc6e4aab1582b6df
Author: Zoltan Ivanfi <[email protected]>
AuthorDate: Fri Oct 26 15:08:18 2018 +0200
Experiment.
---
parquet-benchmarks/run.sh | 4 ++-
.../apache/parquet/benchmarks/WriteBenchmarks.java | 16 +++++-----
...enchmarks.java => WriteBenchmarksParquet1.java} | 36 +++++++++++-----------
.../hadoop/InternalParquetRecordWriter.java | 2 +-
4 files changed, 30 insertions(+), 28 deletions(-)
diff --git a/parquet-benchmarks/run.sh b/parquet-benchmarks/run.sh
index cfaddae..dfb0297 100755
--- a/parquet-benchmarks/run.sh
+++ b/parquet-benchmarks/run.sh
@@ -22,7 +22,9 @@
SCRIPT_PATH=$( cd "$(dirname "$0")" ; pwd -P )
echo "Starting WRITE benchmarks"
-java -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* "$@"
+java -XX:+PreserveFramePointer -jar ${SCRIPT_PATH}/target/parquet-benchmarks.jar p*Write* -wi 0 -i 1 -f 3 -tu s -bm ss -rf json
+exit 0
+
echo "Generating test data"
java -cp ${SCRIPT_PATH}/target/parquet-benchmarks.jar org.apache.parquet.benchmarks.DataGenerator generate
echo "Data generated, starting READ benchmarks"
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
index 5c26a84..265c5ec 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
@@ -55,7 +55,7 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -69,7 +69,7 @@ public class WriteBenchmarks {
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -83,7 +83,7 @@ public class WriteBenchmarks {
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -97,7 +97,7 @@ public class WriteBenchmarks {
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -111,7 +111,7 @@ public class WriteBenchmarks {
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
//TODO how to handle lzo jar?
@@ -126,7 +126,7 @@ public class WriteBenchmarks {
// PAGE_SIZE_DEFAULT,
// FIXED_LEN_BYTEARRAY_SIZE,
// LZO,
-// ONE_MILLION);
+// 50 * ONE_MILLION);
// }
@Benchmark
@@ -140,7 +140,7 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
SNAPPY,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -154,6 +154,6 @@ public class WriteBenchmarks {
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
GZIP,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
}
diff --git a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
similarity index 86%
copy from parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
copy to parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
index 5c26a84..edd87ba 100644
--- a/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarks.java
+++ b/parquet-benchmarks/src/main/java/org/apache/parquet/benchmarks/WriteBenchmarksParquet1.java
@@ -29,13 +29,13 @@ import static org.apache.parquet.benchmarks.BenchmarkFiles.*;
import java.io.IOException;
-import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_2_0;
+import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0;
import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP;
import static org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY;
import static org.apache.parquet.hadoop.metadata.CompressionCodecName.UNCOMPRESSED;
@State(Thread)
-public class WriteBenchmarks {
+public class WriteBenchmarksParquet1 {
private DataGenerator dataGenerator = new DataGenerator();
@Setup(Level.Iteration)
@@ -50,12 +50,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_DEFAULT,
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -64,12 +64,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_BS256M_PS4M,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_256M,
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -78,12 +78,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_BS256M_PS8M,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_256M,
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -92,12 +92,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_BS512M_PS4M,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_512M,
PAGE_SIZE_4M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -106,12 +106,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_BS512M_PS8M,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_512M,
PAGE_SIZE_8M,
FIXED_LEN_BYTEARRAY_SIZE,
UNCOMPRESSED,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
//TODO how to handle lzo jar?
@@ -121,12 +121,12 @@ public class WriteBenchmarks {
// {
// dataGenerator.generateData(parquetFile_1M_LZO,
// configuration,
-// WriterVersion.PARQUET_2_0,
+// WriterVersion.PARQUET_1_0,
// BLOCK_SIZE_DEFAULT,
// PAGE_SIZE_DEFAULT,
// FIXED_LEN_BYTEARRAY_SIZE,
// LZO,
-// ONE_MILLION);
+// 50 * ONE_MILLION);
// }
@Benchmark
@@ -135,12 +135,12 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_SNAPPY,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_DEFAULT,
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
SNAPPY,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
@Benchmark
@@ -149,11 +149,11 @@ public class WriteBenchmarks {
{
dataGenerator.generateData(file_1M_GZIP,
configuration,
- PARQUET_2_0,
+ PARQUET_1_0,
BLOCK_SIZE_DEFAULT,
PAGE_SIZE_DEFAULT,
FIXED_LEN_BYTEARRAY_SIZE,
GZIP,
- ONE_MILLION);
+ 50 * ONE_MILLION);
}
}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
index d8af379..8b79dca 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/hadoop/InternalParquetRecordWriter.java
@@ -43,7 +43,7 @@ class InternalParquetRecordWriter<T> {
private static final Logger LOG = LoggerFactory.getLogger(InternalParquetRecordWriter.class);
private static final int MINIMUM_RECORD_COUNT_FOR_CHECK = 100;
- private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 10000;
+ private static final int MAXIMUM_RECORD_COUNT_FOR_CHECK = 100;
private final ParquetFileWriter parquetFileWriter;
private final WriteSupport<T> writeSupport;