This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new b07da59957 Fix Spark JMH Benchmarks (#6385)
b07da59957 is described below
commit b07da59957f512972ed83411b6faacb3689034ab
Author: Eduard Tudenhöfner <[email protected]>
AuthorDate: Thu Dec 8 17:03:01 2022 +0100
Fix Spark JMH Benchmarks (#6385)
---
.../spark/data/parquet/SparkParquetReadersFlatDataBenchmark.java | 3 ++-
.../spark/data/parquet/SparkParquetReadersNestedDataBenchmark.java | 3 ++-
.../spark/data/parquet/SparkParquetWritersFlatDataBenchmark.java | 1 +
.../spark/data/parquet/SparkParquetWritersNestedDataBenchmark.java | 1 +
4 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersFlatDataBenchmark.java b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersFlatDataBenchmark.java
index 24753f546d..63f111a37d 100644
--- a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersFlatDataBenchmark.java
+++ b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersFlatDataBenchmark.java
@@ -88,7 +88,7 @@ public class SparkParquetReadersFlatDataBenchmark {
required(1, "longCol", Types.LongType.get()),
optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
optional(8, "stringCol", Types.StringType.get()));
- private static final int NUM_RECORDS = 10000000;
+ private static final int NUM_RECORDS = 1000000;
private File dataFile;
@Setup
@@ -155,6 +155,7 @@ public class SparkParquetReadersFlatDataBenchmark {
.set("spark.sql.parquet.binaryAsString", "false")
.set("spark.sql.parquet.int96AsTimestamp", "false")
.set("spark.sql.caseSensitive", "false")
+ .set("spark.sql.parquet.fieldId.write.enabled", "false")
.callInit()
.build()) {
diff --git a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersNestedDataBenchmark.java b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersNestedDataBenchmark.java
index 10aa46840b..7a47d7ca53 100644
--- a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersNestedDataBenchmark.java
+++ b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetReadersNestedDataBenchmark.java
@@ -86,7 +86,7 @@ public class SparkParquetReadersNestedDataBenchmark {
private static final Schema PROJECTED_SCHEMA =
new Schema(
optional(4, "nested", Types.StructType.of(required(1, "col1",
Types.StringType.get()))));
- private static final int NUM_RECORDS = 10000000;
+ private static final int NUM_RECORDS = 1000000;
private File dataFile;
@Setup
@@ -153,6 +153,7 @@ public class SparkParquetReadersNestedDataBenchmark {
.set("spark.sql.parquet.binaryAsString", "false")
.set("spark.sql.parquet.int96AsTimestamp", "false")
.set("spark.sql.caseSensitive", "false")
+ .set("spark.sql.parquet.fieldId.write.enabled", "false")
.callInit()
.build()) {
diff --git a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersFlatDataBenchmark.java b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersFlatDataBenchmark.java
index 87d95081a7..f104b8b88b 100644
--- a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersFlatDataBenchmark.java
+++ b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersFlatDataBenchmark.java
@@ -120,6 +120,7 @@ public class SparkParquetWritersFlatDataBenchmark {
.set("spark.sql.parquet.int96AsTimestamp", "false")
.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
.set("spark.sql.caseSensitive", "false")
+ .set("spark.sql.parquet.fieldId.write.enabled", "false")
.schema(SCHEMA)
.build()) {
diff --git a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersNestedDataBenchmark.java b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersNestedDataBenchmark.java
index 405db2294f..e375d1c56a 100644
--- a/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersNestedDataBenchmark.java
+++ b/spark/v3.3/spark/src/jmh/java/org/apache/iceberg/spark/data/parquet/SparkParquetWritersNestedDataBenchmark.java
@@ -120,6 +120,7 @@ public class SparkParquetWritersNestedDataBenchmark {
.set("spark.sql.parquet.int96AsTimestamp", "false")
.set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
.set("spark.sql.caseSensitive", "false")
+ .set("spark.sql.parquet.fieldId.write.enabled", "false")
.schema(SCHEMA)
.build()) {