This is an automated email from the ASF dual-hosted git repository.
william pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/main by this push:
new 594859674 ORC-1192: Use `orc.zstd` instead of `orc.none` (#1144)
594859674 is described below
commit 594859674e87fb7c7a79bf96d453bd6f8f1c0e47
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Sun May 29 21:05:33 2022 -0700
ORC-1192: Use `orc.zstd` instead of `orc.none` (#1144)
### What changes were proposed in this pull request?
This PR switches the benchmarks to read `orc.zstd` data by default instead of `orc.none`, and replaces `none` with `zstd` in the benchmark compression parameter lists.
### Why are the changes needed?
Since ORC-1109, `orc.zstd` is used by default instead of `orc.none`, so the benchmarks should reference `orc.zstd` as well.
**BEFORE**
```
$ git grep 'none"' | grep Benchmark
java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java: @Param({"none", "snappy", "gz"})
java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java: @Param({"none", "gz", "snappy"})
java/bench/hive/src/java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java: @Param({"none"})
java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java: String dataRelativePath = "data/generated/sales/orc.none";
java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java: String dataRelativePath = "data/generated/taxi/orc.none";
java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java: String dataRelativePath = "data/generated/taxi/orc.none";
java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java: String dataRelativePath = "data/generated/sales/orc.none";
java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java: String dataRelativePath = "data/generated/taxi/orc.none";
java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java: @Param({"none", "gz", "snappy"})
```
**AFTER**
```
$ git grep 'none"' | grep Benchmark
$
```
### How was this patch tested?
Pass the CIs. In addition, the decimal benchmark was run manually against the generated data:
```
$ java -jar hive/target/orc-benchmarks-hive-*-uber.jar decimal data
Benchmark (version) Mode Cnt Score Error Units
DecimalBench.read ORIGINAL avgt 5 401602.268 ± 2813.756 us/op
DecimalBench.read USE_DECIMAL64 avgt 5 185082.089 ± 4557.199 us/op
DecimalBench.write ORIGINAL avgt 5 42553.202 ± 179.651 us/op
DecimalBench.write USE_DECIMAL64 avgt 5 14512.227 ± 41.197 us/op
```
---
.../src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java | 2 +-
java/bench/hive/src/java/org/apache/orc/bench/hive/DecimalBench.java | 2 +-
.../hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java | 2 +-
.../java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java | 2 +-
.../apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java | 2 +-
.../apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java | 2 +-
.../org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java | 2 +-
.../org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java | 2 +-
.../apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java | 2 +-
.../spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java | 4 ++--
10 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
index dce396ecb..ce0d1b705 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
@@ -63,7 +63,7 @@ public class ColumnProjectionBenchmark implements OrcBenchmark {
@Param({ "github", "sales", "taxi"})
public String dataset;
- @Param({"none", "snappy", "gz"})
+ @Param({"snappy", "gz", "zstd"})
public String compression;
@Override
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/DecimalBench.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/DecimalBench.java
index ea2ecf2eb..39a346e9d 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/DecimalBench.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/DecimalBench.java
@@ -225,7 +225,7 @@ public class DecimalBench implements OrcBenchmark {
@Setup
public void setup() throws IOException {
fs = FileSystem.getLocal(conf).getRaw();
- path = new Path(root, "generated/taxi/orc.none");
+ path = new Path(root, "generated/taxi/orc.zstd");
schema = Utilities.loadSchema("taxi.schema");
batch = schema.createRowBatch(version, 1024);
// only include the columns with decimal values
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
index c3efdeb17..dc1bcca92 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
@@ -72,7 +72,7 @@ public class FullReadBenchmark implements OrcBenchmark {
@Param({"taxi", "sales", "github"})
public String dataset;
- @Param({"none", "gz", "snappy"})
+ @Param({"gz", "snappy", "zstd"})
public String compression;
@Override
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java
index 977b4f686..a523caf7e 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/RowFilterProjectionBenchmark.java
@@ -58,7 +58,7 @@ public class RowFilterProjectionBenchmark implements OrcBenchmark {
@Param({"taxi"})
public String dataset;
- @Param({"none"})
+ @Param({"zstd"})
public String compression;
@Param({"0.01", "0.1", "0.2", "0.4", "0.6", "0.8", "1."})
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java
index 606d5c52b..5307ea6db 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/BooleanRowFilterBenchmark.java
@@ -59,7 +59,7 @@ public class BooleanRowFilterBenchmark extends org.openjdk.jmh.Main {
@Param({"2"})
public int filterColsNum;
- String dataRelativePath = "data/generated/sales/orc.none";
+ String dataRelativePath = "data/generated/sales/orc.zstd";
String schemaName = "sales.schema";
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java
index 59fddf605..6dd765209 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DecimalRowFilterBenchmark.java
@@ -60,7 +60,7 @@ public class DecimalRowFilterBenchmark extends org.openjdk.jmh.Main {
@Param({"2"})
public int filterColsNum;
- String dataRelativePath = "data/generated/taxi/orc.none";
+ String dataRelativePath = "data/generated/taxi/orc.zstd";
String schemaName = "taxi.schema";
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java
index 5ce87b8bc..bb59e9e10 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/DoubleRowFilterBenchmark.java
@@ -60,7 +60,7 @@ public class DoubleRowFilterBenchmark extends org.openjdk.jmh.Main {
@Param({"2"})
public int filterColsNum;
- String dataRelativePath = "data/generated/taxi/orc.none";
+ String dataRelativePath = "data/generated/taxi/orc.zstd";
String schemaName = "taxi.schema";
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java
index bc12fbe7f..a61de4bc6 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/StringRowFilterBenchmark.java
@@ -59,7 +59,7 @@ public class StringRowFilterBenchmark extends org.openjdk.jmh.Main {
@Param({"2"})
public int filterColsNum;
- String dataRelativePath = "data/generated/sales/orc.none";
+ String dataRelativePath = "data/generated/sales/orc.zstd";
String schemaName = "sales.schema";
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java
index a8aa6ba48..dee7fe706 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/rowfilter/TimestampRowFilterBenchmark.java
@@ -62,7 +62,7 @@ public class TimestampRowFilterBenchmark extends org.openjdk.jmh.Main {
@Param({"2"})
public int filterColsNum;
- String dataRelativePath = "data/generated/taxi/orc.none";
+ String dataRelativePath = "data/generated/taxi/orc.zstd";
String schemaName = "taxi.schema";
diff --git a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
index e7498aa48..6257fa205 100644
--- a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
+++ b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
@@ -91,7 +91,7 @@ public class SparkBenchmark implements OrcBenchmark {
CommandLine cmds = GenerateVariants.parseCommandLine(args);
new Runner(new OptionsBuilder()
.parent(Utilities.parseOptions(args, this.getClass()))
- .param("compression", cmds.getOptionValue("compress", "none,gz,snappy").split(","))
+ .param("compression", cmds.getOptionValue("compress", "gz,snappy,zstd").split(","))
.param("dataset", cmds.getOptionValue("data", "taxi,sales,github").split(","))
.param("format", cmds.getOptionValue("format", "orc,parquet,json").split(","))
.build()
@@ -111,7 +111,7 @@ public class SparkBenchmark implements OrcBenchmark {
@Param({"taxi", "sales", "github"})
String dataset;
- @Param({"none", "gz", "snappy"})
+ @Param({"gz", "snappy", "zstd"})
String compression;
@Param({"orc", "parquet", "json"})