This is an automated email from the ASF dual-hosted git repository.
william pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 2c765a148 ORC-1708: Support data/compress options in Hive benchmark
2c765a148 is described below
commit 2c765a148eafbb0fabb92306387dca54d7c22eeb
Author: sychen <[email protected]>
AuthorDate: Tue May 7 23:33:28 2024 -0700
ORC-1708: Support data/compress options in Hive benchmark
### What changes were proposed in this pull request?
This PR aims to support data/compress options in Hive benchmark.
### Why are the changes needed?
These options make it easy to run sampling tests in the Hive benchmark by restricting a run to specific datasets and compression codecs.
### How was this patch tested?
Tested locally:
```bash
java -jar hive/target/orc-benchmarks-hive-*-uber.jar read-all data -data \
  taxi -compress snappy
```
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #1924 from cxzl25/ORC-1708.
Authored-by: sychen <[email protected]>
Signed-off-by: William Hyun <[email protected]>
(cherry picked from commit 4c3a5543dc66fc6967d61fb80ab2dedb298bd700)
Signed-off-by: William Hyun <[email protected]>
---
.../org/apache/orc/bench/hive/ColumnProjectionBenchmark.java | 11 ++++++++++-
.../src/java/org/apache/orc/bench/hive/FullReadBenchmark.java | 11 ++++++++++-
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
index 9c1b7fd21..48806faff 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/ColumnProjectionBenchmark.java
@@ -19,6 +19,7 @@
package org.apache.orc.bench.hive;
import com.google.auto.service.AutoService;
+import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -38,6 +39,7 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.bench.core.IOCounters;
import org.apache.orc.bench.core.OrcBenchmark;
import org.apache.orc.bench.core.Utilities;
+import org.apache.orc.bench.core.convert.GenerateVariants;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -47,6 +49,7 @@ import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.net.URI;
import java.util.List;
@@ -78,7 +81,13 @@ public class ColumnProjectionBenchmark implements OrcBenchmark {
@Override
public void run(String[] args) throws Exception {
- new Runner(Utilities.parseOptions(args, getClass())).run();
+ CommandLine cmds = GenerateVariants.parseCommandLine(args);
+ new Runner(new OptionsBuilder()
+ .parent(Utilities.parseOptions(args, this.getClass()))
+ .param("compression", cmds.getOptionValue("compress", "snappy,gz,zstd").split(","))
+ .param("dataset", cmds.getOptionValue("data", "github,sales,taxi").split(","))
+ .build()
+ ).run();
}
@Benchmark
diff --git a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
index dc1bcca92..8f3b1cbba 100644
--- a/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
+++ b/java/bench/hive/src/java/org/apache/orc/bench/hive/FullReadBenchmark.java
@@ -25,6 +25,7 @@ import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.mapred.FsInput;
+import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -45,6 +46,7 @@ import org.apache.orc.bench.core.CompressionKind;
import org.apache.orc.bench.core.IOCounters;
import org.apache.orc.bench.core.OrcBenchmark;
import org.apache.orc.bench.core.Utilities;
+import org.apache.orc.bench.core.convert.GenerateVariants;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -54,6 +56,7 @@ import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -87,7 +90,13 @@ public class FullReadBenchmark implements OrcBenchmark {
@Override
public void run(String[] args) throws Exception {
- new Runner(Utilities.parseOptions(args, getClass())).run();
+ CommandLine cmds = GenerateVariants.parseCommandLine(args);
+ new Runner(new OptionsBuilder()
+ .parent(Utilities.parseOptions(args, this.getClass()))
+ .param("compression", cmds.getOptionValue("compress", "gz,snappy,zstd").split(","))
+ .param("dataset", cmds.getOptionValue("data", "taxi,sales,github").split(","))
+ .build()
+ ).run();
}
@Benchmark