This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1c1fa746f8 GH-39303: [Archery][Benchmarking] Allow setting C++ repetition min time (#39324)
1c1fa746f8 is described below
commit 1c1fa746f8d1266f9210d9c0e1bbab074c5dd4e0
Author: Antoine Pitrou <[email protected]>
AuthorDate: Sat Jan 6 22:58:21 2024 +0100
GH-39303: [Archery][Benchmarking] Allow setting C++ repetition min time (#39324)
### Rationale for this change
We want to be able to increase the number of repetitions for each C++
micro-benchmark without increasing the total runtime.
### What changes are included in this PR?
* Add a `--repetition-min-time` argument to set the minimum duration of each repetition, in seconds
* Add a `--cpp-benchmark-extras` argument to pass arbitrary arguments to Google Benchmark executables
* Add a couple tests with multiple benchmark repetitions
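For example, a run could spread more repetitions over roughly the same total budget like this (the flag values are illustrative only; `--benchmark_enable_random_interleaving` is a standard Google Benchmark flag, shown here just to demonstrate `--cpp-benchmark-extras`):

    archery benchmark run --repetitions=10 --repetition-min-time=0.1 \
        --cpp-benchmark-extras=--benchmark_enable_random_interleaving=true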
### Are these changes tested?
Not entirely. Command-line argument passing is not unit-tested.
### Are there any user-facing changes?
No.
* Closes: #39303
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
.github/workflows/archery.yml | 4 +-
dev/archery/archery/benchmark/google.py | 18 +--
dev/archery/archery/benchmark/runner.py | 14 ++-
dev/archery/archery/cli.py | 25 +++-
dev/archery/archery/tests/test_benchmarks.py | 173 +++++++++++++++++++++++++++
dev/archery/requirements-test.txt | 2 +
6 files changed, 218 insertions(+), 18 deletions(-)
diff --git a/.github/workflows/archery.yml b/.github/workflows/archery.yml
index 1f915240e9..d5f419f8a7 100644
--- a/.github/workflows/archery.yml
+++ b/.github/workflows/archery.yml
@@ -63,7 +63,9 @@ jobs:
- name: Install pygit2 binary wheel
run: pip install pygit2 --only-binary pygit2
- name: Install Archery, Crossbow- and Test Dependencies
- run: pip install pytest responses -e dev/archery[all]
+ run: |
+ pip install -e dev/archery[all]
+ pip install -r dev/archery/requirements-test.txt
- name: Archery Unittests
working-directory: dev/archery
run: pytest -v archery
diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index ebcc526364..5d07ffab2e 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -37,9 +37,10 @@ class GoogleBenchmarkCommand(Command):
notably `--benchmark_filter`, `--benchmark_format`, etc...
"""
- def __init__(self, benchmark_bin, benchmark_filter=None):
+ def __init__(self, benchmark_bin, benchmark_filter=None,
+ benchmark_extras=None):
self.bin = benchmark_bin
self.benchmark_filter = benchmark_filter
+ self.benchmark_extras = benchmark_extras or []
def list_benchmarks(self):
argv = ["--benchmark_list_tests"]
@@ -49,16 +50,19 @@ class GoogleBenchmarkCommand(Command):
stderr=subprocess.PIPE)
return str.splitlines(result.stdout.decode("utf-8"))
- def results(self, repetitions=1):
+ def results(self, repetitions=1, repetition_min_time=None):
with NamedTemporaryFile() as out:
- argv = ["--benchmark_repetitions={}".format(repetitions),
- "--benchmark_out={}".format(out.name),
+ argv = [f"--benchmark_repetitions={repetitions}",
+ f"--benchmark_out={out.name}",
"--benchmark_out_format=json"]
+ if repetition_min_time is not None:
+ argv.append(f"--benchmark_min_time={repetition_min_time:.6f}")
+
if self.benchmark_filter:
- argv.append(
- "--benchmark_filter={}".format(self.benchmark_filter)
- )
+ argv.append(f"--benchmark_filter={self.benchmark_filter}")
+
+ argv += self.benchmark_extras
self.run(*argv, check=True)
return json.load(out)
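(For illustration only, not part of the patch: a minimal sketch of the patched API above. The binary path is hypothetical and must point at a real Google Benchmark executable.)

    from archery.benchmark.google import GoogleBenchmarkCommand

    # Hypothetical benchmark binary from a local C++ build tree.
    cmd = GoogleBenchmarkCommand(
        "/build/cpp/release/arrow-value-parsing-benchmark",
        benchmark_filter="FloatParsing",
        benchmark_extras=["--benchmark_enable_random_interleaving=true"])

    # results() builds argv as in the hunk above:
    #   --benchmark_repetitions=3 --benchmark_out=<tmpfile>
    #   --benchmark_out_format=json --benchmark_min_time=0.100000
    #   --benchmark_filter=FloatParsing
    #   --benchmark_enable_random_interleaving=true
    results = cmd.results(repetitions=3, repetition_min_time=0.1)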
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 86053e6ecd..c12c74135e 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -42,10 +42,11 @@ DEFAULT_REPETITIONS = 1
class BenchmarkRunner:
def __init__(self, suite_filter=None, benchmark_filter=None,
- repetitions=DEFAULT_REPETITIONS):
+ repetitions=DEFAULT_REPETITIONS, repetition_min_time=None):
self.suite_filter = suite_filter
self.benchmark_filter = benchmark_filter
self.repetitions = repetitions
+ self.repetition_min_time = repetition_min_time
@property
def suites(self):
@@ -107,9 +108,10 @@ class StaticBenchmarkRunner(BenchmarkRunner):
class CppBenchmarkRunner(BenchmarkRunner):
""" Run suites from a CMakeBuild. """
- def __init__(self, build, **kwargs):
+ def __init__(self, build, benchmark_extras, **kwargs):
""" Initialize a CppBenchmarkRunner. """
self.build = build
+ self.benchmark_extras = benchmark_extras
super().__init__(**kwargs)
@staticmethod
@@ -142,14 +144,17 @@ class CppBenchmarkRunner(BenchmarkRunner):
def suite(self, name, suite_bin):
""" Returns the resulting benchmarks for a given suite. """
- suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter)
+ suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter,
+ self.benchmark_extras)
# Ensure there will be data
benchmark_names = suite_cmd.list_benchmarks()
if not benchmark_names:
return None
- results = suite_cmd.results(repetitions=self.repetitions)
+ results = suite_cmd.results(
+ repetitions=self.repetitions,
+ repetition_min_time=self.repetition_min_time)
benchmarks = GoogleBenchmark.from_json(results.get("benchmarks"))
return BenchmarkSuite(name, benchmarks)
@@ -252,6 +257,7 @@ class JavaBenchmarkRunner(BenchmarkRunner):
if not benchmark_names:
return None
+ # TODO: support `repetition_min_time`
results = suite_cmd.results(repetitions=self.repetitions)
benchmarks = JavaMicrobenchmarkHarness.from_json(results)
return BenchmarkSuite(name, benchmarks)
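(A minimal sketch of the wiring above, again for illustration only: the build object is stubbed out, so this shows how the new parameters are stored and forwarded, not a real benchmark run.)

    from archery.benchmark.runner import CppBenchmarkRunner

    class FakeBuild:
        """Stand-in for the CMakeBuild normally passed as `build`."""

    runner = CppBenchmarkRunner(
        FakeBuild(),
        benchmark_extras=["--benchmark_counters_tabular=true"],
        repetitions=5,
        repetition_min_time=0.05)

    # suite() now constructs
    #   GoogleBenchmarkCommand(suite_bin, benchmark_filter, benchmark_extras)
    # and calls results(repetitions=5, repetition_min_time=0.05).
    assert runner.repetition_min_time == 0.05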
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 32b0942630..052fe23bfc 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -377,7 +377,10 @@ def benchmark_common_options(cmd):
"Can be stacked. For language=java"),
click.option("--cmake-extras", type=str, multiple=True,
help="Extra flags/options to pass to cmake invocation. "
- "Can be stacked. For language=cpp")
+ "Can be stacked. For language=cpp"),
+ click.option("--cpp-benchmark-extras", type=str, multiple=True,
+ help="Extra flags/options to pass to C++ benchmark
executables. "
+ "Can be stacked. For language=cpp"),
]
cmd = java_toolchain_options(cmd)
@@ -440,12 +443,16 @@ def benchmark_list(ctx, rev_or_path, src, preserve, output, cmake_extras,
@click.option("--repetitions", type=int, default=-1,
help=("Number of repetitions of each benchmark. Increasing "
"may improve result precision. "
- "[default: 1 for cpp, 5 for java"))
+ "[default: 1 for cpp, 5 for java]"))
[email protected]("--repetition-min-time", type=float, default=None,
+ help=("Minimum duration of each repetition in seconds. "
+ "Currently only supported for language=cpp. "
+ "[default: use runner-specific defaults]"))
@click.pass_context
def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
java_home, java_options, build_extras, benchmark_extras,
language, suite_filter, benchmark_filter, repetitions,
- **kwargs):
+ repetition_min_time, cpp_benchmark_extras, **kwargs):
""" Run benchmark suite.
This command will run the benchmark suite for a single build. This is
@@ -468,13 +475,18 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
\b
archery benchmark run
+ \b
+ # Run the benchmarks on an existing build directory
+ \b
+ archery benchmark run /build/cpp
+
\b
# Run the benchmarks on current previous commit
\b
archery benchmark run HEAD~1
\b
- # Run the benchmarks on current previous commit
+ # Run the benchmarks on current git workspace and output results as a JSON file.
\b
archery benchmark run --output=run.json
"""
@@ -488,8 +500,9 @@ def benchmark_run(ctx, rev_or_path, src, preserve, output, cmake_extras,
repetitions = repetitions if repetitions != -1 else 1
runner_base = CppBenchmarkRunner.from_rev_or_path(
src, root, rev_or_path, conf,
- repetitions=repetitions,
- suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+ repetitions=repetitions, repetition_min_time=repetition_min_time,
+ suite_filter=suite_filter, benchmark_filter=benchmark_filter,
+ benchmark_extras=cpp_benchmark_extras)
elif language == "java":
for key in {'cpp_package_prefix', 'cxx_flags', 'cxx', 'cc'}:
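(Because `--cpp-benchmark-extras` is declared with `multiple=True`, it can be stacked; for instance, with illustrative Google Benchmark flags:)

    archery benchmark run \
        --cpp-benchmark-extras=--benchmark_counters_tabular=true \
        --cpp-benchmark-extras=--benchmark_enable_random_interleaving=true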
diff --git a/dev/archery/archery/tests/test_benchmarks.py b/dev/archery/archery/tests/test_benchmarks.py
index fab1e8d443..e5af2b3b02 100644
--- a/dev/archery/archery/tests/test_benchmarks.py
+++ b/dev/archery/archery/tests/test_benchmarks.py
@@ -81,6 +81,53 @@ def test_static_runner_from_json_not_a_regression():
assert not comparison.regression
+def test_static_runner_from_json_multiple_values_not_a_regression():
+ # Same as above, but with multiple repetitions
+ archery_result = {
+ "suites": [
+ {
+ "name": "arrow-value-parsing-benchmark",
+ "benchmarks": [
+ {
+ "name": "FloatParsing<DoubleType>",
+ "unit": "items_per_second",
+ "less_is_better": False,
+ "values": [
+ 93588476.22327498,
+ 94873831.3818328,
+ 95593675.20810866,
+ 95797325.6543961,
+ 96134728.05794072
+ ],
+ "time_unit": "ns",
+ "times": [
+ 10537.724568456104,
+ 10575.162068480413,
+ 10599.271208720838,
+ 10679.028059166194,
+ 10827.995119861762
+ ],
+ "counters": {
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "FloatParsing<DoubleType>",
+ "repetitions": 5,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10656
+ }
+ }
+ ]
+ }
+ ]
+ }
+
+ contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+ baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+ [comparison] = RunnerComparator(contender, baseline).comparisons
+ assert not comparison.regression
+
+
def test_static_runner_from_json_regression():
archery_result = {
"suites": [
@@ -114,6 +161,58 @@ def test_static_runner_from_json_regression():
assert comparison.regression
+def test_static_runner_from_json_multiple_values_regression():
+ # Same as above, but with multiple repetitions
+ archery_result = {
+ "suites": [
+ {
+ "name": "arrow-value-parsing-benchmark",
+ "benchmarks": [
+ {
+ "name": "FloatParsing<DoubleType>",
+ "unit": "items_per_second",
+ "less_is_better": False,
+ "values": [
+ 93588476.22327498,
+ 94873831.3818328,
+ 95593675.20810866,
+ 95797325.6543961,
+ 96134728.05794072
+ ],
+ "time_unit": "ns",
+ "times": [
+ 10537.724568456104,
+ 10575.162068480413,
+ 10599.271208720838,
+ 10679.028059166194,
+ 10827.995119861762
+ ],
+ "counters": {
+ "family_index": 0,
+ "per_family_instance_index": 0,
+ "run_name": "FloatParsing<DoubleType>",
+ "repetitions": 5,
+ "repetition_index": 0,
+ "threads": 1,
+ "iterations": 10656
+ }
+ }
+ ]
+ }
+ ]
+ }
+
+ contender = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+ # introduce artificial regression
+ values = archery_result['suites'][0]['benchmarks'][0]['values']
+ values[:] = [v * 2 for v in values]
+ baseline = StaticBenchmarkRunner.from_json(json.dumps(archery_result))
+
+ [comparison] = RunnerComparator(contender, baseline).comparisons
+ assert comparison.regression
+
+
def test_benchmark_median():
assert median([10]) == 10
assert median([1, 2, 3]) == 2
@@ -381,3 +480,77 @@ def test_omits_aggregates():
benchmark = GoogleBenchmark(name, [observation1, observation2])
result = json.dumps(benchmark, cls=JsonEncoder)
assert json.loads(result) == archery_result
+
+
+def test_multiple_observations():
+ name = "FloatParsing<DoubleType>"
+ google_results = [
+ {
+ 'cpu_time': 10627.38199641615,
+ 'family_index': 0,
+ 'items_per_second': 94096551.75067839,
+ 'iterations': 9487,
+ 'name': 'FloatParsing<DoubleType>',
+ 'per_family_instance_index': 0,
+ 'real_time': 10628.84905663701,
+ 'repetition_index': 0,
+ 'repetitions': 3,
+ 'run_name': 'FloatParsing<DoubleType>',
+ 'run_type': 'iteration',
+ 'threads': 1,
+ 'time_unit': 'ns'
+ },
+ {
+ 'cpu_time': 10633.318014124594,
+ 'family_index': 0,
+ 'items_per_second': 94044022.63448404,
+ 'iterations': 9487,
+ 'name': 'FloatParsing<DoubleType>',
+ 'per_family_instance_index': 0,
+ 'real_time': 10634.858754122948,
+ 'repetition_index': 1,
+ 'repetitions': 3,
+ 'run_name': 'FloatParsing<DoubleType>',
+ 'run_type': 'iteration',
+ 'threads': 1,
+ 'time_unit': 'ns'
+ },
+ {
+ 'cpu_time': 10664.315484347,
+ 'family_index': 0,
+ 'items_per_second': 93770669.24434038,
+ 'iterations': 9487,
+ 'name': 'FloatParsing<DoubleType>',
+ 'per_family_instance_index': 0,
+ 'real_time': 10665.584589337563,
+ 'repetition_index': 2,
+ 'repetitions': 3,
+ 'run_name': 'FloatParsing<DoubleType>',
+ 'run_type': 'iteration',
+ 'threads': 1,
+ 'time_unit': 'ns'
+ }
+ ]
+
+ archery_result = {
+ 'counters': {
+ 'family_index': 0,
+ 'iterations': 9487,
+ 'per_family_instance_index': 0,
+ 'repetition_index': 2,
+ 'repetitions': 3,
+ 'run_name': 'FloatParsing<DoubleType>',
+ 'threads': 1
+ },
+ 'less_is_better': False,
+ 'name': 'FloatParsing<DoubleType>',
+ 'time_unit': 'ns',
+ 'times': [10628.84905663701, 10634.858754122948, 10665.584589337563],
+ 'unit': 'items_per_second',
+ 'values': [93770669.24434038, 94044022.63448404, 94096551.75067839]
+ }
+
+ observations = [GoogleBenchmarkObservation(**g) for g in google_results]
+ benchmark = GoogleBenchmark(name, observations)
+ result = json.dumps(benchmark, cls=JsonEncoder)
+ assert json.loads(result) == archery_result
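(The new tests can be run on their own, assuming the test dependencies from requirements-test.txt below are installed; `-k` matches the test names added in this patch:)

    cd dev/archery
    pytest -v archery/tests/test_benchmarks.py -k "multiple"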
diff --git a/dev/archery/requirements-test.txt b/dev/archery/requirements-test.txt
new file mode 100644
index 0000000000..208ec64cdf
--- /dev/null
+++ b/dev/archery/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest
+responses