This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 9cbccd6 CI: add benchmark workflow and script (#250)
9cbccd6 is described below
commit 9cbccd6fb87c58db68990d7b00f57be6659412dd
Author: Saurabh Singh <[email protected]>
AuthorDate: Thu May 29 05:51:21 2025 +0530
CI: add benchmark workflow and script (#250)
Fixes: #85
### What changes are included in this PR?
Added Benchmark CI using Debian as runtime environment
### Are these changes tested?
Yes, I have tested the changes locally.
---------
Signed-off-by: Saurabh Kumar Singh <[email protected]>
---
.github/workflows/benchmark.yml | 71 ++++++++++++++++++++++
ci/scripts/bench.sh | 53 ++++++++++++++++
ci/scripts/bench_adapt.py | 130 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 254 insertions(+)
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..8dadf3f
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Benchmarks
+# Runs on every push to main, on pull requests that touch the benchmark
+# tooling listed under `paths`, and on manual dispatch.
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - ".github/workflows/benchmark.yml"
+      - "ci/scripts/bench.sh"
+      - "ci/scripts/bench_adapt.py"
+  workflow_dispatch:
+permissions:
+  contents: read
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    container: debian:12
+    strategy:
+      matrix:
+        go: ['1.22.7']
+        # Only used to label the machine in CONBENCH_MACHINE_INFO_NAME below.
+        arch: ['amd64']
+    steps:
+      - name: Install dependencies
+        run: |
+          apt-get update
+          apt-get install -y git ca-certificates
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          # bench.sh points PARQUET_TEST_DATA at parquet-testing/data;
+          # presumably that directory is a git submodule (TODO confirm).
+          submodules: recursive
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.9'
+      - name: Install Go ${{ matrix.go }} for Benchmarks
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ matrix.go }}
+          cache: true
+          cache-dependency-path: go.sum
+          check-latest: false
+      # Pull requests and manual runs only execute the benchmarks; nothing
+      # is uploaded. The flag must be spelled `-json`, which is the form
+      # ci/scripts/bench.sh checks for.
+      - name: Run Benchmarks
+        if: github.event_name != 'push'
+        run: bash ci/scripts/bench.sh "$(pwd)" -json
+      # Pushes to main of the canonical repository publish results.
+      # bench_adapt.py is configured with bench.sh as its command, so no
+      # separate benchmark step is needed here.
+      - name: Upload results
+        if: github.event_name == 'push' && github.repository == 'apache/arrow-go' && github.ref_name == 'main'
+        env:
+          CONBENCH_URL: https://conbench.ursa.dev
+          CONBENCH_EMAIL: ${{ secrets.CONBENCH_EMAIL }}
+          CONBENCH_PASSWORD: ${{ secrets.CONBENCH_PASS }}
+          CONBENCH_REF: ${{ github.ref_name }}
+          CONBENCH_MACHINE_INFO_NAME: ${{ matrix.arch }}-debian-12
+        run: |
+          python3 -m pip install benchadapt@git+https://github.com/conbench/conbench.git@main#subdirectory=benchadapt/python
+          python3 ci/scripts/bench_adapt.py
diff --git a/ci/scripts/bench.sh b/ci/scripts/bench.sh
new file mode 100644
index 0000000..597b2a1
--- /dev/null
+++ b/ci/scripts/bench.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# this will output the benchmarks to STDOUT but if `-json` is passed
+# as the second argument, it will create a file "bench_stats.json"
+# in the directory this is called from containing a json representation
+
+set -ex
+
+# Validate input arguments
+if [ -z "$1" ]; then
+ echo "Error: Missing source directory argument"
+ exit 1
+fi
+
+source_dir="$1"
+
+PARQUET_TEST_DATA="${source_dir}/parquet-testing/data"
+export PARQUET_TEST_DATA
+
+pushd "${source_dir}"
+
+# lots of benchmarks, they can take a while
+# the timeout is for *ALL* benchmarks together,
+# not per benchmark
+go test -bench=. -benchmem -timeout 40m -run=^$ ./... | tee bench_stat.dat
+
+popd
+
+if [[ "$2" = "-json" ]]; then
+ go install go.bobheadxi.dev/gobenchdata@latest
+ PATH=$(go env GOPATH)/bin:$PATH
+ export PATH
+ cat "${source_dir}"/bench_*.dat | gobenchdata --json bench_stats.json
+fi
+
+rm "${source_dir}"/bench_*.dat
diff --git a/ci/scripts/bench_adapt.py b/ci/scripts/bench_adapt.py
new file mode 100644
index 0000000..554538f
--- /dev/null
+++ b/ci/scripts/bench_adapt.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import os
+import uuid
+import logging
+from pathlib import Path
+from typing import List
+
+from benchadapt import BenchmarkResult
+from benchadapt.adapters import BenchmarkAdapter
+from benchadapt.log import log
+
+log.setLevel(logging.DEBUG)
+
+ARROW_ROOT = Path(__file__).parent.parent.parent.resolve()
+SCRIPTS_PATH = ARROW_ROOT / "ci" / "scripts"
+
+# `github_commit_info` is meant to communicate GitHub-flavored commit
+# information to Conbench. See
+#
https://github.com/conbench/conbench/blob/cf7931f/benchadapt/python/benchadapt/result.py#L66
+# for a specification.
+github_commit_info = {"repository": "https://github.com/apache/arrow-go"}
+
+if os.environ.get("CONBENCH_REF") == "main":
+ # Assume GitHub Actions CI. The environment variable lookups below are
+ # expected to fail when not running in GitHub Actions.
+ github_commit_info = {
+ "repository":
f'{os.environ["GITHUB_SERVER_URL"]}/{os.environ["GITHUB_REPOSITORY"]}',
+ "commit": os.environ["GITHUB_SHA"],
+ "pr_number": None, # implying default branch
+ }
+ run_reason = "commit"
+else:
+ # Assume that the environment is not GitHub Actions CI. Error out if that
+ # assumption seems to be wrong.
+ assert os.getenv("GITHUB_ACTIONS") is None
+
+ # This is probably a local dev environment, for testing. In this case, it
+ # does usually not make sense to provide commit information (not a
+ # controlled CI environment). Explicitly leave out "commit" and
"pr_number" to
+ # reflect that (to not send commit information).
+
+ # Reflect 'local dev' scenario in run_reason. Allow user to (optionally)
+ # inject a custom piece of information into the run reason here, from
+ # environment.
+ run_reason = "localdev"
+ custom_reason_suffix = os.getenv("CONBENCH_CUSTOM_RUN_REASON")
+ if custom_reason_suffix is not None:
+ run_reason += f" {custom_reason_suffix.strip()}"
+
+
+class GoAdapter(BenchmarkAdapter):
+ result_file = "bench_stats.json"
+ command = ["bash", SCRIPTS_PATH / "bench.sh", ARROW_ROOT, "-json"]
+
+ def __init__(self, *args, **kwargs) -> None:
+ super().__init__(command=self.command, *args, **kwargs)
+
+ def _transform_results(self) -> List[BenchmarkResult]:
+ with open(self.result_file, "r") as f:
+ raw_results = json.load(f)
+
+ run_id = uuid.uuid4().hex
+ parsed_results = []
+ for suite in raw_results[0]["Suites"]:
+ batch_id = uuid.uuid4().hex
+ pkg = suite["Pkg"]
+
+ for benchmark in suite["Benchmarks"]:
+ data = benchmark["Mem"]["MBPerSec"] * 1e6
+ time = 1 / benchmark["NsPerOp"] * 1e9
+
+ name = benchmark["Name"].removeprefix("Benchmark")
+ ncpu = name[name.rfind("-") + 1 :]
+ pieces = name[: -(len(ncpu) + 1)].split("/")
+
+ parsed = BenchmarkResult(
+ run_id=run_id,
+ batch_id=batch_id,
+ stats={
+ "data": [data],
+ "unit": "B/s",
+ "times": [time],
+ "time_unit": "i/s",
+ "iterations": benchmark["Runs"],
+ },
+ context={
+ "benchmark_language": "Go",
+ "goos": suite["Goos"],
+ "goarch": suite["Goarch"],
+ },
+ tags={
+ "pkg": pkg,
+ "num_cpu": ncpu,
+ "name": pieces[0],
+ "params": "/".join(pieces[1:]),
+ },
+ run_reason=run_reason,
+ github=github_commit_info,
+ )
+ parsed.run_name = (
+ f"{parsed.run_reason}: {github_commit_info.get('commit')}"
+ )
+ parsed_results.append(parsed)
+
+ return parsed_results
+
+
+if __name__ == "__main__":
+ go_adapter = GoAdapter(result_fields_override={"info": {}})
+ go_adapter()
+
\ No newline at end of file