This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch auto-benchmark
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
commit 4cca96d001cbcb5777b11eb3ab62544941479378 Author: Jia Yu <[email protected]> AuthorDate: Tue Jan 13 00:20:24 2026 -0800 Add initial code --- .github/workflows/benchmark.yml | 306 +++++++++++++++++++++++++++++ benchmark/requirements.txt | 18 ++ benchmark/run_benchmark.py | 426 ++++++++++++++++++++++++++++++++++++++++ benchmark/summarize_results.py | 316 +++++++++++++++++++++++++++++ 4 files changed, 1066 insertions(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000..5bbca48 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,306 @@ +name: SpatialBench Benchmark + +on: + # Run every other week on Monday at 6:00 UTC + schedule: + - cron: '0 6 1-7,15-21 * 1' + # Allow manual triggering + workflow_dispatch: + inputs: + scale_factor: + description: 'Scale factor for benchmark' + required: false + default: '1' + type: choice + options: + - '0.1' + - '1' + - '10' + engines: + description: 'Engines to benchmark (comma-separated)' + required: false + default: 'duckdb,geopandas,sedonadb' + type: string + timeout: + description: 'Query timeout in seconds' + required: false + default: '600' + type: string + sedonadb_version: + description: 'SedonaDB version' + required: false + default: '0.2.0' + type: string + duckdb_version: + description: 'DuckDB version' + required: false + default: '1.4.3' + type: string + geopandas_version: + description: 'GeoPandas version' + required: false + default: '1.1.2' + type: string + regenerate_data: + description: 'Force regenerate data (ignore cache)' + required: false + default: false + type: boolean + +concurrency: + group: ${{ github.repository }}-${{ github.ref }}-benchmark + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + SCALE_FACTOR: ${{ github.event.inputs.scale_factor || '1' }} + BENCHMARK_ENGINES: ${{ github.event.inputs.engines || 'duckdb,geopandas,sedonadb' }} + QUERY_TIMEOUT: ${{ github.event.inputs.timeout || '600' }} + # Package versions (can be overridden via workflow_dispatch) + SEDONADB_VERSION: ${{ github.event.inputs.sedonadb_version || '0.2.0' }} + DUCKDB_VERSION: ${{ github.event.inputs.duckdb_version || '1.4.3' }} + GEOPANDAS_VERSION: ${{ github.event.inputs.geopandas_version || '1.1.2' }} + # Cache key version - increment to invalidate all data caches + DATA_CACHE_VERSION: v1 + +jobs: + # Generate and cache data for all scale factors + generate-data: + name: Generate Data (SF${{ matrix.scale_factor }}) + runs-on: ubuntu-latest + strategy: + matrix: + scale_factor: ['0.1', '1', '10'] + steps: + - uses: actions/checkout@v4 + + - name: Restore data from cache + id: cache-restore + uses: actions/cache/restore@v4 + with: + path: benchmark-data-sf${{ matrix.scale_factor }} + key: spatialbench-data-${{ env.DATA_CACHE_VERSION }}-sf${{ matrix.scale_factor }} + + - name: Setup Python + if: steps.cache-restore.outputs.cache-hit != 'true' || github.event.inputs.regenerate_data == 'true' + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install spatialbench-cli + if: steps.cache-restore.outputs.cache-hit != 'true' || github.event.inputs.regenerate_data == 'true' + run: pip install spatialbench-cli + + - name: Generate benchmark data (SF${{ matrix.scale_factor }}) + if: steps.cache-restore.outputs.cache-hit != 'true' || github.event.inputs.regenerate_data == 'true' + run: | + rm -rf benchmark-data-sf${{ matrix.scale_factor }} + mkdir -p benchmark-data-sf${{ matrix.scale_factor }} + spatialbench-cli \ + --scale-factor ${{ 
matrix.scale_factor }} \ + --format parquet \ + --output-dir benchmark-data-sf${{ matrix.scale_factor }} + + echo "Generated data for SF${{ matrix.scale_factor }}:" + ls -la benchmark-data-sf${{ matrix.scale_factor }}/ + du -sh benchmark-data-sf${{ matrix.scale_factor }}/ + + - name: Save data to cache + if: steps.cache-restore.outputs.cache-hit != 'true' || github.event.inputs.regenerate_data == 'true' + uses: actions/cache/save@v4 + with: + path: benchmark-data-sf${{ matrix.scale_factor }} + key: spatialbench-data-${{ env.DATA_CACHE_VERSION }}-sf${{ matrix.scale_factor }} + + - name: Report cache status + run: | + if [ "${{ steps.cache-restore.outputs.cache-hit }}" == "true" ]; then + echo "ā Data for SF${{ matrix.scale_factor }} restored from cache" + else + echo "š¦ Data for SF${{ matrix.scale_factor }} generated and cached" + fi + ls -la benchmark-data-sf${{ matrix.scale_factor }}/ + du -sh benchmark-data-sf${{ matrix.scale_factor }}/ + + benchmark-duckdb: + name: Benchmark DuckDB (SF${{ github.event.inputs.scale_factor || '1' }}) + needs: generate-data + runs-on: ubuntu-latest + if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb', 'duckdb') + steps: + - uses: actions/checkout@v4 + + - name: Restore benchmark data from cache + uses: actions/cache/restore@v4 + with: + path: benchmark-data-sf${{ env.SCALE_FACTOR }} + key: spatialbench-data-${{ env.DATA_CACHE_VERSION }}-sf${{ env.SCALE_FACTOR }} + fail-on-cache-miss: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install "duckdb==${{ env.DUCKDB_VERSION }}" pyarrow pandas + echo "Installed DuckDB version: $(python -c 'import duckdb; print(duckdb.__version__)')" + + - name: Run DuckDB benchmark + run: | + python benchmark/run_benchmark.py \ + --data-dir benchmark-data-sf${{ env.SCALE_FACTOR }} \ + --engines duckdb \ + --timeout ${{ env.QUERY_TIMEOUT }} \ + --scale-factor ${{ env.SCALE_FACTOR }} \ + --output duckdb_results.json + + - name: Upload results + uses: actions/upload-artifact@v4 + with: + name: duckdb-results-sf${{ env.SCALE_FACTOR }} + path: duckdb_results.json + retention-days: 30 + + benchmark-geopandas: + name: Benchmark GeoPandas (SF${{ github.event.inputs.scale_factor || '1' }}) + needs: generate-data + runs-on: ubuntu-latest + if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb', 'geopandas') + steps: + - uses: actions/checkout@v4 + + - name: Restore benchmark data from cache + uses: actions/cache/restore@v4 + with: + path: benchmark-data-sf${{ env.SCALE_FACTOR }} + key: spatialbench-data-${{ env.DATA_CACHE_VERSION }}-sf${{ env.SCALE_FACTOR }} + fail-on-cache-miss: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install "geopandas==${{ env.GEOPANDAS_VERSION }}" pandas pyarrow shapely numpy + echo "Installed GeoPandas version: $(python -c 'import geopandas; print(geopandas.__version__)')" + + - name: Run GeoPandas benchmark + run: | + python benchmark/run_benchmark.py \ + --data-dir benchmark-data-sf${{ env.SCALE_FACTOR }} \ + --engines geopandas \ + --timeout ${{ env.QUERY_TIMEOUT }} \ + --scale-factor ${{ env.SCALE_FACTOR }} \ + --output geopandas_results.json + + - name: Upload results + uses: actions/upload-artifact@v4 + with: + name: geopandas-results-sf${{ env.SCALE_FACTOR }} + path: geopandas_results.json + retention-days: 30 + + benchmark-sedonadb: + name: 
Benchmark SedonaDB (SF${{ github.event.inputs.scale_factor || '1' }}) + needs: generate-data + runs-on: ubuntu-latest + if: contains(github.event.inputs.engines || 'duckdb,geopandas,sedonadb', 'sedonadb') + steps: + - uses: actions/checkout@v4 + + - name: Restore benchmark data from cache + uses: actions/cache/restore@v4 + with: + path: benchmark-data-sf${{ env.SCALE_FACTOR }} + key: spatialbench-data-${{ env.DATA_CACHE_VERSION }}-sf${{ env.SCALE_FACTOR }} + fail-on-cache-miss: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install "sedonadb==${{ env.SEDONADB_VERSION }}" pandas pyarrow + echo "Installed SedonaDB version: $(python -c 'import sedonadb; print(sedonadb.__version__)')" + + - name: Run SedonaDB benchmark + run: | + python benchmark/run_benchmark.py \ + --data-dir benchmark-data-sf${{ env.SCALE_FACTOR }} \ + --engines sedonadb \ + --timeout ${{ env.QUERY_TIMEOUT }} \ + --scale-factor ${{ env.SCALE_FACTOR }} \ + --output sedonadb_results.json + + - name: Upload results + uses: actions/upload-artifact@v4 + with: + name: sedonadb-results-sf${{ env.SCALE_FACTOR }} + path: sedonadb_results.json + retention-days: 30 + + summarize-results: + name: Summarize Results (SF${{ github.event.inputs.scale_factor || '1' }}) + needs: [benchmark-duckdb, benchmark-geopandas, benchmark-sedonadb] + if: always() && (needs.benchmark-duckdb.result == 'success' || needs.benchmark-geopandas.result == 'success' || needs.benchmark-sedonadb.result == 'success') + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Download DuckDB results + if: needs.benchmark-duckdb.result == 'success' + uses: actions/download-artifact@v4 + with: + name: duckdb-results-sf${{ env.SCALE_FACTOR }} + path: results + continue-on-error: true + + - name: Download GeoPandas results + if: needs.benchmark-geopandas.result == 'success' + uses: actions/download-artifact@v4 + with: + name: geopandas-results-sf${{ env.SCALE_FACTOR }} + path: results + continue-on-error: true + + - name: Download SedonaDB results + uses: actions/download-artifact@v4 + with: + name: sedonadb-results-sf${{ env.SCALE_FACTOR }} + path: results + continue-on-error: true + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Generate summary + run: | + python benchmark/summarize_results.py \ + --results-dir results \ + --output benchmark_summary.md + + - name: Display summary + run: cat benchmark_summary.md + + - name: Add summary to job output + run: cat benchmark_summary.md >> $GITHUB_STEP_SUMMARY + + - name: Upload combined results + uses: actions/upload-artifact@v4 + with: + name: benchmark-summary-sf${{ env.SCALE_FACTOR }} + path: | + results/ + benchmark_summary.md + retention-days: 90 diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt new file mode 100644 index 0000000..4904a2f --- /dev/null +++ b/benchmark/requirements.txt @@ -0,0 +1,18 @@ +# SpatialBench Benchmark Dependencies +# Default versions (GitHub Actions workflow may override these) + +# For data generation +spatialbench-cli + +# For DuckDB benchmarks +duckdb==1.4.3 +pyarrow>=14.0.0 +pandas>=2.0.0 + +# For GeoPandas benchmarks +geopandas==1.1.2 +shapely>=2.0.0 +numpy>=1.24.0 + +# For SedonaDB benchmarks +sedonadb==0.2.0 diff --git a/benchmark/run_benchmark.py b/benchmark/run_benchmark.py new file mode 100644 index 0000000..c16a813 --- /dev/null +++ b/benchmark/run_benchmark.py @@ -0,0 +1,426 @@ 
+#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +SpatialBench Benchmark Runner + +This script runs spatial benchmarks comparing SedonaDB, DuckDB, and GeoPandas +on the SpatialBench queries at a specified scale factor. +""" + +import argparse +import json +import signal +import sys +import time +from abc import ABC, abstractmethod +from contextlib import contextmanager +from dataclasses import dataclass, field +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable + +# Add parent directory to path to import query modules +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Constants +QUERY_COUNT = 12 +TABLES = ["building", "customer", "driver", "trip", "vehicle", "zone"] + + +@dataclass +class BenchmarkResult: + """Result of a single query benchmark.""" + query: str + engine: str + time_seconds: float | None + row_count: int | None + status: str # "success", "error", "timeout" + error_message: str | None = None + + +@dataclass +class BenchmarkSuite: + """Complete benchmark suite results.""" + engine: str + scale_factor: int + results: list[BenchmarkResult] = field(default_factory=list) + total_time: float = 0.0 + timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat()) + version: str = "unknown" + + def to_dict(self) -> dict[str, Any]: + return { + "engine": self.engine, + "version": self.version, + "scale_factor": self.scale_factor, + "timestamp": self.timestamp, + "total_time": self.total_time, + "results": [ + { + "query": r.query, + "time_seconds": r.time_seconds, + "row_count": r.row_count, + "status": r.status, + "error_message": r.error_message, + } + for r in self.results + ], + } + + +@contextmanager +def timeout_handler(seconds: int, query_name: str): + """Context manager for handling query timeouts (Unix only).""" + def _handler(signum, frame): + raise TimeoutError(f"Query {query_name} timed out after {seconds} seconds") + + if hasattr(signal, 'SIGALRM'): + old_handler = signal.signal(signal.SIGALRM, _handler) + signal.alarm(seconds) + try: + yield + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) + else: + # Windows: no timeout support + yield + + +def get_data_paths(data_dir: str) -> dict[str, str]: + """Get paths to all data tables.""" + data_path = Path(data_dir) + paths = {} + + for table in TABLES: + table_path = data_path / table + if table_path.is_dir(): + parquet_files = list(table_path.glob("*.parquet")) + paths[table] = str(table_path / "*.parquet") if parquet_files else str(table_path) + elif (data_path / f"{table}.parquet").exists(): + paths[table] = str(data_path / f"{table}.parquet") + else: + matches = list(data_path.glob(f"{table}*.parquet")) + if matches: + paths[table] = str(matches[0]) + + 
return paths + + +class BaseBenchmark(ABC): + """Base class for benchmark runners.""" + + def __init__(self, data_paths: dict[str, str], engine_name: str): + self.data_paths = data_paths + self.engine_name = engine_name + + @abstractmethod + def setup(self) -> None: + """Initialize the benchmark environment.""" + pass + + @abstractmethod + def teardown(self) -> None: + """Cleanup the benchmark environment.""" + pass + + @abstractmethod + def execute_query(self, query_name: str, query: str | None) -> tuple[int, Any]: + """Execute a query and return (row_count, result).""" + pass + + def run_query(self, query_name: str, query: str | None = None, timeout: int = 1200) -> BenchmarkResult: + """Run a single query with timeout handling.""" + start_time = time.perf_counter() + try: + with timeout_handler(timeout, query_name): + row_count, _ = self.execute_query(query_name, query) + elapsed = time.perf_counter() - start_time + return BenchmarkResult( + query=query_name, + engine=self.engine_name, + time_seconds=round(elapsed, 2), + row_count=row_count, + status="success", + ) + except TimeoutError as e: + return BenchmarkResult( + query=query_name, + engine=self.engine_name, + time_seconds=None, + row_count=None, + status="timeout", + error_message=str(e), + ) + except Exception as e: + return BenchmarkResult( + query=query_name, + engine=self.engine_name, + time_seconds=None, + row_count=None, + status="error", + error_message=str(e), + ) + + +class DuckDBBenchmark(BaseBenchmark): + """DuckDB benchmark runner.""" + + def __init__(self, data_paths: dict[str, str]): + super().__init__(data_paths, "duckdb") + self._conn = None + + def setup(self) -> None: + import duckdb + self._conn = duckdb.connect() + self._conn.execute("INSTALL spatial; LOAD spatial;") + self._conn.execute("SET enable_external_file_cache = false;") + for table, path in self.data_paths.items(): + self._conn.execute(f"CREATE VIEW {table} AS SELECT * FROM read_parquet('{path}')") + + def teardown(self) -> None: + if self._conn: + self._conn.close() + self._conn = None + + def execute_query(self, query_name: str, query: str | None) -> tuple[int, Any]: + result = self._conn.execute(query).fetchall() + return len(result), result + + +class GeoPandasBenchmark(BaseBenchmark): + """GeoPandas benchmark runner.""" + + def __init__(self, data_paths: dict[str, str]): + super().__init__(data_paths, "geopandas") + self._queries = None + + def setup(self) -> None: + import importlib.util + geopandas_path = Path(__file__).parent.parent / "geopandas.py" + spec = importlib.util.spec_from_file_location("geopandas_queries", geopandas_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + self._queries = {f"q{i}": getattr(module, f"q{i}") for i in range(1, QUERY_COUNT + 1)} + + def teardown(self) -> None: + self._queries = None + + def execute_query(self, query_name: str, query: str | None) -> tuple[int, Any]: + if query_name not in self._queries: + raise ValueError(f"Query {query_name} not found") + result = self._queries[query_name](self.data_paths) + return len(result), result + + +class SedonaDBBenchmark(BaseBenchmark): + """SedonaDB benchmark runner.""" + + def __init__(self, data_paths: dict[str, str]): + super().__init__(data_paths, "sedonadb") + self._sedona = None + + def setup(self) -> None: + import sedona.db + self._sedona = sedona.db.connect() + for table, path in self.data_paths.items(): + self._sedona.read_parquet(path).create_temp_view(table) + + def teardown(self) -> None: + if self._sedona: + 
self._sedona.close() + self._sedona = None + + def execute_query(self, query_name: str, query: str | None) -> tuple[int, Any]: + result = self._sedona.sql(query).collect() + return len(result), result + + +def get_sql_queries(dialect: str) -> dict[str, str]: + """Get SQL queries for a specific dialect from print_queries.py.""" + from print_queries import DuckDBSpatialBenchBenchmark, SedonaDBSpatialBenchBenchmark + + dialects = { + "duckdb": DuckDBSpatialBenchBenchmark, + "sedonadb": SedonaDBSpatialBenchBenchmark, + } + return dialects[dialect]().queries() + + +def run_benchmark( + engine: str, + data_paths: dict[str, str], + queries: list[str] | None, + timeout: int, + scale_factor: int, +) -> BenchmarkSuite: + """Generic benchmark runner for any engine.""" + + # Engine configurations + configs = { + "duckdb": { + "class": DuckDBBenchmark, + "version_getter": lambda: __import__("duckdb").__version__, + "queries_getter": lambda: get_sql_queries("duckdb"), + "needs_sql": True, + }, + "geopandas": { + "class": GeoPandasBenchmark, + "version_getter": lambda: __import__("geopandas").__version__, + "queries_getter": lambda: {f"q{i}": None for i in range(1, QUERY_COUNT + 1)}, + "needs_sql": False, + }, + "sedonadb": { + "class": SedonaDBBenchmark, + "version_getter": lambda: getattr(__import__("sedonadb"), "__version__", "unknown"), + "queries_getter": lambda: get_sql_queries("sedonadb"), + "needs_sql": True, + }, + } + + config = configs[engine] + version = config["version_getter"]() + + print(f"\n{'=' * 60}") + print(f"Running {engine.title()} Benchmark") + print(f"{'=' * 60}") + print(f"{engine.title()} version: {version}") + + benchmark = config["class"](data_paths) + suite = BenchmarkSuite(engine=engine, scale_factor=scale_factor, version=version) + + try: + benchmark.setup() + all_queries = config["queries_getter"]() + + for query_name, query_sql in all_queries.items(): + if queries and query_name not in queries: + continue + + print(f" Running {query_name}...", end=" ", flush=True) + result = benchmark.run_query(query_name, query_sql, timeout) + suite.results.append(result) + + if result.status == "success": + print(f"{result.time_seconds}s ({result.row_count} rows)") + suite.total_time += result.time_seconds + else: + print(f"{result.status.upper()}: {result.error_message}") + finally: + benchmark.teardown() + + return suite + + +def print_summary(results: list[BenchmarkSuite]) -> None: + """Print a summary comparison table.""" + print(f"\n{'=' * 80}") + print("BENCHMARK SUMMARY") + print("=" * 80) + + all_queries = sorted( + {r.query for suite in results for r in suite.results}, + key=lambda x: int(x[1:]) + ) + + data = { + suite.engine: { + r.query: f"{r.time_seconds:.2f}s" if r.status == "success" else r.status.upper() + for r in suite.results + } + for suite in results + } + + engines = [s.engine for s in results] + header = f"{'Query':<10}" + "".join(f"{e:<15}" for e in engines) + print(header) + print("-" * len(header)) + + for query in all_queries: + row = f"{query:<10}" + "".join(f"{data.get(e, {}).get(query, 'N/A'):<15}" for e in engines) + print(row) + + print("-" * len(header)) + print(f"{'Total':<10}" + "".join(f"{s.total_time:.2f}s{'':<9}" for s in results)) + + +def save_results(results: list[BenchmarkSuite], output_file: str) -> None: + """Save results to JSON file.""" + output = { + "benchmark": "spatialbench", + "version": "0.1.0", + "generated_at": datetime.now(timezone.utc).isoformat(), + "results": [suite.to_dict() for suite in results], + } + + with open(output_file, 
"w") as f: + json.dump(output, f, indent=2) + + print(f"\nResults saved to {output_file}") + + +def main(): + parser = argparse.ArgumentParser( + description="Run SpatialBench benchmarks comparing SedonaDB, DuckDB, and GeoPandas" + ) + parser.add_argument("--data-dir", type=str, required=True, + help="Path to directory containing benchmark data (parquet files)") + parser.add_argument("--engines", type=str, default="duckdb,geopandas", + help="Comma-separated list of engines to benchmark") + parser.add_argument("--queries", type=str, default=None, + help="Comma-separated list of queries to run (e.g., q1,q2,q3)") + parser.add_argument("--timeout", type=int, default=600, + help="Query timeout in seconds (default: 600)") + parser.add_argument("--output", type=str, default="benchmark_results.json", + help="Output file for results") + parser.add_argument("--scale-factor", type=int, default=1, + help="Scale factor of the data (for reporting only)") + + args = parser.parse_args() + + engines = [e.strip().lower() for e in args.engines.split(",")] + valid_engines = {"duckdb", "geopandas", "sedonadb"} + + for e in engines: + if e not in valid_engines: + print(f"Error: Unknown engine '{e}'. Valid options: {valid_engines}") + sys.exit(1) + + queries = [q.strip().lower() for q in args.queries.split(",")] if args.queries else None + + data_paths = get_data_paths(args.data_dir) + if not data_paths: + print(f"Error: No data files found in {args.data_dir}") + sys.exit(1) + + print("Data paths:") + for table, path in data_paths.items(): + print(f" {table}: {path}") + + results = [ + run_benchmark(engine, data_paths, queries, args.timeout, args.scale_factor) + for engine in engines + ] + + print_summary(results) + save_results(results, args.output) + + +if __name__ == "__main__": + main() diff --git a/benchmark/summarize_results.py b/benchmark/summarize_results.py new file mode 100644 index 0000000..858633a --- /dev/null +++ b/benchmark/summarize_results.py @@ -0,0 +1,316 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Summarize benchmark results from multiple engines into a markdown report. 
+""" + +import argparse +import json +from datetime import datetime, timezone +from pathlib import Path + + +def load_results(results_dir: str) -> dict: + """Load all JSON result files from a directory.""" + results = {} + results_path = Path(results_dir) + + for json_file in results_path.glob("*_results.json"): + with open(json_file) as f: + data = json.load(f) + for suite in data.get("results", []): + engine = suite["engine"] + results[engine] = suite + + return results + + +def format_time(seconds: float | None) -> str: + """Format time in seconds to a readable string.""" + if seconds is None: + return "N/A" + if seconds < 0.01: + return "<0.01s" + return f"{seconds:.2f}s" + + +def get_winner(query: str, data: dict, engines: list) -> str | None: + """Get the fastest engine for a query.""" + times = {} + for engine in engines: + result = data.get(engine, {}).get(query, {}) + if result.get("status") == "success" and result.get("time_seconds") is not None: + times[engine] = result["time_seconds"] + + if not times: + return None + return min(times, key=times.get) + + +def generate_markdown_summary(results: dict, output_file: str) -> str: + """Generate a markdown summary of benchmark results for GitHub Actions.""" + engines = sorted(results.keys()) + + if not engines: + markdown = "# š SpatialBench Benchmark Results\n\nā ļø No results found." + with open(output_file, "w") as f: + f.write(markdown) + return markdown + + # Get scale factor from first result + scale_factor = results[engines[0]].get("scale_factor", 1) + timestamp = results[engines[0]].get("timestamp", datetime.now(timezone.utc).isoformat()) + + # Collect all queries + all_queries = set() + for engine_data in results.values(): + for r in engine_data.get("results", []): + all_queries.add(r["query"]) + all_queries = sorted(all_queries, key=lambda x: int(x[1:])) + + # Build result lookup + data = {} + for engine, engine_data in results.items(): + data[engine] = {} + for r in engine_data.get("results", []): + data[engine][r["query"]] = r + + # Get version info + versions = {engine: results[engine].get("version", "unknown") for engine in engines} + + # Engine display names with icons + engine_icons = { + "sedonadb": "šµ SedonaDB", + "duckdb": "š¦ DuckDB", + "geopandas": "š¼ GeoPandas", + } + + # Generate markdown + lines = [ + "# š SpatialBench Benchmark Results", + "", + "| Parameter | Value |", + "|-----------|-------|", + f"| **Scale Factor** | {scale_factor} |", + f"| **Timestamp** | {timestamp} |", + f"| **Queries** | {len(all_queries)} |", + "", + "## š§ Software Versions", + "", + "| Engine | Version |", + "|--------|---------|", + ] + + for engine in engines: + icon_name = engine_icons.get(engine, engine.title()) + lines.append(f"| {icon_name} | `{versions[engine]}` |") + + # Main results table + lines.extend([ + "", + "## š Results Comparison", + "", + "> š = Fastest for this query", + "", + "| Query | " + " | ".join(engine_icons.get(e, e.title()) for e in engines) + " |", + "|:------|" + "|".join(":---:" for _ in engines) + "|", + ]) + + # Add rows for each query with winner highlighting + for query in all_queries: + winner = get_winner(query, data, engines) + row = f"| **{query.upper()}** |" + for engine in engines: + result = data.get(engine, {}).get(query, {}) + status = result.get("status", "N/A") + if status == "success": + time_val = result.get("time_seconds") + time_str = format_time(time_val) + if engine == winner: + row += f" š **{time_str}** |" + else: + row += f" {time_str} |" + elif status == "timeout": + row += 
" ā±ļø TIMEOUT |" + elif status == "error": + row += " ā ERROR |" + else: + row += " ā |" + lines.append(row) + + # Add totals row + totals_row = "| **TOTAL** |" + total_times = {engine: results[engine].get("total_time", 0) for engine in engines} + fastest_total = min(total_times.values()) if total_times else 0 + for engine in engines: + total = total_times[engine] + time_str = format_time(total) + if total == fastest_total and total > 0: + totals_row += f" š **{time_str}** |" + else: + totals_row += f" **{time_str}** |" + lines.append(totals_row) + + # Win count summary + win_counts = {engine: 0 for engine in engines} + for query in all_queries: + winner = get_winner(query, data, engines) + if winner: + win_counts[winner] += 1 + + lines.extend([ + "", + "## š„ Performance Summary", + "", + "| Engine | Wins | Total Time |", + "|--------|:----:|:----------:|", + ]) + + for engine in sorted(engines, key=lambda e: win_counts[e], reverse=True): + icon_name = engine_icons.get(engine, engine.title()) + wins = win_counts[engine] + total = format_time(results[engine].get("total_time", 0)) + medal = "š„" if wins == max(win_counts.values()) and wins > 0 else "" + lines.append(f"| {icon_name} | {wins} {medal} | {total} |") + + # Detailed results section (collapsible) + lines.extend([ + "", + "## š Detailed Results", + "", + ]) + + for engine in engines: + icon_name = engine_icons.get(engine, engine.title()) + lines.extend([ + f"<details>", + f"<summary><b>{icon_name}</b> - Click to expand</summary>", + "", + "| Query | Time | Status | Rows |", + "|:------|-----:|:------:|-----:|", + ]) + + for query in all_queries: + result = data.get(engine, {}).get(query, {}) + time_str = format_time(result.get("time_seconds")) + status = result.get("status", "N/A") + rows = result.get("row_count") + row_str = f"{rows:,}" if rows is not None else "ā" + + status_emoji = { + "success": "ā ", + "error": "ā", + "timeout": "ā±ļø", + }.get(status, "ā") + + lines.append(f"| {query.upper()} | {time_str} | {status_emoji} | {row_str} |") + + lines.extend([ + "", + "</details>", + "", + ]) + + # Add error details if any + has_errors = False + error_lines = ["## ā ļø Errors and Timeouts", ""] + + for engine in engines: + engine_errors = [] + for query in all_queries: + result = data.get(engine, {}).get(query, {}) + if result.get("status") in ("error", "timeout"): + error_msg = result.get("error_message", "No details available") + # Truncate long error messages + if len(error_msg) > 200: + error_msg = error_msg[:200] + "..." 
+ engine_errors.append(f"- **{query.upper()}**: `{error_msg}`") + + if engine_errors: + has_errors = True + icon_name = engine_icons.get(engine, engine.title()) + error_lines.append(f"### {icon_name}") + error_lines.append("") + error_lines.extend(engine_errors) + error_lines.append("") + + if has_errors: + lines.extend(error_lines) + + # Footer + lines.extend([ + "---", + "", + "| Legend | Meaning |", + "|--------|---------|", + "| š | Fastest for this query |", + "| ā±ļø TIMEOUT | Query exceeded timeout |", + "| ā ERROR | Query failed |", + "", + f"*Generated by [SpatialBench](https://github.com/apache/sedona-spatialbench) on {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}*", + ]) + + markdown = "\n".join(lines) + + # Write to file + with open(output_file, "w") as f: + f.write(markdown) + + return markdown + + +def main(): + parser = argparse.ArgumentParser( + description="Summarize SpatialBench benchmark results" + ) + parser.add_argument( + "--results-dir", + type=str, + required=True, + help="Directory containing *_results.json files", + ) + parser.add_argument( + "--output", + type=str, + default="benchmark_summary.md", + help="Output markdown file", + ) + + args = parser.parse_args() + + results = load_results(args.results_dir) + + if not results: + print(f"No results found in {args.results_dir}") + # Write empty summary + with open(args.output, "w") as f: + f.write("# SpatialBench Benchmark Results\n\nNo results found.") + return + + markdown = generate_markdown_summary(results, args.output) + print(f"Summary written to {args.output}") + print("\nPreview:") + print("-" * 60) + print(markdown[:2000]) + if len(markdown) > 2000: + print("...") + + +if __name__ == "__main__": + main()
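The abstract interface in run_benchmark.py (setup / execute_query / teardown, with timing and timeout handling supplied by BaseBenchmark.run_query) is what keeps the three runners interchangeable. For anyone extending the harness later, a minimal sketch of an additional engine follows. This is not part of this commit: the "RowCountBenchmark" engine is hypothetical, ignores the SQL text, and only counts rows with pyarrow, purely to illustrate the plug-in points; it assumes the file sits next to run_benchmark.py so the import resolves.

# Hypothetical extension sketch -- not included in this commit.
from typing import Any

import pyarrow.parquet as pq

from run_benchmark import BaseBenchmark, QUERY_COUNT


class RowCountBenchmark(BaseBenchmark):
    """Stand-in engine that times a full scan of the trip table."""

    def __init__(self, data_paths: dict[str, str]):
        super().__init__(data_paths, "rowcount")
        self._tables = None

    def setup(self) -> None:
        # Mirror the real runners: resolve every table once up front.
        # get_data_paths() may hand back "<dir>/*.parquet" globs or single
        # files; pyarrow.parquet.read_table accepts a file or a directory.
        self._tables = {
            name: pq.read_table(path.replace("*.parquet", ""))
            for name, path in self.data_paths.items()
        }

    def teardown(self) -> None:
        self._tables = None

    def execute_query(self, query_name: str, query: str | None) -> tuple[int, Any]:
        # A real engine would execute `query` here; this stand-in just
        # returns the trip table so run_query() has something to time.
        table = self._tables["trip"]
        return table.num_rows, table


# To surface the engine through run_benchmark(), an entry would also be added
# to the `configs` dict, following the geopandas pattern (no SQL needed):
#
#   configs["rowcount"] = {
#       "class": RowCountBenchmark,
#       "version_getter": lambda: "n/a",
#       "queries_getter": lambda: {f"q{i}": None for i in range(1, QUERY_COUNT + 1)},
#       "needs_sql": False,
#   }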

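For reference, the only contract between run_benchmark.py and summarize_results.py is the JSON artifact: save_results() writes a file that load_results() later discovers by its *_results.json suffix and keys by each suite's "engine" field. A minimal sketch of that shape is below, with field names taken from BenchmarkSuite.to_dict() above and purely illustrative values.

# Sketch of a *_results.json payload (illustrative values only).
import json

example_results_json = {
    "benchmark": "spatialbench",
    "version": "0.1.0",
    "generated_at": "2026-01-13T08:20:24+00:00",
    "results": [
        {
            "engine": "duckdb",
            "version": "1.4.3",
            "scale_factor": 1,
            "timestamp": "2026-01-13T08:20:24+00:00",
            "total_time": 12.34,
            "results": [
                {"query": "q1", "time_seconds": 1.23, "row_count": 100,
                 "status": "success", "error_message": None},
                {"query": "q2", "time_seconds": None, "row_count": None,
                 "status": "timeout",
                 "error_message": "Query q2 timed out after 600 seconds"},
            ],
        }
    ],
}

print(json.dumps(example_results_json, indent=2))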