This is an automated email from the ASF dual-hosted git repository. janhoy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/solr-orbit.git
commit 8cb315530c1dc17eabc6cd62802366c0fffd111e Author: Jan Høydahl <[email protected]> AuthorDate: Fri May 22 00:53:47 2026 +0200 Update GitHub/CI infrastructure for Apache Solr Benchmark Adapt CI/CD and project metadata for the Solr port: - Remove workflows that depended on OpenSearch infrastructure (backport, add-untriaged, integ-test, publish-release, docker-push-release) — these will be rebuilt once the project has its own ASF infrastructure - Add docs.yml workflow (commented out pending docs host decision) - Simplify unit-test and docker-build workflows to remove OpenSearch-specific steps - Update .ci/build.sh and check_deprecated_terms.py for Solr naming - Remove CODEOWNERS and issue templates tied to the old team structure - Add AGENTS.md: guidance for AI coding assistants working in this repo - Refresh Makefile, tox.ini, .pylintrc, .gitignore for the new project shape Part of #3 --- .ci/build.sh | 14 +-- .ci/scripts/check_deprecated_terms.py | 0 .github/CODEOWNERS | 1 - .github/ISSUE_TEMPLATE/documentation_template.yml | 26 ----- .github/ISSUE_TEMPLATE/proposal_template.yml | 69 ------------- .github/PULL_REQUEST_TEMPLATE.md | 1 - .github/workflows/add-untriaged.yml | 19 ---- .github/workflows/backport.yml | 40 -------- .github/workflows/docker-build.yml | 2 +- .github/workflows/docker-push-release.yml | 55 ---------- .github/workflows/docs.yml | 61 ++++++++++++ .github/workflows/integ-test.yml | 77 -------------- .github/workflows/publish-release.yml | 48 --------- .github/workflows/unit-test.yml | 43 ++++---- .gitignore | 4 + .pylintrc | 32 ++++-- AGENTS.md | 116 ++++++++++++++++++++++ Makefile | 2 +- run.sh | 16 +-- tox.ini | 2 +- 20 files changed, 243 insertions(+), 385 deletions(-) diff --git a/.ci/build.sh b/.ci/build.sh index c075181f..2d346258 100644 --- a/.ci/build.sh +++ b/.ci/build.sh @@ -13,15 +13,15 @@ function setup { # Init pyenv. 
PATH=$HOME/.pyenv/shims:$PATH:$HOME/.pyenv/bin - # OpenSearch has different JDK requirements: - # - Gradle builds need JDK 21 (after Apache Lucene 10 upgrade) - # - OpenSearch runtime operations need JDK 17 - # Store the current JAVA_HOME (Java 21) for Gradle + # Solr JDK requirements: + # - Gradle builds need JDK 21 (from-sources pipeline) + # - Solr 9.x requires JDK 11+; Solr 10.x requires JDK 17+ + # Store the current JAVA_HOME (Java 21) for Gradle builds export GRADLE_JAVA_HOME=$JAVA_HOME - - # Set JAVA_HOME to Java 17 for OpenSearch + + # Set JAVA_HOME to Java 17 for Solr runtime operations if [ -n "$JAVA17_HOME" ]; then - echo "Setting JAVA_HOME to Java 17 for OpenSearch" + echo "Setting JAVA_HOME to Java 17 for Solr" export JAVA_HOME=$JAVA17_HOME java -version else diff --git a/.ci/scripts/check_deprecated_terms.py b/.ci/scripts/check_deprecated_terms.py old mode 100755 new mode 100644 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index aebd4302..00000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -* @IanHoang @gkamat @beaioun @rishabh6788 @VijayanB @OVI3D0 diff --git a/.github/ISSUE_TEMPLATE/documentation_template.yml b/.github/ISSUE_TEMPLATE/documentation_template.yml deleted file mode 100644 index 8522f626..00000000 --- a/.github/ISSUE_TEMPLATE/documentation_template.yml +++ /dev/null @@ -1,26 +0,0 @@ ---- -name: Documentation request π -description: Suggest new documentation or improvements to existing documentation -labels: [documentation, untriaged] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to submit a documentation request! - - type: textarea - id: suggestion - attributes: - label: Please provide details on how and where the documentation can be improved - description: A clear and concise description of the issue. 
For instance, "I'm unable to find documentation on [...]" - validations: - required: true - - type: textarea - id: references - attributes: - label: Provide references - description: List out links to existing documentation if applicable - - type: textarea - id: additional-context - attributes: - label: Additional context - description: Add any other context or screenshots about the documentation request here. diff --git a/.github/ISSUE_TEMPLATE/proposal_template.yml b/.github/ISSUE_TEMPLATE/proposal_template.yml deleted file mode 100644 index 91826784..00000000 --- a/.github/ISSUE_TEMPLATE/proposal_template.yml +++ /dev/null @@ -1,69 +0,0 @@ ---- -name: Proposal -description: Suggest an idea for a specific feature you wish to propose to the community for comment -title: '[RFC/PROPOSAL]: ' -labels: [RFC, untriaged] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this RFC / Proposal! - - type: textarea - id: what-are-you-proposing - attributes: - label: What are you proposing? - description: In a few sentences, describe the feature and its core capabilities. - validations: - required: true - - type: textarea - id: what-users-have-asked-for-this-feature - attributes: - label: What users have asked for this feature? - description: Highlight any research, proposals, requests or anecdotes that signal this is the right thing to build. Include links to GitHub Issues, Forums, Stack Overflow, X (Twitter), Etc - validations: - required: true - - type: textarea - id: what-problems-are-you-trying-to-solve - attributes: - label: What problems are you trying to solve? - description: Summarize use cases, user problems, and needs you are trying to solve. Describe the most important user needs, pain points and jobs as expressed by the user asks above. - - type: textarea - id: what-will-developer-experience-be-like - attributes: - label: What will the developer experience be like? - description: Does this have a REST API? 
If so, please describe the API and any impact it may have to existing APIs. In a brief summary (not a spec), highlight what new REST APIs or changes to REST APIs are planned. as well as any other API, CLI or Configuration changes that are planned as part of this feature. - - type: textarea - id: are-there-any-security-coniderations - attributes: - label: Are there any security considerations? - description: Describe if the feature has any security considerations or impact. What is the security model of the new APIs? Features should be integrated into the OpenSearch security suite and so if they are not, we should highlight the reasons here - - type: textarea - id: are-there-any-breaking-changes - attributes: - label: Are there any breaking changes? - description: If this feature will require breaking changes to any APIs, ouline what those are and why they are needed. What is the path to minimizing impact? (example, add new API and deprecate the old one) - - type: textarea - id: what-will-the-user-experience-be-like - attributes: - label: What will the user experience be like? - description: Describe the feature requirements and/or user stories. You may include low-fidelity sketches, wireframes, APIs stubs, or other examples of how a user would use the feature via CLI. Using a bulleted list or simple diagrams to outline features is okay. If this is net new functionality, call this out as well. - - type: textarea - id: are-there-breaking-changes-to-user-experience - attributes: - label: Are there breaking changes to user experience? - description: Will this change the existing user experience? Will this be a breaking change from a user flow or user experience perspective? - - type: textarea - id: why-should-it-be-built - attributes: - label: Why should it be built? Any reason not to? - description: Describe the value that this feature will bring to the OpenSearch community, as well as what impact it has if it isn't built, or new risks if it is. 
Highlight opportunities for additional research. - - type: textarea - id: what-will-it-take-to-execute - attributes: - label: What will it take to execute? - description: Describe what it will take to build this feature. Are there any assumptions you may be making that could limit scope or add limitations? Are there performance, cost, or technical constraints that may impact the user experience? Does this feature depend on other feature work? What additional risks are there? - - type: textarea - id: any-remaining-open-questions - attributes: - label: Any remaining open questions? - description: What are the known enhancements to this feature? Are there any enhancements that may be out of scope but which we will want to track long term? List any other open questions that may need to be answered before proceeding with an implementation. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 93eaef0a..77511b3b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -11,4 +11,3 @@ --- By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. -For more information on following Developer Certificate of Origin and signing off your commits, please check [here](https://github.com/opensearch-project/OpenSearch/blob/main/CONTRIBUTING.md#developer-certificate-of-origin). 
diff --git a/.github/workflows/add-untriaged.yml b/.github/workflows/add-untriaged.yml deleted file mode 100644 index 9dcc7020..00000000 --- a/.github/workflows/add-untriaged.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: Apply 'untriaged' label during issue lifecycle - -on: - issues: - types: [opened, reopened, transferred] - -jobs: - apply-label: - runs-on: ubuntu-latest - steps: - - uses: actions/github-script@v6 - with: - script: | - github.rest.issues.addLabels({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - labels: ['untriaged'] - }) diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml deleted file mode 100644 index df508da5..00000000 --- a/.github/workflows/backport.yml +++ /dev/null @@ -1,40 +0,0 @@ ---- -name: Backport -on: - pull_request_target: - types: - - closed - - labeled - -jobs: - backport: - name: Backport - runs-on: ubuntu-latest - # Only react to merged PRs for security reasons. - # See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request_target. 
- if: > - github.event.pull_request.merged - && ( - github.event.action == 'closed' - || ( - github.event.action == 'labeled' - && contains(github.event.label.name, 'backport') - ) - ) - permissions: - contents: write - pull-requests: write - steps: - - name: GitHub App token - id: github_app_token - uses: tibdex/[email protected] - with: - app_id: ${{ secrets.APP_ID }} - private_key: ${{ secrets.APP_PRIVATE_KEY }} - installation_id: 22958780 - - - name: Backport - uses: VachaShah/[email protected] - with: - github_token: ${{ steps.github_app_token.outputs.token }} - head_template: backport/backport-<%= number %>-to-<%= base %> \ No newline at end of file diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 0ba34ba3..dc884f87 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -37,7 +37,7 @@ jobs: - name: Docker Build ${{ matrix.platform }} run: | docker buildx version - tag=osb/osb-`echo ${{ matrix.platform }} | tr '/' '-'` + tag=asb/asb-`echo ${{ matrix.platform }} | tr '/' '-'` set -x docker buildx build --platform ${{ matrix.platform }} --build-arg VERSION=`cat version.txt` --build-arg BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` -f docker/Dockerfile -t "$tag" -o type=docker . set +x diff --git a/.github/workflows/docker-push-release.yml b/.github/workflows/docker-push-release.yml deleted file mode 100644 index a7f26720..00000000 --- a/.github/workflows/docker-push-release.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: Docker Push Release -on: - push: - branches: - - main - -permissions: - id-token: write - contents: read - -jobs: - docker: - if: github.repository == 'opensearch-project/opensearch-benchmark' - runs-on: ubuntu-latest - steps: - - name: Check out repository - uses: actions/checkout@v4 - - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - # Workaround for a memory allocation layout bug in QEMU, - # triggered by a kernel update to Linux machine images. 
- # Note that this is not an actual fix. - with: - image: tonistiigi/binfmt:qemu-v7.0.0-28 - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Configure Role to Acquire Credentials - uses: aws-actions/configure-aws-credentials@v4 - with: - role-to-assume: ${{ secrets.BENCHMARK_DOCKERHUB_ROLE }} - aws-region: us-east-1 - - - name: Retrieve Password - id: retrieve-password - run: | - DOCKERHUB_PASSWORD=`aws secretsmanager get-secret-value --secret-id jenkins-staging-dockerhub-credential --query SecretString --output text` - echo "::add-mask::$DOCKERHUB_PASSWORD" - echo "dockerhub-password=$DOCKERHUB_PASSWORD" >> $GITHUB_OUTPUT - - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.BENCHMARK_DOCKERHUB_USERNAME }} - password: ${{ steps.retrieve-password.outputs.dockerhub-password }} - - - name: Docker Build Multi-Platform - run: | - docker buildx version - tag=main-latest - set -x - docker buildx build --platform linux/amd64,linux/arm64 --build-arg VERSION=`cat version.txt` --build-arg BUILD_DATE=`date -u +%Y-%m-%dT%H:%M:%SZ` -f docker/Dockerfile -t opensearchstaging/opensearch-benchmark:"$tag" --push . 
- set +x diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..387ae8c1 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,61 @@ +#name: Deploy docs to GitHub Pages +# +#on: +# push: +# branches: ["main"] +# paths: +# - "docs/**" +# - ".github/workflows/docs.yml" +# workflow_dispatch: +# +#permissions: +# contents: read +# pages: write +# id-token: write +# +#concurrency: +# group: "pages" +# cancel-in-progress: true +# +#jobs: +# build: +# runs-on: ubuntu-latest +# defaults: +# run: +# working-directory: docs +# steps: +# - name: Checkout +# uses: actions/checkout@v4 +# +# - name: Setup Ruby +# uses: ruby/setup-ruby@v1 +# with: +# ruby-version: "3.3" +# bundler-cache: true +# cache-version: 0 +# working-directory: docs +# +# - name: Setup Pages +# id: pages +# uses: actions/configure-pages@v5 +# +# - name: Build with Jekyll +# run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" +# env: +# JEKYLL_ENV: production +# +# - name: Upload artifact +# uses: actions/upload-pages-artifact@v4 +# with: +# path: docs/_site +# +# deploy: +# environment: +# name: github-pages +# url: ${{ steps.deployment.outputs.page_url }} +# runs-on: ubuntu-latest +# needs: build +# steps: +# - name: Deploy to GitHub Pages +# id: deployment +# uses: actions/deploy-pages@v4 diff --git a/.github/workflows/integ-test.yml b/.github/workflows/integ-test.yml deleted file mode 100644 index 2c7a971c..00000000 --- a/.github/workflows/integ-test.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: Run Integration Tests -on: [workflow_dispatch, pull_request] -jobs: - Integration-Tests: - strategy: - matrix: - python-version: - - "3.10" - - "3.11" - - "3.12" - - "3.13" - runs-on: ubuntu-latest - steps: - - name: Install bz2 development package - if: runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install -y libbz2-dev - - - uses: actions/setup-python@v4 - - - uses: KengoTODA/actions-setup-docker-compose@v1 - 
with: - version: '1.29.2' - -# - name: Enforce docker-compose v1 -# run: | -# echo "GitHub starts to switch runners to include docker-compose v2" -# echo "which uses 'docker compose' command to replace 'docker-compose'" -# echo "this would cause issues in our test validation so we enforce v1 here" -# echo "https://github.com/actions/runner-images/commit/2a4bc14da46f1f8e358aa902a69edb9bef135472" -# sudo apt-get remove -y docker-compose-plugin -# sudo pip install docker-compose==1.29.2 -# docker --version -# docker-compose --version - - - name: Check out repository code - uses: actions/checkout@v2 - - - name: Install pyenv - run: git clone https://github.com/pyenv/pyenv.git ~/.pyenv - - - name: Install JDK 21 - uses: actions/setup-java@v3 - with: - distribution: 'adopt' - java-version: '21' - - run: | - echo "JAVA21_HOME=$JAVA_HOME" >> $GITHUB_ENV - echo "BUILD_JAVA_HOME=$JAVA_HOME" >> $GITHUB_ENV - - - name: Install JDK 21 - uses: actions/setup-java@v3 - with: - distribution: 'adopt' - java-version: '21' - - run: echo "JAVA21_HOME=$JAVA_HOME" >> $GITHUB_ENV - - - name: Install JDK 17 - uses: actions/setup-java@v3 - with: - distribution: 'adopt' - java-version: '17' - - run: | - echo "JAVA17_HOME=$JAVA_HOME" >> $GITHUB_ENV - echo "JAVA_HOME=$JAVA_HOME" >> $GITHUB_ENV # Set JDK 17 as default - - - name: Run the CI build script - run: | - # For build operations that need JDK 21 - if [ -n "$BUILD_JAVA_HOME" ]; then - export ORIG_JAVA_HOME=$JAVA_HOME - export JAVA_HOME=$BUILD_JAVA_HOME - # do build operations here - export JAVA_HOME=$ORIG_JAVA_HOME - fi - bash .ci/build.sh run_it ${{ matrix.python-version }} \ No newline at end of file diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml deleted file mode 100644 index 3ff29509..00000000 --- a/.github/workflows/publish-release.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Publish Release to GitHub - -on: - push: - tags: - - "*" - -jobs: - publish-release: - runs-on: ubuntu-latest - 
permissions: - id-token: write - contents: write - issues: write - steps: - - name: Checkout Repository - uses: actions/checkout@v3 - - id: get_approvers - run: | - echo "approvers=$(cat .github/CODEOWNERS | grep @ | tr -d '* ' | sed 's/@/,/g' | sed 's/,//1')" >> $GITHUB_OUTPUT - - uses: trstringer/manual-approval@v1 - with: - secret: ${{ github.TOKEN }} - approvers: ${{ steps.get_approvers.outputs.approvers }} - minimum-approvals: 1 - issue-title: 'Release opensearch-benchmark' - issue-body: "Please approve or deny the release of opensearch-benchmark. **Tag**: ${{ github.ref_name }} **Commit**: ${{ github.sha }}" - exclude-workflow-initiator-as-approver: true - - - name: Set up Python 3 - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - - name: Build project for distribution - run: | - make build - tar zcvf artifacts.tar.gz dist - - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - - - name: Publish release - uses: softprops/action-gh-release@v1 - with: - draft: true - generate_release_notes: true - files: artifacts.tar.gz diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 95915bfd..e81921fb 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -25,26 +25,23 @@ jobs: - name: Run the CI build script run: bash .ci/build.sh build_and_unit_test - - name: Detect term-check mode from labels - id: detect_mode - shell: bash - run: | - labels='${{ toJson(github.event.pull_request.labels.*.name) }}' - echo "PR labels: $labels" - mode="" - - if echo "$labels" | grep -qi '"check-1.x-terms"'; then - mode="block-1x" - elif echo "$labels" | grep -qi '"check-2.x-terms"'; then - mode="block-2x" - fi - - echo "mode=$mode" >> "$GITHUB_OUTPUT" - - # 2) Run the checker only if a mode was selected - - name: Run term lint checker - if: steps.detect_mode.outputs.mode != '' - env: - OSB_TERM_MODE: ${{ steps.detect_mode.outputs.mode }} - run: | - python3 
.ci/scripts/check_deprecated_terms.py --mode "$OSB_TERM_MODE" \ No newline at end of file + Docs-Lint: + runs-on: ubuntu-latest + defaults: + run: + working-directory: docs + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Ruby + uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.3" + bundler-cache: true + cache-version: 0 + working-directory: docs + + - name: Build docs (lint check) + run: bundle exec jekyll build --baseurl "" + diff --git a/.gitignore b/.gitignore index 23252792..59b878cf 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,7 @@ recipes/ccr/ccr-target-hosts.json # Tracker tracks tracks/ + +.claude +.specify +specs/ \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index bd37349b..02b24ee5 100644 --- a/.pylintrc +++ b/.pylintrc @@ -180,7 +180,30 @@ disable=print-statement, bad-option-value, consider-using-dict-items, unused-private-member, - use-a-generator + use-a-generator, + protected-access, + import-outside-toplevel, + wrong-import-position, + logging-fstring-interpolation, + subprocess-run-check, + too-many-return-statements, + too-many-nested-blocks, + unnecessary-comprehension, + multiple-imports, + consider-using-f-string, + unspecified-encoding, + broad-exception-raised, + unnecessary-lambda-assignment, + consider-using-in, + superfluous-parens, + use-implicit-booleaness-not-comparison, + unnecessary-dunder-call, + pointless-exception-statement, + use-list-literal, + use-dict-literal, + consider-using-generator, + try-except-raise, + consider-iterating-dictionary # Enable the message, report, category or checker with the given id(s). You can @@ -372,13 +395,6 @@ max-line-length=180 # Maximum number of lines in a module. max-module-lines=1000 -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). 
-# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator - # Allow the body of a class to be on the same line as the declaration if body # contains single statement. single-line-class-stmt=no diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000..30c65f3a --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,116 @@ +# AGENTS.md + +This file provides guidance to AI coding agents working with this repository. + +## Development Setup + +Prerequisites: `pyenv`, JDK 21, Docker, `docker-compose`, `jq` + +Optional: `pbzip2` (parallel bzip2 β install via `apt install pbzip2` or `brew install pbzip2`). +Without it, `.bz2` corpus decompression falls back to Python stdlib (slower). + +```bash +make develop # Install Python 3.10 via pyenv, create .venv, install all deps +source .venv/bin/activate # Activate virtual environment +``` + +## Common Commands + +```bash +make lint # Run pylint on osbenchmark/, benchmarks/, scripts/, tests/, it/ +make test # Run unit tests (pytest tests/) +pytest tests/path/to/test_file.py::TestClass::test_method # Run a single test +make it # Run integration tests via tox (requires Java, Docker; ~30 min) +make it310 # Integration tests for Python 3.10 only +make benchmark # Run performance benchmarks (pytest benchmarks/) +make build # Build distribution wheel +make clean # Remove build artifacts, caches, tox environments +``` + +## Code Style + +- **Linter**: pylint with `pylint-quotes` plugin (`.pylintrc`) +- **String quotes**: Double quotes enforced +- **Max line length**: 180 characters +- **Max module lines**: 1000 + +## Architecture + +Apache Solr Benchmark (ASB) is a **macrobenchmarking framework** for Apache Solr clusters, using an **actor-based concurrent execution model** via the [Thespian](https://thespianpy.com/) library. 
+ +### Entry Points + +- `solr-benchmark` / `sb` β `osbenchmark/benchmark.py:main` β CLI for running benchmarks +- `solr-benchmarkd` / `sbd` β `osbenchmark/benchmarkd.py:main` β Daemon for distributed worker nodes + +### Core Package (`osbenchmark/`) + +**Orchestration layer:** +- `benchmark.py` β CLI arg parsing, subcommands: `run`, `list`, `info`, `generate`, `convert-workload` +- `test_run_orchestrator.py` β Pipeline execution: prepares, launches cluster, runs workload, publishes results +- `actor.py` β Thespian actor system setup for parallel/distributed execution +- `config.py` β Configuration loading and management + +**Cluster management (`builder/`):** +- `solr_provisioner.py` β Download, install and launch Solr (from distribution, sources, or Docker) +- `provisioners/` β Generic node provisioning infrastructure +- `downloaders/` β Download Solr distributions +- `installers/` β Install Solr on provisioned nodes +- `launchers/` β Start/stop cluster nodes +- `executors/` β Execute remote commands on cluster nodes +- `configs/` β Jinja2 templates for cluster configuration + +**Benchmark execution:** +- `workload/` β Load and manage workload definitions (test procedures, operations, schedules) +- `worker_coordinator/` β Coordinate distributed worker nodes; `driver.py` drives actual load +- `worker_coordinator/runner.py` β Solr operation runners (`SolrBulkIndex`, `SolrSearch`, `SolrCreateCollection`, etc.) 
+- `metrics.py` β Collect, store, and aggregate benchmark metrics (filesystem-backed; no external store) +- `telemetry.py` β Solr-specific telemetry devices (JVM, node, collection, query, indexing, cache stats) +- `publisher.py` β Publish and format benchmark results +- `result_writer.py` β Write results to local filesystem (JSON/CSV) + +**Data and connectivity:** +- `client.py` β `SolrAdminClient` and `SolrClient` (HTTP via `requests`/`pysolr`; Collections API, `/select`, `/update`) +- `synthetic_data_generator/` β Generate synthetic test datasets +- `workload_generator/` β Generate workload definition files from existing Solr collections + +**Workload conversion:** +- `conversion/workload_converter.py` β Convert an OpenSearch Benchmark workload directory to Solr format +- `conversion/detector.py` β Detect whether a workload uses OpenSearch-only operations/query DSL +- `conversion/query.py` β Translate OpenSearch Query DSL to Solr JSON Query DSL +- `conversion/schema.py` β Translate OpenSearch index mappings to Solr `managed-schema.xml` + +**Utilities:** +- `utils/` β IO, process management, console output, network, version parsing, options handling +- `cloud_provider/` β Cloud provider integrations (AWS via boto3, GCP via google-auth) +- `visualizations/` β Result visualization + +### Test Structure + +- `tests/` β Unit tests mirroring `osbenchmark/` structure +- `it/` β Integration tests (spin up real Solr clusters via Docker/provisioning) +- `benchmarks/` β Performance benchmarks for ASB itself + +### Workload System + +Workloads are defined as JSON/YAML files with: +- **Operations**: individual actions (bulk indexing, search queries) +- **Test procedures**: sequences of operations with parameters and schedules +- **Corpora**: dataset files (compatible with OpenSearch Benchmark format) + +Workloads must be in Solr format. Use `solr-benchmark convert-workload` to convert from +OpenSearch Benchmark format. 
Workloads can be loaded from a local path (`--workload-path`) +or from a git workload repository (`--workload-repository`). + +### Pipeline Execution Flow + +1. **Prepare** — Load workload, configure metrics store +2. **Build** (optional) — Download and provision Solr cluster +3. **Run** — Execute test procedure via worker coordinator and drivers +4. **Publish** — Store metrics, generate report + +## Key Technologies + +- **Python 3.10+** with `pysolr` (data ops), `requests` (HTTP admin), `psutil` (I/O metrics), `thespian` (actor model), `pytest` (tests), `tabulate` (console output) +- **Metrics store**: local filesystem — JSON/CSV result files at `~/.solr-benchmark/`, SQLite test-runs store +- **Docs**: Jekyll 4.x + just-the-docs gem in `docs/`; deployed to GitHub Pages via `.github/workflows/docs.yml` diff --git a/Makefile b/Makefile index fc8a9ef8..1c139a28 100644 --- a/Makefile +++ b/Makefile @@ -61,7 +61,7 @@ build: install-deps # Builds a wheel from source, then installs it. install: build - PIP_ONLY_BINARY=h5py $(PIP) install dist/opensearch_benchmark-*.whl + PIP_ONLY_BINARY=h5py $(PIP) install dist/*.whl rm -rf dist clean: diff --git a/run.sh b/run.sh index 3136b8c3..65c41fb4 100755 --- a/run.sh +++ b/run.sh @@ -19,7 +19,7 @@ ########################################################################################## # -# Internal helper script to actually run either OSB or OSB daemon. +# Internal helper script to actually run either solr-benchmark or its daemon. # # Do not invoke directly but rather use the `benchmark` and `benchmarkd` scripts. # @@ -29,7 +29,7 @@ readonly BINARY_NAME="${__BENCHMARK_INTERNAL_BINARY_NAME}" readonly HUMAN_NAME="${__BENCHMARK_INTERNAL_HUMAN_NAME}" install_osbenchmark_with_setuptools () { - # Check if optional parameter with OSB binary path, points to an existing executable file. + # Check if optional parameter with benchmark binary path, points to an existing executable file. 
if [[ $# -ge 1 && -n $1 ]]; then if [[ -f $1 && -x $1 ]]; then return; fi fi @@ -43,7 +43,7 @@ install_osbenchmark_with_setuptools () { fi } -# Attempt to update OSB itself by default but allow user to skip it. +# Attempt to update solr-benchmark itself by default but allow user to skip it. SELF_UPDATE=YES # Assume that the "main remote" is called "origin" REMOTE="origin" @@ -72,14 +72,14 @@ case ${i} in SELF_UPDATE=NO shift # past argument with no value ;; - # inspect OSB's command line options and skip update also if the user has specified --offline. + # inspect command line options and skip update also if the user has specified --offline. # - # Note that we do NOT consume this option as it needs to be passed to OSB. + # Note that we do NOT consume this option as it needs to be passed to the benchmark script. --offline) SELF_UPDATE=NO # DO NOT CONSUME!! ;; - # Do not consume unknown parameters; they should still be passed to the actual OSB script + # Do not consume unknown parameters; they should still be passed to the actual benchmark script #*) esac done @@ -89,7 +89,7 @@ then # see http://unix.stackexchange.com/a/155077 if output=$(git status --porcelain) && [ -z "$output" ] && on_master=$(git rev-parse --abbrev-ref HEAD) && [ "$on_master" == "master" ] then - # Working directory clean -> we assume this is a user that is not actively developing OSB and just upgrade it every time it is invoked + # Working directory clean -> we assume this is a user that is not actively developing solr-benchmark and just upgrade it every time it is invoked set +e # this will fail if the user is offline git fetch ${REMOTE} --quiet >/dev/null 2>&1 @@ -97,7 +97,7 @@ then set -e if [[ $exit_code == 0 ]] then - echo "Auto-updating OSB from ${REMOTE}" + echo "Auto-updating solr-benchmark from ${REMOTE}" git rebase ${REMOTE}/master --quiet install_osbenchmark_with_setuptools #else diff --git a/tox.ini b/tox.ini index 24411232..9b2efefe 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ 
############################################################################### # -# tox configuration for OpenSearch Benchmark. +# tox configuration for Apache Solr Benchmark. # # Invocation: Run `make it` #
