This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/main by this push:
new 03277d5 Revert "Add CI to build tarball"
03277d5 is described below
commit 03277d530abec4d6adcb0317ec1735f7a586f3ef
Author: Jia Yu <[email protected]>
AuthorDate: Tue Nov 25 21:14:36 2025 -0800
Revert "Add CI to build tarball"
This reverts commit af6da8781da63b1a28d1cbc6d718cc210afc3fdb.
---
.github/workflows/build-docs.yml | 109 ------------------------
.github/workflows/packaging.yml | 175 +++++++++++---------------------------
ci/scripts/build-docs.sh | 35 --------
ci/scripts/build-source.sh | 53 ------------
ci/scripts/set_dev_version.py | 122 --------------------------
dev/release/rat_exclude_files.txt | 1 -
docs/queries.md | 10 ++-
7 files changed, 57 insertions(+), 448 deletions(-)
diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml
deleted file mode 100644
index 0a57eb7..0000000
--- a/.github/workflows/build-docs.yml
+++ /dev/null
@@ -1,109 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-name: Build and Deploy Docs
-
-on:
- push:
- branches:
- - main
- pull_request:
- branches:
- - main
-
-permissions:
- contents: write
-
-concurrency:
- group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
- cancel-in-progress: true
-
-jobs:
- build-docs:
- name: Build Documentation
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - uses: actions/setup-python@v5
- with:
- python-version: "3.13"
-
- - name: Install docs requirements
- run: pip install -r docs/requirements.txt
-
- - name: Build documentation
- # Using mkdocs build is the standard way to generate the site
- run: mkdocs build --strict
-
- - name: Compress docs for artifact upload
- run: |
- # The update-asf-site job expects a specific folder name inside the tarball
- cp -R site documentation-site
- tar -czf docs.tgz documentation-site
-
- - name: Upload docs artifact
- uses: actions/upload-artifact@v4
- with:
- name: docs
- retention-days: 2
- path: docs.tgz
-
- update-asf-site:
- name: Deploy Dev Snapshot
- runs-on: ubuntu-latest
- needs:
- - build-docs
- # This job should only run on pushes to the main branch
- if: github.event_name == 'push' && github.ref == 'refs/heads/main'
-
- steps:
- - uses: actions/download-artifact@v4
- with:
- name: docs
-
- - name: Clone asf-site branch
- uses: actions/checkout@v4
- with:
- ref: asf-site
- path: pages-clone
-
- - name: Update development documentation
- run: |
- git config --global user.email "[email protected]"
- git config --global user.name "GitHub Actions"
-
- cd pages-clone
- # Remove all existing content except .git directory
- find . -maxdepth 1 -not -name '.git' -not -name '.' -exec rm -rf {} +
-
- tar -xf ../docs.tgz
- # Move all content from documentation-site to root of asf-site branch
- mv documentation-site/* .
- mv documentation-site/.* . 2>/dev/null || true
- rmdir documentation-site
-
- git add *
- git commit --allow-empty -m"update documentation for main branch"
-
- - name: Push development documentation to asf-site
- # Ensure this push only happens on the intended repository
- if: success() && github.repository == 'apache/sedona-spatialbench'
- run: |
- cd pages-clone
- git push
-
diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml
index 6403902..afce8ef 100644
--- a/.github/workflows/packaging.yml
+++ b/.github/workflows/packaging.yml
@@ -15,171 +15,94 @@
# specific language governing permissions and limitations
# under the License.
-name: packaging
+name: Build and Deploy Docs
on:
push:
branches:
- main
- - 'branch-*'
- tags:
- - 'sedona-spatialbench-*-rc*'
pull_request:
branches:
- main
permissions:
- contents: read
+ contents: write
concurrency:
group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: true
jobs:
- source:
+ build-docs:
+ name: Build Documentation
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v6
- with:
- fetch-depth: 0
+ - uses: actions/checkout@v4
- - uses: actions/setup-python@v6
+ - uses: actions/setup-python@v5
with:
python-version: "3.13"
- - name: Install Java for RAT
- run: sudo apt-get update && sudo apt-get install -y default-jdk
+ - name: Install docs requirements
+ run: pip install -r docs/requirements.txt
- - name: Prepare dev version
- shell: bash
- run: |
- if [ "${GITHUB_REF_TYPE}" = "tag" ]; then
- # In the case of a tag, the version in Cargo.toml is already correct
- VERSION=${GITHUB_REF_NAME#sedona-spatialbench-}
- VERSION=${VERSION%-rc*}
- elif [[ "${GITHUB_REF##*/}" =~ ^branch-[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
- VERSION="${GITHUB_REF##*/branch-}"
- else
- # Set an alpha version and save it to env
- VERSION=$(python ci/scripts/set_dev_version.py)
- fi
-
- echo "VERSION=${VERSION}" >> $GITHUB_ENV
-
- - name: Create archive
- shell: bash
- run: |
- ci/scripts/build-source.sh \
- apache-sedona-spatialbench-${VERSION} \
- $(git log -n 1 --format=%h)
+ - name: Build documentation
+ # Using mkdocs build is the standard way to generate the site
+ run: mkdocs build --strict
- - name: Run Release Audit Tool
- shell: bash
+ - name: Compress docs for artifact upload
run: |
- dev/release/run-rat.sh apache-sedona-spatialbench-${{ env.VERSION }}.tar.gz
+ # The update-asf-site job expects a specific folder name inside the tarball
+ cp -R site documentation-site
+ tar -czf docs.tgz documentation-site
- - uses: actions/upload-artifact@v5
+ - name: Upload docs artifact
+ uses: actions/upload-artifact@v4
with:
- name: source
- retention-days: 7
- path: |
- apache-sedona-spatialbench-${{ env.VERSION }}.tar.gz
+ name: docs
+ retention-days: 2
+ path: docs.tgz
- docs-and-deploy:
+ update-asf-site:
+ name: Deploy Dev Snapshot
runs-on: ubuntu-latest
- env:
- CARGO_INCREMENTAL: 0
- permissions:
- contents: write
+ needs:
+ - build-docs
+ # This job should only run on pushes to the main branch
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
steps:
- - uses: actions/checkout@v6
+ - uses: actions/download-artifact@v4
with:
- fetch-depth: 0
-
- - uses: actions/setup-python@v6
- with:
- python-version: "3.x"
-
- - uses: Swatinem/rust-cache@v2
- with:
- # Update this key to force a new cache
- prefix-key: "docs-v1"
-
- - name: Install docs requirements
- run: |
- pip install -r docs/requirements.txt
-
- - name: Build documentation
- run: |
- ci/scripts/build-docs.sh
-
- - name: Compress docs
- run: |
- cp -R site spatialbench-docs
- tar -czf docs.tgz spatialbench-docs
+ name: docs
- - name: Upload docs
- uses: actions/upload-artifact@v5
+ - name: Clone asf-site branch
+ uses: actions/checkout@v4
with:
- name: docs
- retention-days: 2
- path: |
- docs.tgz
+ ref: asf-site
+ path: pages-clone
- - name: Deploy the doc to the website branch
- if: ${{ github.event_name != 'pull_request' && github.repository == 'apache/sedona-spatialbench' }}
+ - name: Update development documentation
run: |
git config --global user.email "[email protected]"
git config --global user.name "GitHub Actions"
- git fetch origin asf-site --depth=1
-
- if [[ "${GITHUB_REF##*/}" == "main" ]]; then
- # For main branch, update the asf-site branch directly
- git checkout asf-site
- # Remove all existing content except .git directory
- find . -maxdepth 1 -not -name '.git' -not -name '.' -exec rm -rf {} +
- # Extract docs from the artifact
- tar -xzf docs.tgz
- mv spatialbench-docs/* .
- mv spatialbench-docs/.* . 2>/dev/null || true
- rmdir spatialbench-docs
- git add .
- git commit --allow-empty -m "update documentation for main branch"
- git push origin asf-site
- elif [[ "${GITHUB_REF##*/}" =~ ^branch-[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
- # For release branches, we could add versioned docs here if needed
- echo "Release branch documentation deployment not yet implemented"
- fi
-
- create-release:
- runs-on: ubuntu-latest
- needs:
- - docs-and-deploy
- - source
- permissions:
- contents: write
- steps:
- - name: Get all artifacts
- uses: actions/download-artifact@v6
- with:
- path: release-artifacts
+ cd pages-clone
+ # Remove all existing content except .git directory
+ find . -maxdepth 1 -not -name '.git' -not -name '.' -exec rm -rf {} +
- - name: List release artifacts
- run: |
- find release-artifacts -type f
+ tar -xf ../docs.tgz
+ # Move all content from documentation-site to root of asf-site branch
+ mv documentation-site/* .
+ mv documentation-site/.* . 2>/dev/null || true
+ rmdir documentation-site
+
+ git add *
+ git commit --allow-empty -m"update documentation for main branch"
- - name: Create release
- if: success() && startsWith(github.ref, 'refs/tags/')
+ - name: Push development documentation to asf-site
+ # Ensure this push only happens on the intended repository
+ if: success() && github.repository == 'apache/sedona-spatialbench'
run: |
- RELEASE_TAG=${GITHUB_REF#refs/*/}
- UPLOAD=$(find release-artifacts -type f)
-
- gh release create "${RELEASE_TAG}" \
- --repo ${{ github.repository }} \
- --prerelease \
- --title "SpatialBench ${RELEASE_TAG}" \
- ${UPLOAD}
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ cd pages-clone
+ git push
\ No newline at end of file
diff --git a/ci/scripts/build-docs.sh b/ci/scripts/build-docs.sh
deleted file mode 100755
index 06f4acc..0000000
--- a/ci/scripts/build-docs.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-if [ ${VERBOSE:-0} -gt 0 ]; then
- set -x
-fi
-
-SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)"
-SPATIALBENCH_DIR="$(cd "${SOURCE_DIR}/../.." && pwd)"
-
-pushd "${SPATIALBENCH_DIR}"
-if mkdocs build --strict ; then
- echo "Success!"
- exit 0
-else
- echo "Documentation build failed"
- exit 1
-fi
-
diff --git a/ci/scripts/build-source.sh b/ci/scripts/build-source.sh
deleted file mode 100755
index 2e4133b..0000000
--- a/ci/scripts/build-source.sh
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -eu
-
-main() {
- local -r source_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
- local -r source_top_dir="$(cd "${source_dir}/../../" && pwd)"
-
- if [ "$#" -ne 2 ]; then
- echo "Usage: $0 <base-name> <revision>"
- echo "Usage: $0 apache-sedona-spatialbench-1.0.0 1234567"
- exit 1
- fi
- local -r base_name="$1"
- local -r revision="$2"
-
- echo "Using commit ${revision}"
-
- local -r tar_ball="${base_name}.tar.gz"
-
- pushd "${source_top_dir}"
-
- rm -rf "${base_name}/"
- git archive "${revision}" --prefix "${base_name}/" | tar xf -
-
- # Create new tarball
- tar czf "${tar_ball}" "${base_name}/"
- rm -rf "${base_name}/"
-
- echo "Commit SHA1: ${revision}"
-
- popd
-}
-
-main "$@"
-
diff --git a/ci/scripts/set_dev_version.py b/ci/scripts/set_dev_version.py
deleted file mode 100644
index d653631..0000000
--- a/ci/scripts/set_dev_version.py
+++ /dev/null
@@ -1,122 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import os
-import re
-import subprocess
-import tomllib
-
-
-def git(*args):
- out = subprocess.run(
- ["git"] + list(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE
- )
- if out.returncode != 0:
- raise RuntimeError(f"git {args} failed:\n{out.stderr.decode()}")
-
- return out.stdout.decode().strip().splitlines()
-
-
-def src_path(*args):
- release_dir = os.path.dirname(__file__)
- relative_path = os.path.join(release_dir, "..", "..", *args)
- return os.path.abspath(relative_path)
-
-
-def file_regex_replace(pattern, replacement, path):
- with open(path) as f:
- content = f.read()
-
- # It is usually good to know if zero items are about to be replaced
- if re.search(pattern, content) is None:
- raise ValueError(f"file {path} does not contain pattern '{pattern}'")
-
- content = re.sub(pattern, replacement, content)
- with open(path, "w") as f:
- f.write(content)
-
-
-def find_last_dev_tag():
- """Finds the commit of the last version bump
-
- Note that this excludes changes that happened during the release
- process but were not picked into the release branch.
- """
- try:
- maybe_last_dev_tag = git(
- "describe", "--match", "sedona-spatialbench-*.dev", "--tags", "--abbrev=0"
- )
- except RuntimeError:
- first_commit = git("rev-list", "--max-parents=0", "HEAD")[0]
- return ("0.0.0", first_commit)
-
- last_dev_tag = maybe_last_dev_tag[0]
- last_version = re.search(r"[0-9]+\.[0-9]+\.[0-9]+", last_dev_tag).group(0)
- sha = git("rev-list", "-n", "1", last_dev_tag)[0]
- return last_version, sha
-
-
-def find_commits_since(begin_sha, end_sha="HEAD"):
- lines = git("log", "--pretty=oneline", f"{begin_sha}..{end_sha}")
- return lines
-
-
-def main():
- _, last_dev_tag = find_last_dev_tag()
- dev_distance = len(find_commits_since(last_dev_tag))
-
- # Update workspace package version
- file_regex_replace(
- r'\nversion = "([0-9]+\.[0-9]+\.[0-9]+)"',
- f'\nversion = "\\1-alpha{dev_distance}"',
- src_path("Cargo.toml"),
- )
-
- # Update workspace dependencies versions to match the prerelease version
- # Matches spatialbench packages in individual Cargo.toml files
- for package_dir in ["spatialbench-arrow", "spatialbench-cli"]:
- cargo_toml = src_path(package_dir, "Cargo.toml")
- if os.path.exists(cargo_toml):
- # Update version field in package
- try:
- file_regex_replace(
- r'\nversion = "([0-9]+\.[0-9]+\.[0-9]+)"',
- f'\nversion = "\\1-alpha{dev_distance}"',
- cargo_toml,
- )
- except ValueError:
- # Version might already be updated or use workspace version
- pass
-
- # Update spatialbench dependency versions
- try:
- file_regex_replace(
- r'(spatialbench(?:-[a-z0-9\-]+)?) = \{ path = "([^"]+)", version = "([0-9]+\.[0-9]+\.[0-9]+)"',
- f'\\1 = {{ path = "\\2", version = "\\3-alpha{dev_distance}"',
- cargo_toml,
- )
- except ValueError:
- # Pattern might not match
- pass
-
- with open(src_path("Cargo.toml"), "rb") as f:
- print(tomllib.load(f)["workspace"]["package"]["version"])
-
-
-if __name__ == "__main__":
- main()
-
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 4699c3e..c3a17d7 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -7,6 +7,5 @@ docs-overrides/**
raster/output/**
spatialbench/data/sf-v1/*.tbl.gz
spatialbench/data/sf-v1/*.parquet
-target/**
dev/release/rat_exclude_files.txt
diff --git a/docs/queries.md b/docs/queries.md
index b420188..7cc4741 100644
--- a/docs/queries.md
+++ b/docs/queries.md
@@ -7,13 +7,19 @@ SpatialBench is a benchmark for assessing geospatial SQL analytics query perform
The benchmark uses a realistic but synthetic, transportation-themed dataset to ensure the queries reflect practical use cases. By running these queries, you can evaluate and compare the relative performance of different spatial query engines in a consistent and unbiased manner.
## Before you start
-Before running this notebook, ensure that you have installed the packages in the `requirements.txt` file:
+
+Before running this notebook, ensure that you have installed the packages in the `requirements.txt` file:
```python
%pip install -r ~/sedona-spatialbench/docs/requirements.txt
```
+ ...
+ ...
+ Note: you may need to restart the kernel to use updated packages.
+
+
Additionally, install the SpatialBench CLI and generate the synthetic data on your machine:
```
@@ -226,7 +232,7 @@ ORDER BY trip_count DESC, z.z_zonekey ASC
## Q5: Monthly travel patterns for repeat customers (convex hull of dropoff locations)
-**Real-life scenario:** Analyze the geographic spread of travel patterns for frequent customers to understand their mobility behavior.
+**Real-life scenario:** Analyze the geographic spread of travel patterns for frequent customers to understand their mobility behavior.
This query analyzes the monthly travel patterns of frequent customers by
measuring how much geographic area they cover with their trips. For each
customer who took more than five trips in a month, it calculates the size of
the "travel hull" - the area enclosed by connecting all their dropoff locations
that month. The results reveal which customers have the most expansive travel
patterns, helping to identify power users who cover large geographic areas
versus those who stick to smaller, l [...]