This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new a16f722 chore(python): Include s2geography in Python wheel (#17)
a16f722 is described below
commit a16f72270971001760e6f5294725f2e924aa9c0d
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Sep 9 17:18:14 2025 +0000
chore(python): Include s2geography in Python wheel (#17)
* try to include s2geography by default in python
* vcpkg + cache for Python CI
* see if passing the toolchain file in windows gets us one step farther
* ensure build scripts error
* maybe fix the nulls issue
* fix import
* fix setsrid
* add perl for openssl
* maybe the right pr this time
* check for static libs in a few places
* maybe fix the library dir
* check multiple libs
* don't run Python on docs prs
* test and document matchers
* add checks for null scalars
* maybe get linker info on Windows
* maybe fix cmake for rust build
* maybe working on windows
* please please
* try more static linking on windows
* temporary disable windows wheel test
* document the cmakelists workaround
* go back to dynamic linking, comment out the correct test so we can check
the wheel locally
* oops
* make geography opt in
* test the windows wheel
* try a different way
* try again
* better way of specifying feature args
* don't build s2geography by default
* Update c/sedona-s2geography/build.rs
Co-authored-by: Copilot <[email protected]>
* fix python wheels spec
* add nightly uploader
* err fix repo name
* stable windows version
* maybe fix linux and macos
* format
* back to windows-latest
* align the vcpkg reference
* more vcpkg alignment
* try avoiding any potential mingw + vcpkg issues
* fix script
* check location of nasm
* add nasm compiler
* try to auto-download nasm
* don't use the action
* attempt a fix
* gitignore windows runtime-generated files
* speed up tests on Windows
* don't try to support s2geography yet
* fix merge
* revert testing change
* ensure tg sees windows as little endian
* delete nasm files
* ignore them again
* rename
* remove
* Update rust/sedona-expr/src/scalar_udf.rs
Co-authored-by: Copilot <[email protected]>
* fmt
---------
Co-authored-by: Copilot <[email protected]>
---
.github/workflows/python-wheels.yml | 51 ++++++---
.github/workflows/python.yml | 45 ++++++++
.github/workflows/rust.yml | 8 +-
c/.gitignore | 2 +
c/sedona-s2geography/CMakeLists.txt | 78 +++++++++-----
c/sedona-s2geography/build.rs | 115 ++++++++++++++++++---
c/sedona-s2geography/src/s2geography.rs | 14 ++-
c/sedona-s2geography/src/scalar_kernel.rs | 21 +++-
c/sedona-tg/src/tg/tg.c | 11 ++
{c => ci/scripts}/.gitignore | 6 +-
.../x64-windows-dynamic-release.cmake | 1 +
ci/scripts/wheels-bootstrap-vcpkg.sh | 8 +-
ci/scripts/wheels-build-linux.sh | 5 +-
ci/scripts/wheels-build-macos.sh | 3 +-
ci/scripts/wheels-build-windows.ps1 | 57 +++++++++-
python/sedonadb/Cargo.toml | 3 +-
rust/sedona-expr/src/scalar_udf.rs | 100 +++++++++++++++++-
17 files changed, 447 insertions(+), 81 deletions(-)
diff --git a/.github/workflows/python-wheels.yml
b/.github/workflows/python-wheels.yml
index 9e389f2..2833bd4 100644
--- a/.github/workflows/python-wheels.yml
+++ b/.github/workflows/python-wheels.yml
@@ -36,9 +36,13 @@ concurrency:
group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: true
+env:
+ # vcpkg commit at which GEOS was updated to 3.14.0
+ VCPKG_REF: 5a01de756c28279ddfdd2b061d1c75710a6255fa
+
jobs:
windows-x86_64:
- runs-on: windows-latest
+ runs-on: windows-2022
steps:
- uses: actions/checkout@v4
@@ -61,18 +65,9 @@ jobs:
uses: actions/checkout@v4
with:
repository: microsoft/vcpkg
- ref: "2025.06.13"
+ ref: ${{ env.VCPKG_REF }}
path: vcpkg
- - name: Bootstrap vcpkg
- shell: bash
- env:
- VCPKG_ROOT: ${{ github.workspace }}/vcpkg
- VCPKG_DEFAULT_TRIPLET: x64-windows-dynamic-release
- run: |
- cd ci/scripts
- ./wheels-bootstrap-vcpkg.sh
-
- name: Build and test wheels (sedonadb)
run: |
cd ci/scripts
@@ -80,6 +75,7 @@ jobs:
env:
VCPKG_ROOT: ${{ github.workspace }}/vcpkg
VCPKG_DEFAULT_TRIPLET: x64-windows-dynamic-release
+ CMAKE_TOOLCHAIN_FILE: ${{ github.workspace
}}/vcpkg/scripts/buildsystems/vcpkg.cmake
CIBW_BUILD: "*-win_amd64"
CIBW_TEST_SKIP: "cp314* cp38*"
CIBW_TEST_REQUIRES: pytest adbc_driver_manager geoarrow-pyarrow
geopandas
@@ -114,7 +110,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: microsoft/vcpkg
- ref: "2025.06.13"
+ ref: ${{ env.VCPKG_REF }}
path: vcpkg
- name: Build and test wheels (sedonadb)
@@ -139,8 +135,7 @@ jobs:
matrix:
config:
- {os: "ubuntu-latest", label: "linux-x86_64", arch: "x86_64"}
- # We can't include this in our CI config until the repository is
public
- # - {os: "ubuntu-24.04-arm", label: "linux-arm64", arch: "aarch64"}
+ - {os: "ubuntu-24.04-arm", label: "linux-arm64", arch: "aarch64"}
steps:
- uses: actions/checkout@v4
@@ -168,3 +163,31 @@ jobs:
with:
name: release-wheels-${{ matrix.config.label }}
path: python/sedonadb/dist/*.whl
+
+ upload_nightly:
+ needs: ["wheels-linux", "macOS-arm64", "windows-x86_64"]
+ name: Upload nightly packages
+ runs-on: "macos-latest"
+ steps:
+ - uses: actions/download-artifact@v4
+ with:
+ pattern: release-*
+ merge-multiple: true
+ path: dist
+
+ - name: Install gemfury client
+ run: |
+ brew tap gemfury/tap
+ brew install fury-cli
+ fury --version
+
+ - name: Upload packages to Gemfury
+ if: github.repository == 'apache/sedona-db' && github.ref ==
'refs/heads/main'
+ shell: bash
+ run: |
+ fury push \
+ --api-token="${GEMFURY_PUSH_TOKEN}" \
+ --as="sedona-nightlies" \
+ dist/*
+ env:
+ GEMFURY_PUSH_TOKEN: ${{ secrets.GEMFURY_PUSH_TOKEN }}  # key must match the variable expanded in the run script
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 9ef7d16..aca83b2 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -21,6 +21,13 @@ on:
pull_request:
branches:
- main
+ paths:
+ - 'Cargo.toml'
+ - 'Cargo.lock'
+ - '.github/workflows/python.yml'
+ - 'rust/**'
+ - 'c/**'
+ - 'python/**'
push:
branches:
- main
@@ -36,6 +43,10 @@ defaults:
run:
shell: bash -l -eo pipefail {0}
+env:
+ # vcpkg commit at which GEOS was updated to 3.14.0
+ VCPKG_REF: 5a01de756c28279ddfdd2b061d1c75710a6255fa
+
jobs:
test:
strategy:
@@ -55,6 +66,39 @@ jobs:
python-version: '3.x'
cache: 'pip'
+ - name: Clone vcpkg
+ uses: actions/checkout@v4
+ with:
+ repository: microsoft/vcpkg
+ ref: ${{ env.VCPKG_REF }}
+ path: vcpkg
+
+ - name: Set up environment variables and bootstrap vcpkg
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/vcpkg
+ CMAKE_TOOLCHAIN_FILE: ${{ github.workspace
}}/vcpkg/scripts/buildsystems/vcpkg.cmake
+ run: |
+ cd vcpkg
+ ./bootstrap-vcpkg.sh
+ cd ..
+
+ echo "VCPKG_ROOT=$VCPKG_ROOT" >> $GITHUB_ENV
+ echo "PATH=$VCPKG_ROOT:$PATH" >> $GITHUB_ENV
+ echo "CMAKE_TOOLCHAIN_FILE=$CMAKE_TOOLCHAIN_FILE" >> $GITHUB_ENV
+
+ - name: Cache vcpkg binaries
+ id: cache-vcpkg
+ uses: actions/cache@v4
+ with:
+ path: vcpkg/packages
+ # Bump the number at the end of this line to force a new dependency
build
+ key: vcpkg-installed-${{ runner.os }}-${{ runner.arch }}-${{
env.VCPKG_REF }}-1
+
+ - name: Install vcpkg dependencies
+ if: steps.cache-vcpkg.outputs.cache-hit != 'true'
+ run: |
+ ./vcpkg/vcpkg install abseil openssl
+
- name: Use stable Rust
id: rust
run: |
@@ -72,6 +116,7 @@ jobs:
- name: Install
run: |
+ export MATURIN_PEP517_ARGS="--features s2geography"
pip install -e "python/sedonadb/[test]" -vv
- name: Download minimal geoarrow-data assets
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
index 27b2885..1c57218 100644
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -41,6 +41,10 @@ defaults:
run:
shell: bash -l -eo pipefail {0}
+env:
+ # vcpkg commit at which GEOS was updated to 3.14.0
+ VCPKG_REF: 5a01de756c28279ddfdd2b061d1c75710a6255fa
+
jobs:
rust:
strategy:
@@ -65,7 +69,7 @@ jobs:
uses: actions/checkout@v4
with:
repository: microsoft/vcpkg
- ref: "2025.06.13"
+ ref: ${{ env.VCPKG_REF }}
path: vcpkg
- name: Set up environment variables and bootstrap vcpkg
@@ -87,7 +91,7 @@ jobs:
with:
path: vcpkg/packages
# Bump the number at the end of this line to force a new dependency
build
- key: vcpkg-installed-${{ runner.os }}-${{ runner.arch }}-2
+ key: vcpkg-installed-${{ runner.os }}-${{ runner.arch }}-${{
env.VCPKG_REF }}-1
- name: Install vcpkg dependencies
if: steps.cache-vcpkg.outputs.cache-hit != 'true'
diff --git a/c/.gitignore b/c/.gitignore
index e3e7ca1..c46e252 100644
--- a/c/.gitignore
+++ b/c/.gitignore
@@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
build/
dist/
.cache
+CMakeUserPresets.json
diff --git a/c/sedona-s2geography/CMakeLists.txt
b/c/sedona-s2geography/CMakeLists.txt
index b6625dd..06cd063 100644
--- a/c/sedona-s2geography/CMakeLists.txt
+++ b/c/sedona-s2geography/CMakeLists.txt
@@ -251,32 +251,60 @@ install(FILES "${CMAKE_BINARY_DIR}/openssl_libraries.txt"
# .a file (but this might not work if the absl libraries weren't static,
# as they aren't on Homebrew and linux distributions).
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
- set(LINK_CXX_STANDARD_LIB "-lc++")
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
- set(LINK_CXX_STANDARD_LIB "-lstdc++")
-elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
- # set MSVC-specific flags if we need them
-else()
- set(LINK_CXX_STANDARD_LIB "")
-endif()
+if(NOT WIN32)
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ set(LINK_CXX_STANDARD_LIB "-lc++")
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
+ set(LINK_CXX_STANDARD_LIB "-lstdc++")
+ elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
+ # set MSVC-specific flags if we need them
+ set(LINK_CXX_STANDARD_LIB "")
+ else()
+ set(LINK_CXX_STANDARD_LIB "")
+ endif()
-set(CMAKE_ECHO_STANDARD_LIBRARIES ${CMAKE_CXX_STANDARD_LIBRARIES})
-set(CMAKE_ECHO_FLAGS ${CMAKE_CXX_FLAGS})
-set(CMAKE_ECHO_LINK_FLAGS ${CMAKE_CXX_LINK_FLAGS})
-set(CMAKE_ECHO_IMPLICIT_LINK_DIRECTORIES
${CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES})
-# TODO: This won't work on Windows
-set(CMAKE_ECHO_LINK_EXECUTABLE
- "sh -c \"echo <FLAGS> <LINK_FLAGS> <LINK_LIBRARIES>
${LINK_CXX_STANDARD_LIB} > <TARGET>\""
-)
+ set(CMAKE_ECHO_STANDARD_LIBRARIES ${CMAKE_CXX_STANDARD_LIBRARIES})
+ set(CMAKE_ECHO_FLAGS ${CMAKE_CXX_FLAGS})
+ set(CMAKE_ECHO_LINK_FLAGS ${CMAKE_CXX_LINK_FLAGS})
+ set(CMAKE_ECHO_IMPLICIT_LINK_DIRECTORIES
${CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES})
-add_executable(linker_flags "CMakeLists.txt")
-target_link_libraries(linker_flags
- OpenSSL::SSL
- OpenSSL::Crypto
- ${S2_EXTRA_OPENSSL_LIBS}
- ${ABSL_LIBRARIES})
+ set(CMAKE_ECHO_LINK_EXECUTABLE
+ "sh -c \"echo <FLAGS> <LINK_FLAGS> <LINK_LIBRARIES>
${LINK_CXX_STANDARD_LIB} > <TARGET>\""
+ )
-set_target_properties(linker_flags PROPERTIES LINKER_LANGUAGE ECHO SUFFIX
".txt")
+ add_executable(linker_flags "CMakeLists.txt")
+ target_link_libraries(linker_flags
+ OpenSSL::SSL
+ OpenSSL::Crypto
+ ${S2_EXTRA_OPENSSL_LIBS}
+ ${ABSL_LIBRARIES})
-install(TARGETS linker_flags DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+ set_target_properties(linker_flags PROPERTIES LINKER_LANGUAGE ECHO SUFFIX
".txt")
+
+ install(TARGETS linker_flags DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+
+else()
+ # On Windows, MSBuild will write this file for us, but we have to look in a
very specific place
+ # to find it. This is possibly brittle but makes it possible to build this
on Windows at all.
+ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/linker_flags.cc"
+ "
+int main(int argc, const char** args) {
+ return 0;
+}")
+ add_executable(linker_flags "${CMAKE_CURRENT_BINARY_DIR}/linker_flags.cc")
+ target_link_libraries(linker_flags
+ OpenSSL::SSL
+ OpenSSL::Crypto
+ ${S2_EXTRA_OPENSSL_LIBS}
+ ${ABSL_LIBRARIES})
+
+ add_custom_command(TARGET linker_flags
+ POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy
+
"${CMAKE_CURRENT_BINARY_DIR}/linker_flags.dir/$<CONFIG>/linker_flags.tlog/link.command.1.tlog"
+
"${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/linker_flags.txt"
+ COMMENT "Copying linker command file for configuration
$<CONFIG>")
+
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/linker_flags.txt"
+ DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+endif()
diff --git a/c/sedona-s2geography/build.rs b/c/sedona-s2geography/build.rs
index 49d1485..6eadb11 100644
--- a/c/sedona-s2geography/build.rs
+++ b/c/sedona-s2geography/build.rs
@@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use std::{
+ collections::HashSet,
env,
path::{Path, PathBuf},
};
@@ -31,10 +32,24 @@ fn main() {
// Link the libraries that are easy to enumerate by hand and whose location
// we control in CMakeLists.txt.
- println!(
- "cargo:rustc-link-search=native={}",
- dst.join("lib").display()
- );
+ let mut lib_dirs = [
+ "geography_glue",
+ "s2geography",
+ "s2",
+ "geoarrow",
+ "nanoarrow_static",
+ ]
+ .map(|lib| find_lib_dir(&dst, lib))
+ .into_iter()
+ .collect::<HashSet<_>>()
+ .into_iter()
+ .collect::<Vec<_>>();
+
+ lib_dirs.sort();
+ for lib_dir in lib_dirs {
+ println!("cargo:rustc-link-search=native={}", lib_dir.display());
+ }
+
println!("cargo:rustc-link-lib=static=geography_glue");
println!("cargo:rustc-link-lib=static=s2geography");
println!("cargo:rustc-link-lib=static=s2");
@@ -62,23 +77,28 @@ fn main() {
fn parse_cmake_linker_flags(binary_dir: &Path) {
// e.g., libabsl_base.a
- let re_lib = Regex::new("^(lib|)([^.]+).*?(lib|a|dylib|so|dll)$").unwrap();
+ let re_lib =
Regex::new("^(lib|)([^.]+).*?(LIB|lib|a|dylib|so|dll)$").unwrap();
// e.g., -L/path/to/lib (CMake doesn't usually output this, preferrig
instead
// to pass the full path to the library)
let re_linker_dir = Regex::new("^-L(.*)").unwrap();
// e.g., -lstdc++
let re_linker_lib = Regex::new("^-l(.*)").unwrap();
- let path = binary_dir.join("lib").join("linker_flags.txt");
- let values = std::fs::read_to_string(path).expect("Read linker_flags.txt");
+
+ let path = find_cmake_linker_flags(binary_dir);
+ let linker_flags_string = read_file_maybe_utf16(&path);
// Print out the whole thing for debugging failures
- println!("Parsing CMake linker flags: {values}");
+ println!("Parsing CMake linker flags: {linker_flags_string}");
let mut last_lib_dir = "".to_string();
// Split flags on whitespace. This probably won't work if library paths
// contain spaces.
- for item in values.split_whitespace() {
+ for item in linker_flags_string.split_whitespace() {
+ if item.is_empty() {
+ continue;
+ }
+
if let Some(dir_match) = re_linker_dir.captures(item) {
let (_, [dir]) = dir_match.extract();
println!("cargo:rustc-link-search=native={dir}");
@@ -89,8 +109,12 @@ fn parse_cmake_linker_flags(binary_dir: &Path) {
continue;
}
- // Try to interpret as a path to a library. CMake loves to do this.
- let mut path = PathBuf::from(item);
+ // Try to interpret as a path to a library. CMake loves to do this. It
might be quoted (Windows)
+ let mut path = if item.starts_with('"') && item.ends_with('"') {
+ PathBuf::from(item[1..(item.len() - 1)].to_string())
+ } else {
+ PathBuf::from(item)
+ };
// If it's a relative path, it's relative to the binary directory
if path.is_relative() {
@@ -108,7 +132,8 @@ fn parse_cmake_linker_flags(binary_dir: &Path) {
}
match suffix {
- "a" | "lib" =>
println!("cargo:rustc-link-lib=static={lib}"),
+ "lib" | "LIB" =>
println!("cargo:rustc-link-lib={lib}"),
+ "a" => println!("cargo:rustc-link-lib=static={lib}"),
_ => println!("cargo:rustc-link-lib=dylib={lib}"),
}
}
@@ -119,3 +144,69 @@ fn parse_cmake_linker_flags(binary_dir: &Path) {
}
}
}
+
+fn find_cmake_linker_flags(binary_dir: &Path) -> PathBuf {
+ // Usually lib but could be lib64 (e.g., the Linux used for wheel builds)
+ let possible_lib_dirs = ["lib", "lib64", "build/Release"];
+ for possible_lib in possible_lib_dirs {
+ let path = binary_dir.join(possible_lib).join("linker_flags.txt");
+ if path.exists() {
+ return path;
+ }
+ }
+
+ panic!(
+ "Can't find linker_flags.txt output at {}",
+ binary_dir.to_string_lossy()
+ )
+}
+
+fn find_lib_dir(binary_dir: &Path, lib_file: &str) -> PathBuf {
+ // Usually lib but could be lib64 (e.g., the Linux used for wheel builds)
+ let possible_lib_dirs = ["lib", "lib64", "build/Release"];
+ for possible_lib in possible_lib_dirs {
+ let path = binary_dir.join(possible_lib);
+ let static_lib_posix = path.join(format!("lib{lib_file}.a"));
+ let static_lib_windows = path.join(format!("{lib_file}.lib"));
+ if static_lib_posix.exists() || static_lib_windows.exists() {
+ return path;
+ }
+ }
+
+ panic!(
+ "Can't find library dir for static library '{lib_file}' output at {}",
+ binary_dir.to_string_lossy()
+ )
+}
+
+// Linker flags scraped from MSBuild are UTF-16 with a byte order mark; linker
flags scraped otherwise
+// are system encoding (likely UTF-8 or compatible).
+fn read_file_maybe_utf16(path: &PathBuf) -> String {
+ let linker_flags_bytes = std::fs::read(path).expect("Read
linker_flags.txt");
+
+ // Check if the first two bytes are UTF-16 BOM (0xFF 0xFE or 0xFE 0xFF)
+ if linker_flags_bytes.len() >= 2
+ && ((linker_flags_bytes[0] == 0xFF && linker_flags_bytes[1] == 0xFE)
+ || (linker_flags_bytes[0] == 0xFE && linker_flags_bytes[1] ==
0xFF))
+ {
+ // Determine endianness from BOM
+ let is_le = linker_flags_bytes[0] == 0xFF;
+
+ // Skip the BOM and convert the rest
+ let u16_bytes = &linker_flags_bytes[2..];
+ let u16_vec: Vec<u16> = u16_bytes
+ .chunks_exact(2)
+ .map(|chunk| {
+ if is_le {
+ u16::from_le_bytes([chunk[0], chunk[1]])
+ } else {
+ u16::from_be_bytes([chunk[0], chunk[1]])
+ }
+ })
+ .collect();
+
+ String::from_utf16_lossy(&u16_vec).to_string()
+ } else {
+ String::from_utf8_lossy(&linker_flags_bytes).to_string()
+ }
+}
diff --git a/c/sedona-s2geography/src/s2geography.rs
b/c/sedona-s2geography/src/s2geography.rs
index 731c6fd..f02b635 100644
--- a/c/sedona-s2geography/src/s2geography.rs
+++ b/c/sedona-s2geography/src/s2geography.rs
@@ -297,16 +297,14 @@ mod test {
fn scalar_udf_errors() {
let mut udf = S2ScalarUDF::Length();
let err = udf.init(Fields::empty(), None).unwrap_err();
- assert_eq!(
- err.to_string(),
- "Invalid argument: Expected one argument in unary s2geography UDF"
- );
+ assert!(err
+ .to_string()
+ .contains("Expected one argument in unary s2geography UDF"));
let err = udf.execute(&[]).unwrap_err();
- assert_eq!(
- err.to_string(),
- "Invalid argument: Expected one argument/one argument type in in
unary s2geography UDF"
- );
+ assert!(err
+ .to_string()
+ .contains("Expected one argument/one argument type in in unary
s2geography UDF"));
}
#[test]
diff --git a/c/sedona-s2geography/src/scalar_kernel.rs
b/c/sedona-s2geography/src/scalar_kernel.rs
index c5ab78c..361bdb0 100644
--- a/c/sedona-s2geography/src/scalar_kernel.rs
+++ b/c/sedona-s2geography/src/scalar_kernel.rs
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use std::sync::Arc;
+use std::{iter::zip, sync::Arc};
use arrow_schema::DataType;
use datafusion_common::{Result, ScalarValue};
@@ -218,8 +218,13 @@ impl SedonaScalarKernel for S2ScalarKernel {
) -> Result<ColumnarValue> {
let mut inner = (self.inner_factory)();
+ let arg_types_if_null = self.matcher.types_if_null(arg_types)?;
+ let args_casted_null = zip(args, &arg_types_if_null)
+ .map(|(arg, type_if_null)|
arg.cast_to(type_if_null.storage_type(), None))
+ .collect::<Result<Vec<_>>>()?;
+
// S2's scalar UDFs operate on fields with extension metadata
- let arg_fields = arg_types
+ let arg_fields = arg_types_if_null
.iter()
.map(|arg_type| arg_type.to_storage_field("", true))
.collect::<Result<Vec<_>>>()?;
@@ -228,7 +233,7 @@ impl SedonaScalarKernel for S2ScalarKernel {
let out_ffi_schema = inner.init(arg_fields.into(), None)?;
// Create arrays from each argument (scalars become arrays of size 1)
- let arg_arrays = args
+ let arg_arrays = args_casted_null
.iter()
.map(|arg| match arg {
ColumnarValue::Array(array) => Ok(array.clone()),
@@ -299,6 +304,10 @@ mod test {
.invoke_wkb_scalar(Some("LINESTRING (0 0, 0 1)"))
.unwrap();
assert_eq!(result, ScalarValue::Float64(Some(111195.10117748393)));
+
+ // Null scalar -> Null
+ let result = tester.invoke_scalar(ScalarValue::Null).unwrap();
+ assert_eq!(result, ScalarValue::Float64(None));
}
#[rstest]
@@ -338,6 +347,12 @@ mod test {
.invoke_scalar_scalar(polygon_scalar, point_scalar)
.unwrap();
assert_eq!(result, ScalarValue::Boolean(Some(true)));
+
+ // Null scalars -> Null
+ let result = tester
+ .invoke_scalar_scalar(ScalarValue::Null, ScalarValue::Null)
+ .unwrap();
+ assert_eq!(result, ScalarValue::Boolean(None));
}
#[test]
diff --git a/c/sedona-tg/src/tg/tg.c b/c/sedona-tg/src/tg/tg.c
index bdeb8d6..b78fe56 100644
--- a/c/sedona-tg/src/tg/tg.c
+++ b/c/sedona-tg/src/tg/tg.c
@@ -20,6 +20,17 @@
#include <stdint.h>
#include <stddef.h>
+// See https://github.com/tidwall/tg/issues/15 for upstream resolution
+#if defined(_MSC_VER)
+#undef __BYTE_ORDER__
+#undef __ORDER_LITTLE_ENDIAN__
+#undef __ORDER_BIG_ENDIAN__
+
+#define __BYTE_ORDER__ 1
+#define __ORDER_LITTLE_ENDIAN__ 1
+#define __ORDER_BIG_ENDIAN__ 0
+#endif
+
/******************************************************************************
Implementation Notes:
diff --git a/c/.gitignore b/ci/scripts/.gitignore
similarity index 91%
copy from c/.gitignore
copy to ci/scripts/.gitignore
index e3e7ca1..400987c 100644
--- a/c/.gitignore
+++ b/ci/scripts/.gitignore
@@ -14,6 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-build/
-dist/
-.cache
+
+# Ignore the nasm compiler downloaded when building Windows wheels
+nasm-*
diff --git a/ci/scripts/custom-triplets/x64-windows-dynamic-release.cmake
b/ci/scripts/custom-triplets/x64-windows-dynamic-release.cmake
index 7157a03..eda7718 100644
--- a/ci/scripts/custom-triplets/x64-windows-dynamic-release.cmake
+++ b/ci/scripts/custom-triplets/x64-windows-dynamic-release.cmake
@@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
set(VCPKG_TARGET_ARCHITECTURE x64)
set(VCPKG_CRT_LINKAGE dynamic)
set(VCPKG_LIBRARY_LINKAGE dynamic)
diff --git a/ci/scripts/wheels-bootstrap-vcpkg.sh
b/ci/scripts/wheels-bootstrap-vcpkg.sh
index c70a0a2..b564d3f 100755
--- a/ci/scripts/wheels-bootstrap-vcpkg.sh
+++ b/ci/scripts/wheels-bootstrap-vcpkg.sh
@@ -32,8 +32,14 @@ else
export
PATH="${VCPKG_ROOT}/installed/${VCPKG_DEFAULT_TRIPLET}/tools/geos/bin:${PATH}"
pushd ${VCPKG_ROOT}
+
+ # If we have an explicitly requested reference, ensure it is checked out
+ if [ ! -z "${VCPKG_REF}" ]; then
+ git checkout ${VCPKG_REF}
+ fi
+
./bootstrap-vcpkg.sh
- ./vcpkg install
--overlay-triplets="${SEDONADB_DIR}/ci/scripts/custom-triplets" geos
+ ./vcpkg install
--overlay-triplets="${SEDONADB_DIR}/ci/scripts/custom-triplets" geos abseil
openssl
popd
export
CMAKE_TOOLCHAIN_FILE="${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
diff --git a/ci/scripts/wheels-build-linux.sh b/ci/scripts/wheels-build-linux.sh
index 3259b6a..0001495 100755
--- a/ci/scripts/wheels-build-linux.sh
+++ b/ci/scripts/wheels-build-linux.sh
@@ -40,7 +40,7 @@ fi
# manylinux is AlmaLinux/Fedora-based, musllinux is Alpine-based
# If we want musllinux support there will be some workshopping required (vcpkg
# needs some newer components than are provided by the default musllinux image)
-BEFORE_ALL_MANYLINUX="yum install -y curl zip unzip tar clang"
+BEFORE_ALL_MANYLINUX="yum install -y curl zip unzip tar clang perl"
# This approach downloads and builds native dependencies with vcpkg once for
every image.
# Compared to the Rust build time, the native dependency build time is not too
bad. We could
@@ -48,9 +48,8 @@ BEFORE_ALL_MANYLINUX="yum install -y curl zip unzip tar clang"
# add quite a bit of complexity but could save time if we build wheels for
linux frequently.
# The native and Rust builds are cached on each image such that compile work
is effectively
# cached between Python versions (just not between invocations of this script).
-export CIBW_ENVIRONMENT_LINUX="VCPKG_ROOT=/vcpkg
VCPKG_DEFAULT_TRIPLET=$VCPKG_DEFAULT_TRIPLET
CMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake
PKG_CONFIG_PATH=/vcpkg/installed/$VCPKG_DEFAULT_TRIPLET/lib/pkgconfig
LD_LIBRARY_PATH=/vcpkg/installed/$VCPKG_DEFAULT_TRIPLET/lib"
+export CIBW_ENVIRONMENT_LINUX="VCPKG_ROOT=/vcpkg VCPKG_REF=$VCPKG_REF
VCPKG_DEFAULT_TRIPLET=$VCPKG_DEFAULT_TRIPLET
CMAKE_TOOLCHAIN_FILE=/vcpkg/scripts/buildsystems/vcpkg.cmake
PKG_CONFIG_PATH=/vcpkg/installed/$VCPKG_DEFAULT_TRIPLET/lib/pkgconfig
LD_LIBRARY_PATH=/vcpkg/installed/$VCPKG_DEFAULT_TRIPLET/lib
MATURIN_PEP517_ARGS='--features s2geography'"
export CIBW_BEFORE_ALL="$BEFORE_ALL_MANYLINUX && git clone
https://github.com/microsoft/vcpkg.git /vcpkg && bash
{package}/../../ci/scripts/wheels-bootstrap-vcpkg.sh"
pushd "${SEDONADB_DIR}"
python -m cibuildwheel --platform linux --archs ${ARCH} --output-dir
python/$2/dist python/$2
-popd
diff --git a/ci/scripts/wheels-build-macos.sh b/ci/scripts/wheels-build-macos.sh
index 4ce86c4..54315ce 100755
--- a/ci/scripts/wheels-build-macos.sh
+++ b/ci/scripts/wheels-build-macos.sh
@@ -43,8 +43,7 @@ source ./wheels-bootstrap-vcpkg.sh
export
CIBW_REPAIR_WHEEL_COMMAND_MACOS="DYLD_LIBRARY_PATH=$VCPKG_INSTALL_NAME_DIR
delocate-listdeps {wheel} && DYLD_LIBRARY_PATH=$VCPKG_INSTALL_NAME_DIR
delocate-wheel --require-archs {delocate_archs} -w {dest_dir} {wheel}"
# Pass on environment variables specifically for the build
-export CIBW_ENVIRONMENT_MACOS="$CIBW_ENVIRONMENT_MACOS
MACOSX_DEPLOYMENT_TARGET=12.0 CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}"
+export CIBW_ENVIRONMENT_MACOS="$CIBW_ENVIRONMENT_MACOS
_PYTHON_HOST_PLATFORM=macosx-12.0-arm64 MACOSX_DEPLOYMENT_TARGET=12.0
CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} MATURIN_PEP517_ARGS='--features
s2geography'"
pushd "${SEDONADB_DIR}"
python -m cibuildwheel --output-dir python/$1/dist python/$1
-popd
diff --git a/ci/scripts/wheels-build-windows.ps1
b/ci/scripts/wheels-build-windows.ps1
index 7e0664a..001664a 100644
--- a/ci/scripts/wheels-build-windows.ps1
+++ b/ci/scripts/wheels-build-windows.ps1
@@ -16,13 +16,54 @@
# under the License.
# If running locally:
-# $env:VCPKG_ROOT="C:\Users\dewey\Documents\rscratch\vcpkg"
+# $env:VCPKG_ROOT="C:\Users\dewey\Documents\gh\vcpkg"
# $env:VCPKG_DEFAULT_TRIPLET="x64-windows-dynamic-release"
# $env:CIBW_BUILD="cp311-win_amd64"
+$originalDirectory = Get-Location
$scriptDirectory = Split-Path -Parent $MyInvocation.MyCommand.Path
-$vcpkgBinDirectory = "$env:VCPKG_ROOT\installed\$env:VCPKG_DEFAULT_TRIPLET\bin"
-$vcpkgLibDirectory = "$env:VCPKG_ROOT\installed\$env:VCPKG_DEFAULT_TRIPLET\lib"
+$vcpkgInstalledDirectory =
"$env:VCPKG_ROOT\installed\$env:VCPKG_DEFAULT_TRIPLET"
+$vcpkgBinDirectory = "$vcpkgInstalledDirectory\bin"
+$vcpkgLibDirectory = "$vcpkgInstalledDirectory\lib"
+
+# Ensure vcpkg
+try {
+ Push-Location "$env:VCPKG_ROOT"
+ .\bootstrap-vcpkg
+ .\vcpkg --overlay-triplets="${scriptDirectory}/custom-triplets" install
geos abseil openssl
+ Pop-Location
+}
+finally {
+ # Restore the original working directory
+ Set-Location -Path $originalDirectory
+}
+
+# Download and extract NASM if it doesn't exist
+# On Windows, NASM is required for AWS Rust dependencies
+$NASM_URL =
"https://www.nasm.us/pub/nasm/releasebuilds/2.16.03/win64/nasm-2.16.03-win64.zip"
+$NASM_DIR = "$scriptDirectory\nasm-2.16.03"
+$NASM_ZIP = "$scriptDirectory\nasm.zip"
+
+if (-not (Test-Path $NASM_DIR)) {
+ Write-Host "Downloading NASM to $NASM_DIR..."
+ New-Item -Path $NASM_DIR -ItemType Directory -Force | Out-Null
+
+ # Download the NASM zip file
+ Invoke-WebRequest -Uri $NASM_URL -OutFile $NASM_ZIP
+
+ # Extract the zip file
+ Expand-Archive -Path $NASM_ZIP -DestinationPath $scriptDirectory -Force
+
+ # Clean up the zip file
+ Remove-Item -Path $NASM_ZIP -Force
+
+ Write-Host "NASM downloaded and extracted to $NASM_DIR"
+} else {
+ Write-Host "NASM directory already exists at $NASM_DIR"
+}
+
+# Add NASM to PATH
+$env:PATH += ";$NASM_DIR"
# Put here/windows on PATH for our fake pkg-config and geos-config executables
$env:PATH += ";$scriptDirectory\windows"
@@ -31,10 +72,15 @@ $env:PATH += ";$scriptDirectory\windows"
# (well, specifically our dummy geos-config) the information it needs to build
bindings
$env:GEOS_LIB_DIR = "$vcpkgLibDirectory"
$env:GEOS_VERSION = "3.13.0"
-$originalDirectory = Get-Location
+
+# Some CMake configurations need this separately from the toolchain file
+$env:CMAKE_PREFIX_PATH="$vcpkgInstalledDirectory"
+$env:OPENSSL_ROOT_DIR="$vcpkgInstalledDirectory"
# Use delvewheel to copy any required dependencies from vcpkg into the wheel
-$env:CIBW_REPAIR_WHEEL_COMMAND_WINDOWS="delvewheel repair -v
--add-path=$vcpkgBinDirectory --wheel-dir={dest_dir} {wheel}"
+# combase.dll seems to be required; however, causes errors when copied into
the wheel
+# This likely means that the wheel won't work on Windows 7.
+$env:CIBW_REPAIR_WHEEL_COMMAND_WINDOWS="delvewheel repair -v
--exclude=combase.dll --add-path=$vcpkgBinDirectory --wheel-dir={dest_dir}
{wheel}"
# Quality of life: don't change the working directory of the calling script
even when it fails
$parentDirectory = Split-Path -Parent (Split-Path -Parent $scriptDirectory)
@@ -50,6 +96,7 @@ try {
Push-Location "$parentDirectory"
python -m cibuildwheel --output-dir python\sedonadb\dist python\sedonadb
+ Pop-Location
}
finally {
# Restore the original working directory
diff --git a/python/sedonadb/Cargo.toml b/python/sedonadb/Cargo.toml
index bfb868b..9fa82a5 100644
--- a/python/sedonadb/Cargo.toml
+++ b/python/sedonadb/Cargo.toml
@@ -28,6 +28,7 @@ crate-type = ["cdylib"]
[features]
default = ["mimalloc"]
mimalloc = ["dep:mimalloc", "dep:libmimalloc-sys"]
+s2geography = ["sedona/s2geography"]
[dependencies]
adbc_core = { workspace = true }
@@ -38,7 +39,7 @@ datafusion = { workspace = true }
datafusion-common = { workspace = true }
datafusion-ffi = { workspace = true }
futures = { workspace = true }
-pyo3 = "0.25.1"
+pyo3 = { version = "0.25.1", features = ["extension-module"] }
sedona = { path = "../../rust/sedona" }
sedona-adbc = { path = "../../rust/sedona-adbc" }
sedona-schema = { path = "../../rust/sedona-schema" }
diff --git a/rust/sedona-expr/src/scalar_udf.rs
b/rust/sedona-expr/src/scalar_udf.rs
index ab3491a..0820cf7 100644
--- a/rust/sedona-expr/src/scalar_udf.rs
+++ b/rust/sedona-expr/src/scalar_udf.rs
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use std::{any::Any, fmt::Debug, sync::Arc};
+use std::{any::Any, fmt::Debug, iter::zip, sync::Arc};
use arrow_schema::{DataType, FieldRef};
use datafusion_common::{not_impl_err, plan_err, Result, ScalarValue};
@@ -23,7 +23,7 @@ use datafusion_expr::{
Volatility,
};
use sedona_common::sedona_internal_err;
-use sedona_schema::datatypes::{Edges, SedonaType};
+use sedona_schema::datatypes::{Edges, SedonaType, WKB_GEOGRAPHY, WKB_GEOMETRY};
pub type ScalarKernelRef = Arc<dyn SedonaScalarKernel + Send + Sync>;
@@ -180,6 +180,31 @@ impl ArgMatcher {
arg_iter.next().is_none()
}
+ /// Calls each [TypeMatcher]'s `type_if_null()`
+ ///
+ /// This method errors if one or more matchers does not have an
+ /// unambiguous castable-from-null storage type. It is provided
+ /// as a utility for generic kernel implementations that rely on
+ /// the matcher to sanitize input that may contain literal nulls.
+ pub fn types_if_null(&self, args: &[SedonaType]) ->
Result<Vec<SedonaType>> {
+ let mut out = Vec::new();
+ for (arg, matcher) in zip(args, &self.matchers) {
+ if let SedonaType::Arrow(DataType::Null) = arg {
+ if let Some(type_if_null) = matcher.type_if_null() {
+ out.push(type_if_null);
+ } else {
+ return sedona_internal_err!(
+ "Matcher {matcher:?} does not provide type_if_null()"
+ );
+ }
+ } else {
+ out.push(arg.clone());
+ }
+ }
+
+ Ok(out)
+ }
+
/// Matches any argument
pub fn is_any() -> Arc<dyn TypeMatcher + Send + Sync> {
Arc::new(IsAny {})
@@ -240,11 +265,28 @@ impl ArgMatcher {
}
}
+/// A TypeMatcher is a predicate on a [SedonaType]
+///
+/// TypeMatchers are the building blocks of an [ArgMatcher] that
+/// represent a single argument. This is a generalization of the
+/// DataFusion [Signature] which does not currently consider
+/// extension types and/or how extension arrays might be casted
+/// to conform to a function with a given signature.
pub trait TypeMatcher: Debug {
+ /// Returns true if this matcher matches a type
fn match_type(&self, arg: &SedonaType) -> bool;
+
+ /// If this argument is optional, return true
fn is_optional(&self) -> bool {
false
}
+
+ /// Return the type to which an argument should be casted,
+ /// if applicable. This can be used to generalize null handling
+ /// or casting.
+ fn type_if_null(&self) -> Option<SedonaType> {
+ None
+ }
}
#[derive(Debug)]
@@ -265,6 +307,10 @@ impl TypeMatcher for IsExact {
fn match_type(&self, arg: &SedonaType) -> bool {
self.exact_type.match_signature(arg)
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(self.exact_type.clone())
+ }
}
#[derive(Debug)]
@@ -280,6 +326,10 @@ impl TypeMatcher for OptionalMatcher {
fn is_optional(&self) -> bool {
true
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ self.inner.type_if_null()
+ }
}
#[derive(Debug)]
@@ -303,6 +353,10 @@ impl TypeMatcher for IsGeometry {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(WKB_GEOMETRY)
+ }
}
#[derive(Debug)]
@@ -317,6 +371,10 @@ impl TypeMatcher for IsGeography {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(WKB_GEOGRAPHY)
+ }
}
#[derive(Debug)]
@@ -329,6 +387,10 @@ impl TypeMatcher for IsNumeric {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(SedonaType::Arrow(DataType::Float64))
+ }
}
#[derive(Debug)]
@@ -346,6 +408,10 @@ impl TypeMatcher for IsString {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(SedonaType::Arrow(DataType::Utf8))
+ }
}
#[derive(Debug)]
@@ -360,6 +426,10 @@ impl TypeMatcher for IsBinary {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(SedonaType::Arrow(DataType::Binary))
+ }
}
#[derive(Debug)]
@@ -374,6 +444,10 @@ impl TypeMatcher for IsBoolean {
_ => false,
}
}
+
+ fn type_if_null(&self) -> Option<SedonaType> {
+ Some(SedonaType::Arrow(DataType::Boolean))
+ }
}
#[derive(Debug)]
@@ -594,30 +668,52 @@ mod tests {
assert!(ArgMatcher::is_geometry_or_geography().match_type(&WKB_GEOGRAPHY));
assert!(!ArgMatcher::is_geometry_or_geography()
.match_type(&SedonaType::Arrow(DataType::Binary)));
+ assert_eq!(ArgMatcher::is_geometry_or_geography().type_if_null(), None);
assert!(ArgMatcher::is_geometry().match_type(&WKB_GEOMETRY));
assert!(!ArgMatcher::is_geometry().match_type(&WKB_GEOGRAPHY));
+ assert_eq!(ArgMatcher::is_geometry().type_if_null(), Some(WKB_GEOMETRY));
assert!(ArgMatcher::is_geography().match_type(&WKB_GEOGRAPHY));
assert!(!ArgMatcher::is_geography().match_type(&WKB_GEOMETRY));
+ assert_eq!(
+ ArgMatcher::is_geography().type_if_null(),
+ Some(WKB_GEOGRAPHY)
+ );
assert!(ArgMatcher::is_numeric().match_type(&SedonaType::Arrow(DataType::Int32)));
assert!(ArgMatcher::is_numeric().match_type(&SedonaType::Arrow(DataType::Float64)));
+ assert_eq!(
+ ArgMatcher::is_numeric().type_if_null(),
+ Some(SedonaType::Arrow(DataType::Float64))
+ );
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::Utf8)));
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::Utf8View)));
assert!(ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::LargeUtf8)));
assert!(!ArgMatcher::is_string().match_type(&SedonaType::Arrow(DataType::Binary)));
+ assert_eq!(
+ ArgMatcher::is_string().type_if_null(),
+ Some(SedonaType::Arrow(DataType::Utf8))
+ );
assert!(ArgMatcher::is_binary().match_type(&SedonaType::Arrow(DataType::Binary)));
assert!(ArgMatcher::is_binary().match_type(&SedonaType::Arrow(DataType::BinaryView)));
assert!(!ArgMatcher::is_binary().match_type(&SedonaType::Arrow(DataType::Utf8)));
+ assert_eq!(
+ ArgMatcher::is_binary().type_if_null(),
+ Some(SedonaType::Arrow(DataType::Binary))
+ );
assert!(ArgMatcher::is_boolean().match_type(&SedonaType::Arrow(DataType::Boolean)));
assert!(!ArgMatcher::is_boolean().match_type(&SedonaType::Arrow(DataType::Int32)));
assert!(ArgMatcher::is_null().match_type(&SedonaType::Arrow(DataType::Null)));
assert!(!ArgMatcher::is_null().match_type(&SedonaType::Arrow(DataType::Int32)));
+ assert_eq!(
+ ArgMatcher::is_boolean().type_if_null(),
+ Some(SedonaType::Arrow(DataType::Boolean))
+ );
}
#[test]