paleolimbot commented on code in PR #681: URL: https://github.com/apache/sedona-db/pull/681#discussion_r2891209098
########## c/sedona-gdal/src/dyn_load.rs: ########## @@ -0,0 +1,318 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::path::Path; + +use libloading::Library; + +use crate::errors::GdalInitLibraryError; +use crate::gdal_dyn_bindgen::SedonaGdalApi; + +/// Load a single symbol from the library and write it into the given field. +/// +/// We load as a raw `*const ()` pointer and transmute to the target function pointer +/// type. This is the standard pattern for dynamic symbol loading where the loaded +/// symbol's signature is known but cannot be expressed as a generic parameter to +/// `Library::get` (because each field has a different signature). +/// +/// On failure returns a `GdalInitLibraryError` with the symbol name and the +/// underlying OS error message. +macro_rules! load_fn { + ($lib:expr, $api:expr, $name:ident) => { + // The target types here are too verbose to annotate for each call site + #[allow(clippy::missing_transmute_annotations)] + { + $api.$name = Some(unsafe { + let sym = $lib + .get::<*const ()>(concat!(stringify!($name), "\0").as_bytes()) + .map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to load symbol {}: {}", + stringify!($name), + e + )) + })?; + std::mem::transmute(sym.into_raw().into_raw()) + }); + } + }; +} + +/// Try to load a symbol under one of several names (e.g. for C++ mangled symbols). +/// Writes the first successful match into `$api.$field`. Returns an error only if +/// *none* of the names resolve. +macro_rules! load_fn_any { + ($lib:expr, $api:expr, $field:ident, [$($name:expr),+ $(,)?]) => {{ + let mut found = false; + $( + if !found { + if let Ok(sym) = unsafe { $lib.get::<*const ()>($name) } { + // The target types here are too verbose to annotate for each call site + #[allow(clippy::missing_transmute_annotations)] + { + $api.$field = Some(unsafe { std::mem::transmute(sym.into_raw().into_raw()) }); + } + found = true; + } + } + )+ + if !found { + return Err(GdalInitLibraryError::LibraryError(format!( + "Failed to resolve {} under any known mangled name", + stringify!($field), + ))); + } + }}; +} + +/// Populate all function-pointer fields of [`SedonaGdalApi`] from the given +/// [`Library`] handle. +fn load_all_symbols(lib: &Library, api: &mut SedonaGdalApi) -> Result<(), GdalInitLibraryError> { + // --- Dataset --- + load_fn!(lib, api, GDALOpenEx); + load_fn!(lib, api, GDALClose); + load_fn!(lib, api, GDALGetRasterXSize); + load_fn!(lib, api, GDALGetRasterYSize); + load_fn!(lib, api, GDALGetRasterCount); + load_fn!(lib, api, GDALGetRasterBand); + load_fn!(lib, api, GDALGetGeoTransform); + load_fn!(lib, api, GDALSetGeoTransform); + load_fn!(lib, api, GDALGetProjectionRef); + load_fn!(lib, api, GDALSetProjection); + load_fn!(lib, api, GDALGetSpatialRef); + load_fn!(lib, api, GDALSetSpatialRef); + load_fn!(lib, api, GDALCreateCopy); + load_fn!(lib, api, GDALDatasetCreateLayer); + + // --- Driver --- + load_fn!(lib, api, GDALAllRegister); + load_fn!(lib, api, GDALGetDriverByName); + load_fn!(lib, api, GDALCreate); + + // --- Band --- + load_fn!(lib, api, GDALAddBand); + load_fn!(lib, api, GDALRasterIO); + load_fn!(lib, api, GDALRasterIOEx); + load_fn!(lib, api, GDALGetRasterDataType); + load_fn!(lib, api, GDALGetRasterBandXSize); + load_fn!(lib, api, GDALGetRasterBandYSize); + load_fn!(lib, api, GDALGetBlockSize); + load_fn!(lib, api, GDALGetRasterNoDataValue); + load_fn!(lib, api, GDALSetRasterNoDataValue); + load_fn!(lib, api, GDALDeleteRasterNoDataValue); + load_fn!(lib, api, GDALSetRasterNoDataValueAsUInt64); + load_fn!(lib, api, GDALSetRasterNoDataValueAsInt64); + + // --- SpatialRef --- + load_fn!(lib, api, OSRNewSpatialReference); + load_fn!(lib, api, OSRDestroySpatialReference); + load_fn!(lib, api, OSRExportToPROJJSON); + load_fn!(lib, api, OSRClone); + load_fn!(lib, api, OSRRelease); + + // --- Geometry --- + load_fn!(lib, api, OGR_G_CreateFromWkb); + load_fn!(lib, api, OGR_G_CreateFromWkt); + load_fn!(lib, api, OGR_G_ExportToIsoWkb); + load_fn!(lib, api, OGR_G_WkbSize); + load_fn!(lib, api, OGR_G_GetEnvelope); + load_fn!(lib, api, OGR_G_DestroyGeometry); + + // --- Vector / Layer --- + load_fn!(lib, api, OGR_L_ResetReading); + load_fn!(lib, api, OGR_L_GetNextFeature); + load_fn!(lib, api, OGR_L_CreateField); + load_fn!(lib, api, OGR_L_GetFeatureCount); + load_fn!(lib, api, OGR_F_GetGeometryRef); + load_fn!(lib, api, OGR_F_GetFieldIndex); + load_fn!(lib, api, OGR_F_GetFieldAsDouble); + load_fn!(lib, api, OGR_F_GetFieldAsInteger); + load_fn!(lib, api, OGR_F_IsFieldSetAndNotNull); + load_fn!(lib, api, OGR_F_Destroy); + load_fn!(lib, api, OGR_Fld_Create); + load_fn!(lib, api, OGR_Fld_Destroy); + + // --- VSI --- + load_fn!(lib, api, VSIFileFromMemBuffer); + load_fn!(lib, api, VSIFCloseL); + load_fn!(lib, api, VSIUnlink); + load_fn!(lib, api, VSIGetMemFileBuffer); + load_fn!(lib, api, VSIFree); + load_fn!(lib, api, VSIMalloc); + + // --- VRT --- + load_fn!(lib, api, VRTCreate); + load_fn!(lib, api, VRTAddSimpleSource); + + // --- Rasterize / Polygonize --- + load_fn!(lib, api, GDALRasterizeGeometries); + load_fn!(lib, api, GDALFPolygonize); + load_fn!(lib, api, GDALPolygonize); + + // --- Config --- + load_fn!(lib, api, CPLSetThreadLocalConfigOption); + + // --- Error --- + load_fn!(lib, api, CPLGetLastErrorNo); + load_fn!(lib, api, CPLGetLastErrorMsg); + load_fn!(lib, api, CPLErrorReset); + + // --- Data Type --- + load_fn!(lib, api, GDALGetDataTypeSizeBytes); + + // --- C++ API: MEMDataset::Create (resolved via mangled symbol names) --- + // The symbol is mangled differently on Linux, macOS, and MSVC, and the + // `char**` vs `const char**` parameter also affects the mangling. + load_fn_any!( + lib, + api, + MEMDatasetCreate, + [ + // Linux (char**) + b"_ZN10MEMDataset6CreateEPKciii12GDALDataTypePPc\0", + // macOS (char**) + b"__ZN10MEMDataset6CreateEPKciii12GDALDataTypePPc\0", + // Linux (const char**) + b"_ZN10MEMDataset6CreateEPKciii12GDALDataTypePPKc\0", + // macOS (const char**) + b"__ZN10MEMDataset6CreateEPKciii12GDALDataTypePPKc\0", + // MSVC (char**) + b"?Create@MEMDataset@@SAPEAV1@PEBDHHH4GDALDataType@@PEAPEAD@Z\0", + // MSVC (const char**) + b"?Create@MEMDataset@@SAPEAV1@PEBDHHH4GDALDataType@@PEAPEBAD@Z\0", + ] + ); + + Ok(()) +} + +/// Load a GDAL shared library from `path` and populate a [`SedonaGdalApi`] struct. +/// +/// Returns the `(Library, SedonaGdalApi)` pair. The caller is responsible for +/// keeping the `Library` alive for the lifetime of the function pointers. +pub(crate) fn load_gdal_from_path( + path: &Path, +) -> Result<(Library, SedonaGdalApi), GdalInitLibraryError> { + let lib = unsafe { Library::new(path.as_os_str()) }.map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to load GDAL library from {}: {}", + path.display(), + e + )) + })?; + + let mut api = SedonaGdalApi::default(); + load_all_symbols(&lib, &mut api)?; + Ok((lib, api)) +} + +/// Load GDAL symbols from the current process image (equivalent to `dlopen(NULL)`). +/// +/// Returns the `(Library, SedonaGdalApi)` pair. The caller is responsible for +/// keeping the `Library` alive for the lifetime of the function pointers. +pub(crate) fn load_gdal_from_current_process( +) -> Result<(Library, SedonaGdalApi), GdalInitLibraryError> { + let lib = current_process_library()?; + let mut api = SedonaGdalApi::default(); + load_all_symbols(&lib, &mut api)?; + Ok((lib, api)) +} + +/// Open a handle to the current process image. +#[cfg(unix)] +fn current_process_library() -> Result<Library, GdalInitLibraryError> { + Ok(libloading::os::unix::Library::this().into()) +} + +#[cfg(windows)] +fn current_process_library() -> Result<Library, GdalInitLibraryError> { + // Safety: loading symbols from the current process is safe. + Ok(unsafe { libloading::os::windows::Library::this() } + .map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to open current process handle: {}", + e + )) + })? + .into()) +} + +#[cfg(not(any(unix, windows)))] +compile_error!( + "sedona-gdal: current_process_library() is not implemented for this platform. \ + Only Unix and Windows are supported." +); Review Comment: I don't think we need to fail compilation, just fail at runtime if somebody tries to use a function that requires GDAL and didn't configure it any other way. ########## c/sedona-gdal/src/gdal_api.rs: ########## @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::ffi::CStr; +use std::path::PathBuf; + +use libloading::Library; + +use crate::dyn_load; +use crate::errors::{GdalError, GdalInitLibraryError}; +use crate::gdal_dyn_bindgen::SedonaGdalApi; + +/// Invoke a function pointer from the `SedonaGdalApi` struct. +/// +/// # Panics +/// +/// Panics if the function pointer is `None`. This is unreachable in correct usage +/// because all function pointers are guaranteed to be `Some` after successful +/// initialization of [`GdalApi`] via [`GdalApi::try_from_shared_library`] or +/// [`GdalApi::try_from_current_process`], and you cannot obtain a `&GdalApi` +/// without successful initialization. +macro_rules! call_gdal_api { + ($api:expr, $func:ident $(, $arg:expr)*) => { + if let Some(func) = $api.inner.$func { + func($($arg),*) + } else { + panic!("{} function not available", stringify!($func)) + } + }; +} + +// Re-export for use in other modules in this crate +#[allow(unused_imports)] +pub(crate) use call_gdal_api; Review Comment: Can this be removed? ########## c/sedona-gdal/src/register.rs: ########## @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::errors::GdalInitLibraryError; +use crate::gdal_api::GdalApi; +use std::path::PathBuf; +use std::sync::{Mutex, OnceLock}; + +/// Minimum GDAL version required by sedona-gdal. +#[cfg(feature = "gdal-sys")] +const MIN_GDAL_VERSION_MAJOR: i32 = 3; +#[cfg(feature = "gdal-sys")] +const MIN_GDAL_VERSION_MINOR: i32 = 4; + +/// Builder for the global [`GdalApi`]. +/// +/// Provides a way to configure how GDAL is loaded before the first use. +/// Use [`configure_global_gdal_api`] to install a builder, then the actual +/// loading happens lazily on the first call to [`get_global_gdal_api`]. +/// +/// # Examples +/// +/// ```no_run +/// use sedona_gdal::register::{GdalApiBuilder, configure_global_gdal_api}; +/// +/// // Configure with a specific shared library path +/// let builder = GdalApiBuilder::default() +/// .with_shared_library("/usr/lib/libgdal.so".into()); +/// configure_global_gdal_api(builder).unwrap(); +/// ``` +#[derive(Default)] +pub struct GdalApiBuilder { + shared_library: Option<PathBuf>, +} + +impl GdalApiBuilder { + /// Set the path to the GDAL shared library. + /// + /// If unset, GDAL symbols will be resolved from the current process image + /// (equivalent to `dlopen(NULL)`), which requires GDAL to already be linked + /// into the process (e.g. via `gdal-sys`). + /// + /// Note that the path is passed directly to `dlopen()`/`LoadLibrary()`, + /// which takes into account the working directory. As a security measure, + /// applications may wish to verify that the path is absolute. This should + /// not be specified from untrusted input. + pub fn with_shared_library(self, path: PathBuf) -> Self { + Self { + shared_library: Some(path), + } + } + + /// Build a [`GdalApi`] with the configured options. + /// + /// When `shared_library` is set, loads GDAL from that path. Otherwise, + /// resolves symbols from the current process (with an optional + /// compile-time version check when the `gdal-sys` feature is enabled). + pub fn build(&self) -> Result<GdalApi, GdalInitLibraryError> { + if let Some(shared_library) = &self.shared_library { + GdalApi::try_from_shared_library(shared_library.clone()) + } else { + #[cfg(feature = "gdal-sys")] + check_gdal_version()?; + GdalApi::try_from_current_process() + } + } +} + +/// Global builder configuration, protected by a [`Mutex`]. +/// +/// Set via [`configure_global_gdal_api`] before the first call to +/// [`get_global_gdal_api`]. Multiple calls to `configure_global_gdal_api` +/// are allowed as long as the API has not been initialized yet. +/// The same mutex also serves as the initialization guard for +/// [`get_global_gdal_api`], eliminating the need for a separate lock. +static GDAL_API_BUILDER: Mutex<Option<GdalApiBuilder>> = Mutex::new(None); + +static GDAL_API: OnceLock<GdalApi> = OnceLock::new(); + +/// Get a reference to the global GDAL API, initializing it if not already done. +/// +/// On first call, reads the builder set by [`configure_global_gdal_api`] (or uses +/// [`GdalApiBuilder::default()`] if none was configured) and calls its `build()` +/// method to create the [`GdalApi`]. The result is stored in a process-global +/// `OnceLock` and reused for all subsequent calls. +pub fn get_global_gdal_api() -> Result<&'static GdalApi, GdalInitLibraryError> { Review Comment: Probably the `with_xxx()` pattern is the one you want to expose? ```suggestion fn get_global_gdal_api() -> Result<&'static GdalApi, GdalInitLibraryError> { ``` ########## c/sedona-gdal/src/lib.rs: ########## @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// --- FFI layer --- +pub(crate) mod dyn_load; +pub mod gdal_dyn_bindgen; + +// --- Error types --- +pub mod errors; + +// --- Core API --- +pub mod gdal_api; +pub mod register; + +// --- Re-exports for convenient use --- +pub use errors::GdalError; +pub use gdal_api::GdalApi; +pub use register::{ + configure_global_gdal_api, get_global_gdal_api, is_gdal_api_configured, with_global_gdal_api, + GdalApiBuilder, +}; Review Comment: Until we start formalizing public APIs for internal crates I think we can skip public reexports ########## c/sedona-gdal/src/register.rs: ########## @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::errors::GdalInitLibraryError; +use crate::gdal_api::GdalApi; +use std::path::PathBuf; +use std::sync::{Mutex, OnceLock}; + +/// Minimum GDAL version required by sedona-gdal. +#[cfg(feature = "gdal-sys")] +const MIN_GDAL_VERSION_MAJOR: i32 = 3; +#[cfg(feature = "gdal-sys")] +const MIN_GDAL_VERSION_MINOR: i32 = 4; + +/// Builder for the global [`GdalApi`]. +/// +/// Provides a way to configure how GDAL is loaded before the first use. +/// Use [`configure_global_gdal_api`] to install a builder, then the actual +/// loading happens lazily on the first call to [`get_global_gdal_api`]. +/// +/// # Examples +/// +/// ```no_run +/// use sedona_gdal::register::{GdalApiBuilder, configure_global_gdal_api}; +/// +/// // Configure with a specific shared library path +/// let builder = GdalApiBuilder::default() +/// .with_shared_library("/usr/lib/libgdal.so".into()); +/// configure_global_gdal_api(builder).unwrap(); +/// ``` +#[derive(Default)] +pub struct GdalApiBuilder { + shared_library: Option<PathBuf>, +} + +impl GdalApiBuilder { + /// Set the path to the GDAL shared library. + /// + /// If unset, GDAL symbols will be resolved from the current process image + /// (equivalent to `dlopen(NULL)`), which requires GDAL to already be linked + /// into the process (e.g. via `gdal-sys`). + /// + /// Note that the path is passed directly to `dlopen()`/`LoadLibrary()`, + /// which takes into account the working directory. As a security measure, + /// applications may wish to verify that the path is absolute. This should + /// not be specified from untrusted input. + pub fn with_shared_library(self, path: PathBuf) -> Self { + Self { + shared_library: Some(path), + } + } + + /// Build a [`GdalApi`] with the configured options. + /// + /// When `shared_library` is set, loads GDAL from that path. Otherwise, + /// resolves symbols from the current process (with an optional + /// compile-time version check when the `gdal-sys` feature is enabled). + pub fn build(&self) -> Result<GdalApi, GdalInitLibraryError> { + if let Some(shared_library) = &self.shared_library { + GdalApi::try_from_shared_library(shared_library.clone()) + } else { + #[cfg(feature = "gdal-sys")] + check_gdal_version()?; + GdalApi::try_from_current_process() + } + } +} + +/// Global builder configuration, protected by a [`Mutex`]. +/// +/// Set via [`configure_global_gdal_api`] before the first call to +/// [`get_global_gdal_api`]. Multiple calls to `configure_global_gdal_api` +/// are allowed as long as the API has not been initialized yet. +/// The same mutex also serves as the initialization guard for +/// [`get_global_gdal_api`], eliminating the need for a separate lock. +static GDAL_API_BUILDER: Mutex<Option<GdalApiBuilder>> = Mutex::new(None); + +static GDAL_API: OnceLock<GdalApi> = OnceLock::new(); + +/// Get a reference to the global GDAL API, initializing it if not already done. +/// +/// On first call, reads the builder set by [`configure_global_gdal_api`] (or uses +/// [`GdalApiBuilder::default()`] if none was configured) and calls its `build()` +/// method to create the [`GdalApi`]. The result is stored in a process-global +/// `OnceLock` and reused for all subsequent calls. +pub fn get_global_gdal_api() -> Result<&'static GdalApi, GdalInitLibraryError> { + if let Some(api) = GDAL_API.get() { + return Ok(api); + } + + let guard = GDAL_API_BUILDER + .lock() + .map_err(|_| GdalInitLibraryError::Invalid("GDAL API builder lock poisoned".to_string()))?; + + if let Some(api) = GDAL_API.get() { + return Ok(api); + } + + let api = guard + .as_ref() + .unwrap_or(&GdalApiBuilder::default()) + .build()?; + + // Register all GDAL drivers once, immediately after loading symbols. + // This mirrors georust/gdal's `_register_drivers()` pattern where + // `GDALAllRegister` is called via `std::sync::Once` before any driver + // lookup or dataset open. Here the `OnceLock` + `Mutex` already + // guarantees this runs exactly once. + unsafe { + let Some(gdal_all_register) = api.inner.GDALAllRegister else { + return Err(GdalInitLibraryError::LibraryError( + "GDALAllRegister symbol not loaded".to_string(), + )); + }; + gdal_all_register(); + } + + let _ = GDAL_API.set(api); + Ok(GDAL_API.get().expect("GDAL API should be set")) +} + +/// Configure the global GDAL API. +/// +/// Stores the given [`GdalApiBuilder`] for use when the global [`GdalApi`] is +/// first initialized (lazily, on the first call to [`get_global_gdal_api`]). +/// +/// This can be called multiple times before the API is initialized — each call +/// replaces the previous builder. However, once [`get_global_gdal_api`] has been +/// called and the API has been successfully initialized, subsequent configurations +/// are accepted but will have no effect (the `OnceLock` ensures the API is created +/// only once). +/// +/// # Typical usage +/// +/// 1. The application (e.g. sedona-db) calls `configure_global_gdal_api` with its +/// default builder early in startup. +/// 2. User code may call `configure_global_gdal_api` again to override the +/// configuration before the first query that uses GDAL. +/// 3. On the first actual GDAL operation, [`get_global_gdal_api`] reads the +/// builder and initializes the API. +pub fn configure_global_gdal_api(builder: GdalApiBuilder) -> Result<(), GdalInitLibraryError> { + let mut global_builder = GDAL_API_BUILDER.lock().map_err(|_| { + GdalInitLibraryError::Invalid( + "Failed to acquire lock for global GDAL configuration".to_string(), + ) + })?; + global_builder.replace(builder); + Ok(()) +} + +pub fn is_gdal_api_configured() -> bool { + GDAL_API.get().is_some() +} + +pub fn with_global_gdal_api<F, R>(func: F) -> Result<R, GdalInitLibraryError> +where Review Comment: Can we add documentation to these since they are the access points? ########## c/sedona-gdal/src/errors.rs: ########## @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Ported (and contains copied code) from georust/gdal: +//! <https://github.com/georust/gdal/blob/v0.19.0/src/errors.rs>. +//! Original code is licensed under MIT. + +use std::ffi::NulError; +use std::num::TryFromIntError; + +use thiserror::Error; + +/// Error type for the sedona-gdal crate initialization and library loading. +#[derive(Error, Debug)] +pub enum GdalInitLibraryError { + #[error("{0}")] + Invalid(String), + #[error("{0}")] + LibraryError(String), +} + +/// Error type compatible with the georust/gdal error variants used in this codebase. +#[derive(Clone, Debug, Error)] +pub enum GdalError { + #[error("CPL error class: '{class:?}', error number: '{number}', error msg: '{msg}'")] + CplError { + class: u32, + number: i32, + msg: String, + }, + + #[error("GDAL method '{method_name}' returned a NULL pointer. Error msg: '{msg}'")] + NullPointer { + method_name: &'static str, + msg: String, + }, + + #[error("Bad argument: {0}")] + BadArgument(String), + + #[error("OGR method '{method_name}' returned error: '{err:?}'")] + OgrError { err: i32, method_name: &'static str }, + + #[error("Unable to unlink mem file: {file_name}")] + UnlinkMemFile { file_name: String }, + + #[error("FFI NUL error: {0}")] + FfiNulError(#[from] NulError), + + #[error("FfiIntoStringError")] + FfiIntoStringError(#[from] std::ffi::IntoStringError), + + #[error("StrUtf8Error")] + StrUtf8Error(#[from] std::str::Utf8Error), + + #[error(transparent)] + IntConversionError(#[from] TryFromIntError), + + #[error("Buffer length {0} does not match raster size {1:?}")] + BufferSizeMismatch(usize, (usize, usize)), +} Review Comment: I see a number of places where these aren't used consistently (e.g., checking the library version does not use any of null pointer, strutf8, intostring or FfiNul). We should probably use them consistently everywhere or more coarsely split them up (I would personally just use fewer error variants until some use case arises where you need to treat them differently). ########## .github/workflows/rust.yml: ########## @@ -128,7 +128,7 @@ jobs: - name: Install dependencies shell: bash run: | - sudo apt-get update && sudo apt-get install -y libgeos-dev + sudo apt-get update && sudo apt-get install -y libgeos-dev libgdal-dev Review Comment: We should also update the contributor guide because I think the general dev setup will now fail without a system GDAL install. ########## c/sedona-gdal/src/errors.rs: ########## @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Ported (and contains copied code) from georust/gdal: +//! <https://github.com/georust/gdal/blob/v0.19.0/src/errors.rs>. +//! Original code is licensed under MIT. + +use std::ffi::NulError; +use std::num::TryFromIntError; + +use thiserror::Error; + +/// Error type for the sedona-gdal crate initialization and library loading. +#[derive(Error, Debug)] +pub enum GdalInitLibraryError { + #[error("{0}")] + Invalid(String), + #[error("{0}")] + LibraryError(String), +} + +/// Error type compatible with the georust/gdal error variants used in this codebase. +#[derive(Clone, Debug, Error)] +pub enum GdalError { + #[error("CPL error class: '{class:?}', error number: '{number}', error msg: '{msg}'")] + CplError { + class: u32, + number: i32, + msg: String, + }, + + #[error("GDAL method '{method_name}' returned a NULL pointer. Error msg: '{msg}'")] + NullPointer { + method_name: &'static str, + msg: String, + }, + + #[error("Bad argument: {0}")] + BadArgument(String), + + #[error("OGR method '{method_name}' returned error: '{err:?}'")] + OgrError { err: i32, method_name: &'static str }, + + #[error("Unable to unlink mem file: {file_name}")] + UnlinkMemFile { file_name: String }, + + #[error("FFI NUL error: {0}")] + FfiNulError(#[from] NulError), + + #[error("FfiIntoStringError")] + FfiIntoStringError(#[from] std::ffi::IntoStringError), + + #[error("StrUtf8Error")] + StrUtf8Error(#[from] std::str::Utf8Error), + + #[error(transparent)] + IntConversionError(#[from] TryFromIntError), + + #[error("Buffer length {0} does not match raster size {1:?}")] + BufferSizeMismatch(usize, (usize, usize)), +} + +pub type Result<T> = std::result::Result<T, GdalError>; Review Comment: ```suggestion ``` We don't do this elsewhere ########## c/sedona-gdal/src/dyn_load.rs: ########## @@ -0,0 +1,318 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::path::Path; + +use libloading::Library; + +use crate::errors::GdalInitLibraryError; +use crate::gdal_dyn_bindgen::SedonaGdalApi; + +/// Load a single symbol from the library and write it into the given field. +/// +/// We load as a raw `*const ()` pointer and transmute to the target function pointer +/// type. This is the standard pattern for dynamic symbol loading where the loaded +/// symbol's signature is known but cannot be expressed as a generic parameter to +/// `Library::get` (because each field has a different signature). +/// +/// On failure returns a `GdalInitLibraryError` with the symbol name and the +/// underlying OS error message. +macro_rules! load_fn { + ($lib:expr, $api:expr, $name:ident) => { + // The target types here are too verbose to annotate for each call site + #[allow(clippy::missing_transmute_annotations)] + { + $api.$name = Some(unsafe { + let sym = $lib + .get::<*const ()>(concat!(stringify!($name), "\0").as_bytes()) + .map_err(|e| { + GdalInitLibraryError::LibraryError(format!( + "Failed to load symbol {}: {}", + stringify!($name), + e + )) + })?; + std::mem::transmute(sym.into_raw().into_raw()) + }); + } + }; +} + +/// Try to load a symbol under one of several names (e.g. for C++ mangled symbols). +/// Writes the first successful match into `$api.$field`. Returns an error only if +/// *none* of the names resolve. +macro_rules! load_fn_any { + ($lib:expr, $api:expr, $field:ident, [$($name:expr),+ $(,)?]) => {{ + let mut found = false; + $( + if !found { + if let Ok(sym) = unsafe { $lib.get::<*const ()>($name) } { + // The target types here are too verbose to annotate for each call site + #[allow(clippy::missing_transmute_annotations)] + { + $api.$field = Some(unsafe { std::mem::transmute(sym.into_raw().into_raw()) }); + } + found = true; + } + } + )+ + if !found { + return Err(GdalInitLibraryError::LibraryError(format!( + "Failed to resolve {} under any known mangled name", + stringify!($field), + ))); + } + }}; +} + +/// Populate all function-pointer fields of [`SedonaGdalApi`] from the given +/// [`Library`] handle. +fn load_all_symbols(lib: &Library, api: &mut SedonaGdalApi) -> Result<(), GdalInitLibraryError> { + // --- Dataset --- + load_fn!(lib, api, GDALOpenEx); + load_fn!(lib, api, GDALClose); + load_fn!(lib, api, GDALGetRasterXSize); + load_fn!(lib, api, GDALGetRasterYSize); + load_fn!(lib, api, GDALGetRasterCount); + load_fn!(lib, api, GDALGetRasterBand); + load_fn!(lib, api, GDALGetGeoTransform); + load_fn!(lib, api, GDALSetGeoTransform); + load_fn!(lib, api, GDALGetProjectionRef); + load_fn!(lib, api, GDALSetProjection); + load_fn!(lib, api, GDALGetSpatialRef); + load_fn!(lib, api, GDALSetSpatialRef); + load_fn!(lib, api, GDALCreateCopy); + load_fn!(lib, api, GDALDatasetCreateLayer); + + // --- Driver --- + load_fn!(lib, api, GDALAllRegister); + load_fn!(lib, api, GDALGetDriverByName); + load_fn!(lib, api, GDALCreate); + + // --- Band --- + load_fn!(lib, api, GDALAddBand); + load_fn!(lib, api, GDALRasterIO); + load_fn!(lib, api, GDALRasterIOEx); + load_fn!(lib, api, GDALGetRasterDataType); + load_fn!(lib, api, GDALGetRasterBandXSize); + load_fn!(lib, api, GDALGetRasterBandYSize); + load_fn!(lib, api, GDALGetBlockSize); + load_fn!(lib, api, GDALGetRasterNoDataValue); + load_fn!(lib, api, GDALSetRasterNoDataValue); + load_fn!(lib, api, GDALDeleteRasterNoDataValue); + load_fn!(lib, api, GDALSetRasterNoDataValueAsUInt64); + load_fn!(lib, api, GDALSetRasterNoDataValueAsInt64); + + // --- SpatialRef --- + load_fn!(lib, api, OSRNewSpatialReference); + load_fn!(lib, api, OSRDestroySpatialReference); + load_fn!(lib, api, OSRExportToPROJJSON); + load_fn!(lib, api, OSRClone); + load_fn!(lib, api, OSRRelease); + + // --- Geometry --- + load_fn!(lib, api, OGR_G_CreateFromWkb); + load_fn!(lib, api, OGR_G_CreateFromWkt); + load_fn!(lib, api, OGR_G_ExportToIsoWkb); + load_fn!(lib, api, OGR_G_WkbSize); + load_fn!(lib, api, OGR_G_GetEnvelope); + load_fn!(lib, api, OGR_G_DestroyGeometry); + + // --- Vector / Layer --- + load_fn!(lib, api, OGR_L_ResetReading); + load_fn!(lib, api, OGR_L_GetNextFeature); + load_fn!(lib, api, OGR_L_CreateField); + load_fn!(lib, api, OGR_L_GetFeatureCount); + load_fn!(lib, api, OGR_F_GetGeometryRef); + load_fn!(lib, api, OGR_F_GetFieldIndex); + load_fn!(lib, api, OGR_F_GetFieldAsDouble); + load_fn!(lib, api, OGR_F_GetFieldAsInteger); + load_fn!(lib, api, OGR_F_IsFieldSetAndNotNull); + load_fn!(lib, api, OGR_F_Destroy); + load_fn!(lib, api, OGR_Fld_Create); + load_fn!(lib, api, OGR_Fld_Destroy); + + // --- VSI --- + load_fn!(lib, api, VSIFileFromMemBuffer); + load_fn!(lib, api, VSIFCloseL); + load_fn!(lib, api, VSIUnlink); + load_fn!(lib, api, VSIGetMemFileBuffer); + load_fn!(lib, api, VSIFree); + load_fn!(lib, api, VSIMalloc); + + // --- VRT --- + load_fn!(lib, api, VRTCreate); + load_fn!(lib, api, VRTAddSimpleSource); + + // --- Rasterize / Polygonize --- + load_fn!(lib, api, GDALRasterizeGeometries); + load_fn!(lib, api, GDALFPolygonize); + load_fn!(lib, api, GDALPolygonize); + + // --- Config --- + load_fn!(lib, api, CPLSetThreadLocalConfigOption); + + // --- Error --- + load_fn!(lib, api, CPLGetLastErrorNo); + load_fn!(lib, api, CPLGetLastErrorMsg); + load_fn!(lib, api, CPLErrorReset); + + // --- Data Type --- + load_fn!(lib, api, GDALGetDataTypeSizeBytes); + + // --- C++ API: MEMDataset::Create (resolved via mangled symbol names) --- + // The symbol is mangled differently on Linux, macOS, and MSVC, and the + // `char**` vs `const char**` parameter also affects the mangling. + load_fn_any!( + lib, + api, + MEMDatasetCreate, + [ + // Linux (char**) + b"_ZN10MEMDataset6CreateEPKciii12GDALDataTypePPc\0", + // macOS (char**) + b"__ZN10MEMDataset6CreateEPKciii12GDALDataTypePPc\0", + // Linux (const char**) + b"_ZN10MEMDataset6CreateEPKciii12GDALDataTypePPKc\0", + // macOS (const char**) + b"__ZN10MEMDataset6CreateEPKciii12GDALDataTypePPKc\0", + // MSVC (char**) + b"?Create@MEMDataset@@SAPEAV1@PEBDHHH4GDALDataType@@PEAPEAD@Z\0", + // MSVC (const char**) + b"?Create@MEMDataset@@SAPEAV1@PEBDHHH4GDALDataType@@PEAPEBAD@Z\0", + ] Review Comment: We chatted a bit offline about how this is kind of a hack but one that we're going to move forward with because dynamically loading GDAL is so useful (and `MEMDatasetCreate()` is essential for performance because of a global GDAL locking issue). Is the difference between Linux and MacOS actually GCC vs clang or is this actually MacOS vs Linux? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
