This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 763bdffa fix(rust/sedona-geoparquet): Ensure that GeoParquet files are
always written with PROJJSON CRSes (#669)
763bdffa is described below
commit 763bdffa6703df23255f7b65ca04811e7cf91d71
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Feb 27 20:08:34 2026 -0600
fix(rust/sedona-geoparquet): Ensure that GeoParquet files are always
written with PROJJSON CRSes (#669)
Co-authored-by: Copilot <[email protected]>
---
c/sedona-proj/src/lib.rs | 1 +
c/sedona-proj/src/proj.rs | 78 +++++++++---------
c/sedona-proj/src/proj_dyn.c | 1 +
c/sedona-proj/src/proj_dyn.h | 2 +
c/sedona-proj/src/proj_dyn_bindgen.rs | 9 +-
c/sedona-proj/src/{register.rs => provider.rs} | 31 +++++--
c/sedona-proj/src/register.rs | 2 +-
c/sedona-proj/src/sd_order_lnglat.rs | 2 +-
c/sedona-proj/src/st_transform.rs | 84 ++-----------------
c/sedona-proj/src/transform.rs | 110 ++++++++++++++++++++++++-
python/sedonadb/tests/io/test_parquet.py | 16 ++++
python/sedonadb/tests/test_context.py | 6 +-
rust/sedona-common/src/option.rs | 97 +++++++++++++++++++++-
rust/sedona-geometry/src/transform.rs | 7 ++
rust/sedona-geoparquet/src/writer.rs | 26 +++++-
rust/sedona/src/context.rs | 18 +++-
16 files changed, 352 insertions(+), 138 deletions(-)
diff --git a/c/sedona-proj/src/lib.rs b/c/sedona-proj/src/lib.rs
index 914bb031..6cc48eab 100644
--- a/c/sedona-proj/src/lib.rs
+++ b/c/sedona-proj/src/lib.rs
@@ -17,6 +17,7 @@
pub mod error;
mod proj;
mod proj_dyn_bindgen;
+pub mod provider;
pub mod register;
pub mod sd_order_lnglat;
mod st_transform;
diff --git a/c/sedona-proj/src/proj.rs b/c/sedona-proj/src/proj.rs
index 053260f3..e52eb295 100644
--- a/c/sedona-proj/src/proj.rs
+++ b/c/sedona-proj/src/proj.rs
@@ -300,6 +300,27 @@ impl Proj {
Ok(Self { inner, ctx })
}
+ pub(crate) fn to_projjson(&self) -> Result<String, SedonaProjError> {
+ let inner = unsafe {
+ call_proj_api!(
+ self.ctx.api,
+ proj_as_projjson,
+ self.ctx.inner,
+ self.inner,
+ ptr::null()
+ )
+ };
+
+ if inner.is_null() {
+ return Err(SedonaProjError::Invalid(
+ "proj_as_projjson returned null".to_string(),
+ ));
+ }
+
+ let c_str = unsafe { CStr::from_ptr(inner) };
+ Ok(c_str.to_string_lossy().to_string())
+ }
+
/// Create a transformation between two coordinate reference systems.
///
/// This creates a transformation pipeline that converts coordinates from
@@ -519,12 +540,12 @@ impl ProjApi {
#[cfg(feature = "proj-sys")]
fn from_proj_sys() -> Self {
use proj_sys::{
- proj_area_create, proj_area_destroy, proj_area_set_bbox,
proj_context_create,
- proj_context_destroy, proj_context_errno,
proj_context_errno_string,
- proj_context_set_database_path, proj_context_set_search_paths,
proj_create,
- proj_create_crs_to_crs_from_pj, proj_cs_get_axis_count,
proj_destroy, proj_errno,
- proj_errno_reset, proj_info, proj_log_level,
proj_normalize_for_visualization,
- proj_trans, proj_trans_array,
+ proj_area_create, proj_area_destroy, proj_area_set_bbox,
proj_as_projjson,
+ proj_context_create, proj_context_destroy, proj_context_errno,
+ proj_context_errno_string, proj_context_set_database_path,
+ proj_context_set_search_paths, proj_create,
proj_create_crs_to_crs_from_pj,
+ proj_cs_get_axis_count, proj_destroy, proj_errno,
proj_errno_reset, proj_info,
+ proj_log_level, proj_normalize_for_visualization, proj_trans,
proj_trans_array,
};
let mut inner = proj_dyn_bindgen::ProjApi::default();
@@ -595,6 +616,9 @@ impl ProjApi {
inner.proj_trans_array = Some(std::mem::transmute(
proj_trans_array as unsafe extern "C" fn(*mut _, _, usize,
*mut _) -> _,
));
+ inner.proj_as_projjson = Some(std::mem::transmute(
+ proj_as_projjson as unsafe extern "C" fn(_, _, _) -> _,
+ ));
}
Self {
@@ -604,42 +628,22 @@ impl ProjApi {
}
}
-// We don't have control over this generated source, so we can't derive the
implementation
-#[allow(clippy::derivable_impls)]
-impl Default for proj_dyn_bindgen::ProjApi {
- fn default() -> Self {
- Self {
- proj_area_create: Default::default(),
- proj_area_destroy: Default::default(),
- proj_area_set_bbox: Default::default(),
- proj_context_create: Default::default(),
- proj_context_destroy: Default::default(),
- proj_context_errno_string: Default::default(),
- proj_context_errno: Default::default(),
- proj_context_set_database_path: Default::default(),
- proj_context_set_search_paths: Default::default(),
- proj_create_crs_to_crs_from_pj: Default::default(),
- proj_create: Default::default(),
- proj_cs_get_axis_count: Default::default(),
- proj_destroy: Default::default(),
- proj_errno_reset: Default::default(),
- proj_errno: Default::default(),
- proj_info: Default::default(),
- proj_log_level: Default::default(),
- proj_normalize_for_visualization: Default::default(),
- proj_trans: Default::default(),
- proj_trans_array: Default::default(),
- release: Default::default(),
- private_data: ptr::null_mut(),
- }
- }
-}
-
#[cfg(test)]
mod test {
use super::*;
use approx::assert_relative_eq;
+ #[test]
+ fn test_crs_to_projjson() {
+ let ctx = Rc::new(ProjContext::try_from_proj_sys().unwrap());
+ let proj = Proj::try_new(ctx.clone(), "EPSG:3857").unwrap();
+ let projjson = proj.to_projjson().unwrap();
+ assert!(
+ projjson.starts_with("{"),
+ "Unexpected PROJJSON output: {projjson}"
+ );
+ }
+
/// Test conversion from NAD83 US Survey Feet (EPSG 2230) to NAD83 Metres
(EPSG 26946)
#[test]
fn test_crs_to_crs_conversion() {
diff --git a/c/sedona-proj/src/proj_dyn.c b/c/sedona-proj/src/proj_dyn.c
index 995792ea..69c54217 100644
--- a/c/sedona-proj/src/proj_dyn.c
+++ b/c/sedona-proj/src/proj_dyn.c
@@ -112,6 +112,7 @@ static int load_proj_from_handle(struct ProjApi* api, void*
handle, char* err_ms
LOAD_PROJ_FUNCTION(api, proj_normalize_for_visualization);
LOAD_PROJ_FUNCTION(api, proj_trans);
LOAD_PROJ_FUNCTION(api, proj_trans_array);
+ LOAD_PROJ_FUNCTION(api, proj_as_projjson);
api->release = &proj_dyn_release_api;
api->private_data = handle;
diff --git a/c/sedona-proj/src/proj_dyn.h b/c/sedona-proj/src/proj_dyn.h
index 377bc9f2..81e80503 100644
--- a/c/sedona-proj/src/proj_dyn.h
+++ b/c/sedona-proj/src/proj_dyn.h
@@ -85,6 +85,8 @@ struct ProjApi {
PJ* (*proj_normalize_for_visualization)(PJ_CONTEXT* ctx, const PJ* obj);
PJ_COORD (*proj_trans)(PJ* P, PJ_DIRECTION direction, PJ_COORD coord);
PJ_COORD (*proj_trans_array)(PJ* P, PJ_DIRECTION direction, size_t n,
PJ_COORD* coord);
+ const char* (*proj_as_projjson)(PJ_CONTEXT* ctx, const PJ* obj,
+ const char* const* options);
void (*release)(struct ProjApi*);
void* private_data;
};
diff --git a/c/sedona-proj/src/proj_dyn_bindgen.rs
b/c/sedona-proj/src/proj_dyn_bindgen.rs
index d9ede16f..55ea2d9a 100644
--- a/c/sedona-proj/src/proj_dyn_bindgen.rs
+++ b/c/sedona-proj/src/proj_dyn_bindgen.rs
@@ -84,7 +84,7 @@ pub struct PJ_INFO {
}
#[repr(C)]
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Default)]
pub struct ProjApi {
pub proj_area_create: Option<unsafe extern "C" fn() -> *mut PJ_AREA>,
pub proj_area_destroy: Option<unsafe extern "C" fn(area: *mut PJ_AREA)>,
@@ -145,6 +145,13 @@ pub struct ProjApi {
coord: *mut PJ_COORD,
) -> PJ_COORD,
>,
+ pub proj_as_projjson: Option<
+ unsafe extern "C" fn(
+ ctx: *mut PJ_CONTEXT,
+ obj: *const PJ,
+ options: *const *const c_char,
+ ) -> *const c_char,
+ >,
pub release: Option<unsafe extern "C" fn(arg1: *mut ProjApi)>,
pub private_data: *mut c_void,
}
diff --git a/c/sedona-proj/src/register.rs b/c/sedona-proj/src/provider.rs
similarity index 55%
copy from c/sedona-proj/src/register.rs
copy to c/sedona-proj/src/provider.rs
index 5c3951db..d2833da8 100644
--- a/c/sedona-proj/src/register.rs
+++ b/c/sedona-proj/src/provider.rs
@@ -14,18 +14,31 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
-use sedona_expr::scalar_udf::ScalarKernelRef;
-use crate::st_transform::st_transform_impl;
+use sedona_common::CrsProvider;
-pub use crate::st_transform::configure_global_proj_engine;
-pub use crate::transform::ProjCrsEngineBuilder;
+use crate::transform::with_global_proj_engine;
-pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
- vec![("st_transform", st_transform_impl())]
+#[derive(Debug, Default)]
+pub struct ProjCrsProvider {}
+
+impl CrsProvider for ProjCrsProvider {
+ fn to_projjson(&self, crs_string: &str) ->
datafusion_common::Result<String> {
+ with_global_proj_engine(|e| e.engine().to_projjson(crs_string))
+ }
}
-pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> {
- vec![]
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn proj_crs_provider() {
+ let provider = ProjCrsProvider {};
+ let projjson = provider.to_projjson("EPSG:3857").unwrap();
+ assert!(
+ projjson.starts_with("{"),
+ "Unexpected PROJJSON output: {projjson}"
+ );
+ }
}
diff --git a/c/sedona-proj/src/register.rs b/c/sedona-proj/src/register.rs
index 5c3951db..11fa0ea9 100644
--- a/c/sedona-proj/src/register.rs
+++ b/c/sedona-proj/src/register.rs
@@ -19,7 +19,7 @@ use sedona_expr::scalar_udf::ScalarKernelRef;
use crate::st_transform::st_transform_impl;
-pub use crate::st_transform::configure_global_proj_engine;
+pub use crate::transform::configure_global_proj_engine;
pub use crate::transform::ProjCrsEngineBuilder;
pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
diff --git a/c/sedona-proj/src/sd_order_lnglat.rs
b/c/sedona-proj/src/sd_order_lnglat.rs
index 9d95de36..09089d5a 100644
--- a/c/sedona-proj/src/sd_order_lnglat.rs
+++ b/c/sedona-proj/src/sd_order_lnglat.rs
@@ -26,7 +26,7 @@ use sedona_functions::executor::WkbBytesExecutor;
use sedona_geometry::{transform::CrsEngine, wkb_header::WkbHeader};
use sedona_schema::{crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher};
-use crate::st_transform::with_global_proj_engine;
+use crate::transform::with_global_proj_engine;
/// Generic scalar kernel for sd_order based on the first coordinate
/// of a geometry projected to lon/lat
diff --git a/c/sedona-proj/src/st_transform.rs
b/c/sedona-proj/src/st_transform.rs
index efeb3b02..9128485c 100644
--- a/c/sedona-proj/src/st_transform.rs
+++ b/c/sedona-proj/src/st_transform.rs
@@ -14,7 +14,7 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-use crate::transform::{ProjCrsEngine, ProjCrsEngineBuilder};
+
use arrow_array::builder::{BinaryBuilder, StringViewBuilder};
use arrow_array::ArrayRef;
use arrow_schema::DataType;
@@ -22,21 +22,22 @@ use datafusion_common::cast::{as_string_view_array,
as_struct_array};
use datafusion_common::config::ConfigOptions;
use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
use datafusion_expr::ColumnarValue;
-use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
+use sedona_common::sedona_internal_err;
use sedona_expr::item_crs::make_item_crs;
use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
use sedona_functions::executor::WkbExecutor;
-use sedona_geometry::transform::{transform, CachingCrsEngine, CrsEngine,
CrsTransform};
+use sedona_geometry::transform::{transform, CrsEngine, CrsTransform};
use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
use sedona_schema::crs::{deserialize_crs, Crs};
use sedona_schema::datatypes::{Edges, SedonaType, WKB_GEOMETRY,
WKB_GEOMETRY_ITEM_CRS};
use sedona_schema::matchers::ArgMatcher;
-use std::cell::OnceCell;
use std::io::Write;
use std::iter::zip;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
use wkb::reader::Wkb;
+use crate::transform::with_global_proj_engine;
+
/// ST_Transform() implementation using the proj crate
pub fn st_transform_impl() -> ScalarKernelRef {
Arc::new(STTransform {})
@@ -364,79 +365,6 @@ impl<'a> ArgInput<'a> {
}
}
-/// Configure the global PROJ engine
-///
-/// Provides an opportunity for a calling application to provide the
-/// [ProjCrsEngineBuilder] whose `build()` method will be used to create
-/// a set of thread local [CrsEngine]s which in turn will perform the actual
-/// computations. This provides an opportunity to configure locations of
-/// various files in addition to network CDN access preferences.
-///
-/// This configuration can be set more than once; however, once the engines
-/// are constructed they cannot currently be reconfigured. This code is
structured
-/// deliberately to ensure that if an error occurs creating an engine that the
-/// configuration can be set again. Notably, this will occur if this crate was
-/// built without proj-sys the first time somebody calls st_transform.
-pub fn configure_global_proj_engine(builder: ProjCrsEngineBuilder) ->
Result<()> {
- let mut global_builder = PROJ_ENGINE_BUILDER.try_write().map_err(|_| {
- DataFusionError::Configuration(
- "Failed to acquire write lock for global PROJ
configuration".to_string(),
- )
- })?;
- global_builder.replace(builder);
- Ok(())
-}
-
-/// Do something with the global thread-local PROJ engine, creating it if it
has not
-/// already been created.
-pub(crate) fn with_global_proj_engine<
- R,
- F: FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<R>,
->(
- mut func: F,
-) -> Result<R> {
- PROJ_ENGINE.with(|engine_cell| {
- // If there is already an engine, use it!
- if let Some(engine) = engine_cell.get() {
- return func(engine);
- }
-
- // Otherwise, attempt to get the builder
- let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| {
- // Highly unlikely (can only occur when a panic occurred during
set)
- sedona_internal_datafusion_err!(
- "Failed to acquire read lock for global PROJ configuration"
- )
- })?;
-
- // ...and build the engine. This will use a default configuration
- // (i.e., proj_sys or error) if the builder was never set.
- let proj_engine = maybe_builder
- .as_ref()
- .unwrap_or(&ProjCrsEngineBuilder::default())
- .build()
- .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
- engine_cell
- .set(CachingCrsEngine::new(proj_engine))
- .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached
PROJ transform"))?;
- func(engine_cell.get().unwrap())
- })
-}
-
-/// Global builder as a thread-safe RwLock. Normally set once on application
start
-/// or never set to use all default settings.
-static PROJ_ENGINE_BUILDER: RwLock<Option<ProjCrsEngineBuilder>> =
- RwLock::<Option<ProjCrsEngineBuilder>>::new(None);
-
-// CrsTransform backed by PROJ is not thread safe, so we define the cache as
thread-local
-// to avoid race conditions.
-thread_local! {
- static PROJ_ENGINE: OnceCell<CachingCrsEngine<ProjCrsEngine>> = const {
- OnceCell::<CachingCrsEngine<ProjCrsEngine>>::new()
- };
-}
-
#[cfg(test)]
mod tests {
use super::*;
diff --git a/c/sedona-proj/src/transform.rs b/c/sedona-proj/src/transform.rs
index 09de1209..1790a51a 100644
--- a/c/sedona-proj/src/transform.rs
+++ b/c/sedona-proj/src/transform.rs
@@ -14,16 +14,20 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
+
use crate::error::SedonaProjError;
use crate::proj::{Proj, ProjContext};
+use datafusion_common::{exec_datafusion_err, DataFusionError, Result};
use geo_traits::Dimensions;
+use sedona_common::sedona_internal_datafusion_err;
use sedona_geometry::bounding_box::BoundingBox;
use sedona_geometry::error::SedonaGeometryError;
use sedona_geometry::interval::IntervalTrait;
-use sedona_geometry::transform::{CrsEngine, CrsTransform};
-use std::cell::RefCell;
+use sedona_geometry::transform::{CachingCrsEngine, CrsEngine, CrsTransform};
+use std::cell::{OnceCell, RefCell};
use std::path::PathBuf;
use std::rc::Rc;
+use std::sync::RwLock;
/// Builder for a [ProjCrsEngine]
///
@@ -133,6 +137,79 @@ impl ProjCrsEngineBuilder {
}
}
+/// Configure the global PROJ engine
+///
+/// Provides an opportunity for a calling application to provide the
+/// [ProjCrsEngineBuilder] whose `build()` method will be used to create
+/// a set of thread local [CrsEngine]s which in turn will perform the actual
+/// computations. This provides an opportunity to configure locations of
+/// various files in addition to network CDN access preferences.
+///
+/// This configuration can be set more than once; however, once the engines
+/// are constructed they cannot currently be reconfigured. This code is
structured
+/// deliberately to ensure that if an error occurs creating an engine that the
+/// configuration can be set again. Notably, this will occur if this crate was
+/// built without proj-sys the first time somebody calls st_transform.
+pub fn configure_global_proj_engine(builder: ProjCrsEngineBuilder) ->
Result<()> {
+ let mut global_builder = PROJ_ENGINE_BUILDER.try_write().map_err(|_| {
+ DataFusionError::Configuration(
+ "Failed to acquire write lock for global PROJ
configuration".to_string(),
+ )
+ })?;
+ global_builder.replace(builder);
+ Ok(())
+}
+
+/// Do something with the global thread-local PROJ engine, creating it if it
has not
+/// already been created.
+pub(crate) fn with_global_proj_engine<
+ R,
+ F: FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<R>,
+>(
+ mut func: F,
+) -> Result<R> {
+ PROJ_ENGINE.with(|engine_cell| {
+ // If there is already an engine, use it!
+ if let Some(engine) = engine_cell.get() {
+ return func(engine);
+ }
+
+ // Otherwise, attempt to get the builder
+ let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| {
+ // Highly unlikely (can only occur when a panic occurred during
set)
+ sedona_internal_datafusion_err!(
+ "Failed to acquire read lock for global PROJ configuration"
+ )
+ })?;
+
+ // ...and build the engine. This will use a default configuration
+ // (i.e., proj_sys or error) if the builder was never set.
+ let proj_engine = maybe_builder
+ .as_ref()
+ .unwrap_or(&ProjCrsEngineBuilder::default())
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+ engine_cell
+ .set(CachingCrsEngine::new(proj_engine))
+ .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached
PROJ transform"))?;
+ func(engine_cell.get().unwrap())
+ })
+}
+
+/// Global builder as a thread-safe RwLock. Normally set once on application
start
+/// or never set to use all default settings.
+static PROJ_ENGINE_BUILDER: RwLock<Option<ProjCrsEngineBuilder>> =
+ RwLock::<Option<ProjCrsEngineBuilder>>::new(None);
+
+// CrsTransform backed by PROJ is not thread safe, so we define the cache as
thread-local
+// to avoid race conditions.
+thread_local! {
+ static PROJ_ENGINE: OnceCell<CachingCrsEngine<ProjCrsEngine>> = const {
+ OnceCell::<CachingCrsEngine<ProjCrsEngine>>::new()
+ };
+}
+
/// A [CrsEngine] implemented using PROJ
///
/// Use the [ProjCrsEngineBuilder] to create this object.
@@ -141,6 +218,19 @@ pub struct ProjCrsEngine {
ctx: Rc<ProjContext>,
}
+impl ProjCrsEngine {
+ /// Resolve the CRS described by `crs_string` to a PROJJSON string
+ pub fn to_projjson(&self, crs_string: &str) -> Result<String> {
+ let source_crs = Proj::try_new(self.ctx.clone(),
crs_string).map_err(|e| {
+ exec_datafusion_err!("Failed to create CRS from source
'{crs_string}': {e}")
+ })?;
+
+ source_crs
+ .to_projjson()
+ .map_err(|e| exec_datafusion_err!("Failed to export '{crs_string}'
as PROJJSON: {e}"))
+ }
+}
+
impl CrsEngine for ProjCrsEngine {
fn get_transform_crs_to_crs(
&self,
@@ -256,6 +346,22 @@ mod test {
use sedona_geometry::transform::transform;
use wkb::reader::read_wkb;
+ #[test]
+ fn proj_as_projjson() {
+ let engine = ProjCrsEngineBuilder::default().build().unwrap();
+ let projjson = engine.to_projjson("EPSG:3857").unwrap();
+ assert!(
+ projjson.starts_with("{"),
+ "Unexpected PROJJSON output: {projjson}"
+ );
+
+ let err = engine.to_projjson("gazornenplat").unwrap_err();
+ assert_eq!(
+ err.message(),
+ "Failed to create CRS from source 'gazornenplat': Invalid PROJ
string syntax"
+ );
+ }
+
#[test]
fn proj_crs_to_crs() {
let engine = ProjCrsEngineBuilder::default().build().unwrap();
diff --git a/python/sedonadb/tests/io/test_parquet.py
b/python/sedonadb/tests/io/test_parquet.py
index ccdbcd40..33300ac3 100644
--- a/python/sedonadb/tests/io/test_parquet.py
+++ b/python/sedonadb/tests/io/test_parquet.py
@@ -440,6 +440,22 @@ def test_write_geoparquet_1_1(con, geoarrow_data):
assert "bbox" in df_roundtrip.columns
+def test_write_geoparquet_ensure_projjson_crs(con):
+ df = con.sql("SELECT ST_Point(1, 2, 'EPSG:3857') AS geometry")
+
+ with tempfile.TemporaryDirectory() as td:
+ tmp_parquet = Path(td) / "tmp.parquet"
+ df.to_parquet(tmp_parquet)
+
+ file_kv_metadata = parquet.ParquetFile(tmp_parquet).metadata.metadata
+ assert b"geo" in file_kv_metadata
+ geo_metadata = json.loads(file_kv_metadata[b"geo"])
+ crs = geo_metadata["columns"]["geometry"]["crs"]
+ assert crs != "EPSG:3857"
+ assert crs["id"]["authority"] == "EPSG"
+ assert crs["id"]["code"] == 3857
+
+
def test_write_geoparquet_unknown(con):
with pytest.raises(SedonaError, match="Unexpected GeoParquet version
string"):
con.sql("SELECT 1 as one").to_parquet(
diff --git a/python/sedonadb/tests/test_context.py
b/python/sedonadb/tests/test_context.py
index 6b876eed..be370619 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -173,7 +173,7 @@ def test_read_parquet_geometry_columns_roundtrip(con,
tmp_path):
geom_meta = _geom_column_metadata(out_geo2)
assert geom_meta["encoding"] == "WKB"
- assert geom_meta["crs"] == "EPSG:3857"
+ assert geom_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
# Test 5: overriding with a different CRS replaces the previous value.
geometry_columns = json.dumps({"geom": {"encoding": "WKB", "crs":
"EPSG:4326"}})
@@ -212,7 +212,7 @@ def test_read_parquet_geometry_columns_roundtrip(con,
tmp_path):
df.to_parquet(out_geo_multi)
geom_meta = _geom_column_metadata(out_geo_multi)
assert geom_meta["encoding"] == "WKB"
- assert geom_meta["crs"] == "EPSG:3857"
+ assert geom_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
assert geom_meta["edges"] == "spherical"
# Test 8: specify a non-existent column raises error
@@ -261,7 +261,7 @@ def
test_read_parquet_geometry_columns_multiple_columns(con, tmp_path):
geom1_meta = _geom_column_metadata(out_geo2, "geom1")
geom2_meta = _geom_column_metadata(out_geo2, "geom2")
assert geom1_meta["encoding"] == "WKB"
- assert geom1_meta["crs"] == "EPSG:3857"
+ assert geom1_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
assert geom2_meta["encoding"] == "WKB"
diff --git a/rust/sedona-common/src/option.rs b/rust/sedona-common/src/option.rs
index 280a4705..21b228d4 100644
--- a/rust/sedona-common/src/option.rs
+++ b/rust/sedona-common/src/option.rs
@@ -15,13 +15,16 @@
// specific language governing permissions and limitations
// under the License.
use std::fmt::Display;
+use std::sync::Arc;
use datafusion::config::{ConfigEntry, ConfigExtension, ConfigField,
ExtensionOptions, Visit};
use datafusion::prelude::SessionConfig;
-use datafusion_common::config_namespace;
use datafusion_common::Result;
+use datafusion_common::{config_err, config_namespace};
use regex::Regex;
+use crate::sedona_internal_err;
+
/// Default minimum number of analyzed geometries for speculative execution
mode to select an
/// optimal execution mode.
pub const DEFAULT_SPECULATIVE_THRESHOLD: usize = 1000;
@@ -39,6 +42,9 @@ config_namespace! {
pub struct SedonaOptions {
/// Options for spatial join
pub spatial_join: SpatialJoinOptions, default =
SpatialJoinOptions::default()
+
+ /// Global [CrsProvider] for CRS metadata operations
+ pub crs_provider: CrsProviderOption, default =
CrsProviderOption::default()
}
}
@@ -405,6 +411,67 @@ impl ConfigField for TgIndexType {
}
}
+/// Trait defining an abstract provider of Coordinate Reference System metadata
+///
+/// Unlike a CrsEngine, which provides concrete coordinate transformations for
+/// pairs of projections, a CrsProvider handles metadata-only operations.
+/// Currently this is only used to resolve an arbitrary CRS representation to
+/// PROJJSON (e.g., to write valid GeoParquet files from arbitrary CRSes), but
+/// could also be used to validate CRSes.
+pub trait CrsProvider: std::fmt::Debug + Send + Sync {
+ fn to_projjson(&self, crs_string: &str) -> Result<String>;
+}
+
+/// Wrapper class implementing [ConfigField] that allows a [CrsProvider]
+/// member in [SedonaOptions].
+#[derive(Debug, Clone)]
+pub struct CrsProviderOption(Arc<dyn CrsProvider>);
+
+impl CrsProviderOption {
+ /// Create a new option from a [CrsProvider] reference
+ pub fn new(inner: Arc<dyn CrsProvider>) -> Self {
+ CrsProviderOption(inner)
+ }
+
+ /// Convert an arbitrary string to a PROJJSON representation if possible
+ pub fn to_projjson(&self, crs_string: &str) -> Result<String> {
+ self.0.to_projjson(crs_string)
+ }
+}
+
+impl Default for CrsProviderOption {
+ fn default() -> Self {
+ Self(Arc::new(DefaultCrsProvider {}))
+ }
+}
+
+impl PartialEq for CrsProviderOption {
+ fn eq(&self, other: &Self) -> bool {
+ Arc::ptr_eq(&self.0, &other.0)
+ }
+}
+
+impl ConfigField for CrsProviderOption {
+ fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str)
{
+ v.some(key, format!("{:?}", self.0), description);
+ }
+
+ fn set(&mut self, key: &str, _value: &str) -> Result<()> {
+ config_err!("Can't set {key} from SQL")
+ }
+}
+
+#[derive(Debug)]
+struct DefaultCrsProvider {}
+
+impl CrsProvider for DefaultCrsProvider {
+ fn to_projjson(&self, crs_string: &str) -> Result<String> {
+ sedona_internal_err!(
+ "Can't convert {crs_string} to PROJJSON CRS (no CrsProvider
registered)"
+ )
+ }
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -523,4 +590,32 @@ mod tests {
assert!(config.set("", "invalid").is_err());
assert!(config.set("", "fixed[10]").is_err());
}
+
+ #[test]
+ fn test_default_crs_provider_returns_error() {
+ let provider = CrsProviderOption::default();
+ let result = provider.to_projjson("EPSG:4326");
+ assert!(result.is_err());
+ let err_msg = result.unwrap_err().to_string();
+ assert!(
+ err_msg.contains("Can't convert EPSG:4326 to PROJJSON CRS"),
+ "Unexpected error message: {err_msg}"
+ );
+ assert!(
+ err_msg.contains("no CrsProvider registered"),
+ "Unexpected error message: {err_msg}"
+ );
+ }
+
+ #[test]
+ fn test_crs_provider_option_set_from_sql_returns_error() {
+ let mut option = CrsProviderOption::default();
+ let result = option.set("sedona.crs_provider", "some_value");
+ assert!(result.is_err());
+ let err_msg = result.unwrap_err().to_string();
+ assert!(
+ err_msg.contains("Can't set sedona.crs_provider from SQL"),
+ "Unexpected error message: {err_msg}"
+ );
+ }
}
diff --git a/rust/sedona-geometry/src/transform.rs
b/rust/sedona-geometry/src/transform.rs
index 2540535e..ba7b5367 100644
--- a/rust/sedona-geometry/src/transform.rs
+++ b/rust/sedona-geometry/src/transform.rs
@@ -152,6 +152,13 @@ pub struct CachingCrsEngine<T: CrsEngine> {
pipeline_cache: RefCell<LruCache<PipelineCacheKey<'static>, Rc<dyn
CrsTransform>>>,
}
+impl<T: CrsEngine> CachingCrsEngine<T> {
+ /// Return a reference to the wrapped engine
+ pub fn engine(&self) -> &T {
+ &self.engine
+ }
+}
+
/// Cache key for CRS to CRS transforms
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct CrsToCrsCacheKey<'a> {
diff --git a/rust/sedona-geoparquet/src/writer.rs
b/rust/sedona-geoparquet/src/writer.rs
index 3ac62980..33be505e 100644
--- a/rust/sedona-geoparquet/src/writer.rs
+++ b/rust/sedona-geoparquet/src/writer.rs
@@ -45,7 +45,7 @@ use datafusion_physical_plan::{
use float_next_after::NextAfter;
use futures::StreamExt;
use geo_traits::GeometryTrait;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_err, SedonaOptions};
use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
use sedona_functions::executor::WkbExecutor;
use sedona_geometry::{
@@ -58,6 +58,7 @@ use sedona_schema::{
matchers::ArgMatcher,
schema::SedonaSchema,
};
+use serde_json::Value;
use crate::{
metadata::{GeoParquetColumnMetadata, GeoParquetCovering,
GeoParquetMetadata},
@@ -147,9 +148,28 @@ pub fn create_geoparquet_writer_physical_plan(
if crs == lnglat() {
// Do nothing, lnglat is the meaning of an omitted CRS
} else if let Some(crs) = crs {
- column_metadata.crs = Some(crs.to_json().parse().map_err(|e| {
+ let mut crs_value: Value = crs.to_json().parse().map_err(|e| {
exec_datafusion_err!("Failed to parse CRS for column '{}'
{e}", f.name())
- })?);
+ })?;
+
+ // Convert string CRSes to PROJJSON so this file is not rejected by
downstream readers
+ if let Value::String(string) = &crs_value {
+ if let Some(sedona_options) =
+ session_config_options.extensions.get::<SedonaOptions>()
+ {
+ let projjson_string =
sedona_options.crs_provider.to_projjson(string)?;
+ crs_value = projjson_string.parse().map_err(|e| {
+ exec_datafusion_err!(
+ "Failed to parse CRS for column '{}' from
CrsProvider {e}",
+ f.name()
+ )
+ })?;
+ } else {
+ return sedona_internal_err!("SedonaOptions not available");
+ }
+ }
+
+ column_metadata.crs = Some(crs_value);
} else {
return exec_err!(
"Can't write GeoParquet from null CRS\nUse ST_SetSRID({}, ...)
to assign it one",
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 0de88261..1664173a 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -46,7 +46,10 @@ use datafusion_expr::dml::InsertOp;
use datafusion_expr::sqlparser::dialect::{dialect_from_str, Dialect};
use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, SortExpr};
use parking_lot::Mutex;
-use sedona_common::option::add_sedona_option_extension;
+use sedona_common::{
+ option::add_sedona_option_extension, sedona_internal_datafusion_err,
CrsProviderOption,
+ SedonaOptions,
+};
use sedona_datasource::provider::external_listing_table;
use sedona_datasource::spec::ExternalFormatSpec;
use sedona_expr::scalar_udf::IntoScalarKernelRefs;
@@ -103,7 +106,18 @@ impl SedonaContext {
// and perhaps for all of these initializing them optionally from
environment
// variables.
let session_config =
SessionConfig::from_env()?.with_information_schema(true);
- let session_config = add_sedona_option_extension(session_config);
+ let mut session_config = add_sedona_option_extension(session_config);
+
+ // Always register the PROJ CrsProvider by default (if PROJ is not
configured
+ // before it is used an error will be raised).
+ let opts = session_config
+ .options_mut()
+ .extensions
+ .get_mut::<SedonaOptions>()
+ .ok_or_else(|| sedona_internal_datafusion_err!("SedonaOptions not
available"))?;
+ opts.crs_provider =
+
CrsProviderOption::new(Arc::new(sedona_proj::provider::ProjCrsProvider::default()));
+
#[cfg(feature = "pointcloud")]
let session_config = session_config.with_option_extension(
PointcloudOptions::default()