This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 763bdffa fix(rust/sedona-geoparquet): Ensure that GeoParquet files are 
always written with PROJJSON CRSes (#669)
763bdffa is described below

commit 763bdffa6703df23255f7b65ca04811e7cf91d71
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Feb 27 20:08:34 2026 -0600

    fix(rust/sedona-geoparquet): Ensure that GeoParquet files are always 
written with PROJJSON CRSes (#669)
    
    Co-authored-by: Copilot <[email protected]>
---
 c/sedona-proj/src/lib.rs                       |   1 +
 c/sedona-proj/src/proj.rs                      |  78 +++++++++---------
 c/sedona-proj/src/proj_dyn.c                   |   1 +
 c/sedona-proj/src/proj_dyn.h                   |   2 +
 c/sedona-proj/src/proj_dyn_bindgen.rs          |   9 +-
 c/sedona-proj/src/{register.rs => provider.rs} |  31 +++++--
 c/sedona-proj/src/register.rs                  |   2 +-
 c/sedona-proj/src/sd_order_lnglat.rs           |   2 +-
 c/sedona-proj/src/st_transform.rs              |  84 ++-----------------
 c/sedona-proj/src/transform.rs                 | 110 ++++++++++++++++++++++++-
 python/sedonadb/tests/io/test_parquet.py       |  16 ++++
 python/sedonadb/tests/test_context.py          |   6 +-
 rust/sedona-common/src/option.rs               |  97 +++++++++++++++++++++-
 rust/sedona-geometry/src/transform.rs          |   7 ++
 rust/sedona-geoparquet/src/writer.rs           |  26 +++++-
 rust/sedona/src/context.rs                     |  18 +++-
 16 files changed, 352 insertions(+), 138 deletions(-)

diff --git a/c/sedona-proj/src/lib.rs b/c/sedona-proj/src/lib.rs
index 914bb031..6cc48eab 100644
--- a/c/sedona-proj/src/lib.rs
+++ b/c/sedona-proj/src/lib.rs
@@ -17,6 +17,7 @@
 pub mod error;
 mod proj;
 mod proj_dyn_bindgen;
+pub mod provider;
 pub mod register;
 pub mod sd_order_lnglat;
 mod st_transform;
diff --git a/c/sedona-proj/src/proj.rs b/c/sedona-proj/src/proj.rs
index 053260f3..e52eb295 100644
--- a/c/sedona-proj/src/proj.rs
+++ b/c/sedona-proj/src/proj.rs
@@ -300,6 +300,27 @@ impl Proj {
         Ok(Self { inner, ctx })
     }
 
+    pub(crate) fn to_projjson(&self) -> Result<String, SedonaProjError> {
+        let inner = unsafe {
+            call_proj_api!(
+                self.ctx.api,
+                proj_as_projjson,
+                self.ctx.inner,
+                self.inner,
+                ptr::null()
+            )
+        };
+
+        if inner.is_null() {
+            return Err(SedonaProjError::Invalid(
+                "proj_as_projjson returned null".to_string(),
+            ));
+        }
+
+        let c_str = unsafe { CStr::from_ptr(inner) };
+        Ok(c_str.to_string_lossy().to_string())
+    }
+
     /// Create a transformation between two coordinate reference systems.
     ///
     /// This creates a transformation pipeline that converts coordinates from
@@ -519,12 +540,12 @@ impl ProjApi {
     #[cfg(feature = "proj-sys")]
     fn from_proj_sys() -> Self {
         use proj_sys::{
-            proj_area_create, proj_area_destroy, proj_area_set_bbox, 
proj_context_create,
-            proj_context_destroy, proj_context_errno, 
proj_context_errno_string,
-            proj_context_set_database_path, proj_context_set_search_paths, 
proj_create,
-            proj_create_crs_to_crs_from_pj, proj_cs_get_axis_count, 
proj_destroy, proj_errno,
-            proj_errno_reset, proj_info, proj_log_level, 
proj_normalize_for_visualization,
-            proj_trans, proj_trans_array,
+            proj_area_create, proj_area_destroy, proj_area_set_bbox, 
proj_as_projjson,
+            proj_context_create, proj_context_destroy, proj_context_errno,
+            proj_context_errno_string, proj_context_set_database_path,
+            proj_context_set_search_paths, proj_create, 
proj_create_crs_to_crs_from_pj,
+            proj_cs_get_axis_count, proj_destroy, proj_errno, 
proj_errno_reset, proj_info,
+            proj_log_level, proj_normalize_for_visualization, proj_trans, 
proj_trans_array,
         };
 
         let mut inner = proj_dyn_bindgen::ProjApi::default();
@@ -595,6 +616,9 @@ impl ProjApi {
             inner.proj_trans_array = Some(std::mem::transmute(
                 proj_trans_array as unsafe extern "C" fn(*mut _, _, usize, 
*mut _) -> _,
             ));
+            inner.proj_as_projjson = Some(std::mem::transmute(
+                proj_as_projjson as unsafe extern "C" fn(_, _, _) -> _,
+            ));
         }
 
         Self {
@@ -604,42 +628,22 @@ impl ProjApi {
     }
 }
 
-// We don't have control over this generated source, so we can't derive the 
implementation
-#[allow(clippy::derivable_impls)]
-impl Default for proj_dyn_bindgen::ProjApi {
-    fn default() -> Self {
-        Self {
-            proj_area_create: Default::default(),
-            proj_area_destroy: Default::default(),
-            proj_area_set_bbox: Default::default(),
-            proj_context_create: Default::default(),
-            proj_context_destroy: Default::default(),
-            proj_context_errno_string: Default::default(),
-            proj_context_errno: Default::default(),
-            proj_context_set_database_path: Default::default(),
-            proj_context_set_search_paths: Default::default(),
-            proj_create_crs_to_crs_from_pj: Default::default(),
-            proj_create: Default::default(),
-            proj_cs_get_axis_count: Default::default(),
-            proj_destroy: Default::default(),
-            proj_errno_reset: Default::default(),
-            proj_errno: Default::default(),
-            proj_info: Default::default(),
-            proj_log_level: Default::default(),
-            proj_normalize_for_visualization: Default::default(),
-            proj_trans: Default::default(),
-            proj_trans_array: Default::default(),
-            release: Default::default(),
-            private_data: ptr::null_mut(),
-        }
-    }
-}
-
 #[cfg(test)]
 mod test {
     use super::*;
     use approx::assert_relative_eq;
 
+    #[test]
+    fn test_crs_to_projjson() {
+        let ctx = Rc::new(ProjContext::try_from_proj_sys().unwrap());
+        let proj = Proj::try_new(ctx.clone(), "EPSG:3857").unwrap();
+        let projjson = proj.to_projjson().unwrap();
+        assert!(
+            projjson.starts_with("{"),
+            "Unexpected PROJJSON output: {projjson}"
+        );
+    }
+
     /// Test conversion from NAD83 US Survey Feet (EPSG 2230) to NAD83 Metres 
(EPSG 26946)
     #[test]
     fn test_crs_to_crs_conversion() {
diff --git a/c/sedona-proj/src/proj_dyn.c b/c/sedona-proj/src/proj_dyn.c
index 995792ea..69c54217 100644
--- a/c/sedona-proj/src/proj_dyn.c
+++ b/c/sedona-proj/src/proj_dyn.c
@@ -112,6 +112,7 @@ static int load_proj_from_handle(struct ProjApi* api, void* 
handle, char* err_ms
   LOAD_PROJ_FUNCTION(api, proj_normalize_for_visualization);
   LOAD_PROJ_FUNCTION(api, proj_trans);
   LOAD_PROJ_FUNCTION(api, proj_trans_array);
+  LOAD_PROJ_FUNCTION(api, proj_as_projjson);
 
   api->release = &proj_dyn_release_api;
   api->private_data = handle;
diff --git a/c/sedona-proj/src/proj_dyn.h b/c/sedona-proj/src/proj_dyn.h
index 377bc9f2..81e80503 100644
--- a/c/sedona-proj/src/proj_dyn.h
+++ b/c/sedona-proj/src/proj_dyn.h
@@ -85,6 +85,8 @@ struct ProjApi {
   PJ* (*proj_normalize_for_visualization)(PJ_CONTEXT* ctx, const PJ* obj);
   PJ_COORD (*proj_trans)(PJ* P, PJ_DIRECTION direction, PJ_COORD coord);
   PJ_COORD (*proj_trans_array)(PJ* P, PJ_DIRECTION direction, size_t n, 
PJ_COORD* coord);
+  const char* (*proj_as_projjson)(PJ_CONTEXT* ctx, const PJ* obj,
+                                  const char* const* options);
   void (*release)(struct ProjApi*);
   void* private_data;
 };
diff --git a/c/sedona-proj/src/proj_dyn_bindgen.rs 
b/c/sedona-proj/src/proj_dyn_bindgen.rs
index d9ede16f..55ea2d9a 100644
--- a/c/sedona-proj/src/proj_dyn_bindgen.rs
+++ b/c/sedona-proj/src/proj_dyn_bindgen.rs
@@ -84,7 +84,7 @@ pub struct PJ_INFO {
 }
 
 #[repr(C)]
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, Default)]
 pub struct ProjApi {
     pub proj_area_create: Option<unsafe extern "C" fn() -> *mut PJ_AREA>,
     pub proj_area_destroy: Option<unsafe extern "C" fn(area: *mut PJ_AREA)>,
@@ -145,6 +145,13 @@ pub struct ProjApi {
             coord: *mut PJ_COORD,
         ) -> PJ_COORD,
     >,
+    pub proj_as_projjson: Option<
+        unsafe extern "C" fn(
+            ctx: *mut PJ_CONTEXT,
+            obj: *const PJ,
+            options: *const *const c_char,
+        ) -> *const c_char,
+    >,
     pub release: Option<unsafe extern "C" fn(arg1: *mut ProjApi)>,
     pub private_data: *mut c_void,
 }
diff --git a/c/sedona-proj/src/register.rs b/c/sedona-proj/src/provider.rs
similarity index 55%
copy from c/sedona-proj/src/register.rs
copy to c/sedona-proj/src/provider.rs
index 5c3951db..d2833da8 100644
--- a/c/sedona-proj/src/register.rs
+++ b/c/sedona-proj/src/provider.rs
@@ -14,18 +14,31 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
-use sedona_expr::scalar_udf::ScalarKernelRef;
 
-use crate::st_transform::st_transform_impl;
+use sedona_common::CrsProvider;
 
-pub use crate::st_transform::configure_global_proj_engine;
-pub use crate::transform::ProjCrsEngineBuilder;
+use crate::transform::with_global_proj_engine;
 
-pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
-    vec![("st_transform", st_transform_impl())]
+#[derive(Debug, Default)]
+pub struct ProjCrsProvider {}
+
+impl CrsProvider for ProjCrsProvider {
+    fn to_projjson(&self, crs_string: &str) -> 
datafusion_common::Result<String> {
+        with_global_proj_engine(|e| e.engine().to_projjson(crs_string))
+    }
 }
 
-pub fn aggregate_kernels() -> Vec<(&'static str, SedonaAccumulatorRef)> {
-    vec![]
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn proj_crs_provider() {
+        let provider = ProjCrsProvider {};
+        let projjson = provider.to_projjson("EPSG:3857").unwrap();
+        assert!(
+            projjson.starts_with("{"),
+            "Unexpected PROJJSON output: {projjson}"
+        );
+    }
 }
diff --git a/c/sedona-proj/src/register.rs b/c/sedona-proj/src/register.rs
index 5c3951db..11fa0ea9 100644
--- a/c/sedona-proj/src/register.rs
+++ b/c/sedona-proj/src/register.rs
@@ -19,7 +19,7 @@ use sedona_expr::scalar_udf::ScalarKernelRef;
 
 use crate::st_transform::st_transform_impl;
 
-pub use crate::st_transform::configure_global_proj_engine;
+pub use crate::transform::configure_global_proj_engine;
 pub use crate::transform::ProjCrsEngineBuilder;
 
 pub fn scalar_kernels() -> Vec<(&'static str, ScalarKernelRef)> {
diff --git a/c/sedona-proj/src/sd_order_lnglat.rs 
b/c/sedona-proj/src/sd_order_lnglat.rs
index 9d95de36..09089d5a 100644
--- a/c/sedona-proj/src/sd_order_lnglat.rs
+++ b/c/sedona-proj/src/sd_order_lnglat.rs
@@ -26,7 +26,7 @@ use sedona_functions::executor::WkbBytesExecutor;
 use sedona_geometry::{transform::CrsEngine, wkb_header::WkbHeader};
 use sedona_schema::{crs::lnglat, datatypes::SedonaType, matchers::ArgMatcher};
 
-use crate::st_transform::with_global_proj_engine;
+use crate::transform::with_global_proj_engine;
 
 /// Generic scalar kernel for sd_order based on the first coordinate
 /// of a geometry projected to lon/lat
diff --git a/c/sedona-proj/src/st_transform.rs 
b/c/sedona-proj/src/st_transform.rs
index efeb3b02..9128485c 100644
--- a/c/sedona-proj/src/st_transform.rs
+++ b/c/sedona-proj/src/st_transform.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use crate::transform::{ProjCrsEngine, ProjCrsEngineBuilder};
+
 use arrow_array::builder::{BinaryBuilder, StringViewBuilder};
 use arrow_array::ArrayRef;
 use arrow_schema::DataType;
@@ -22,21 +22,22 @@ use datafusion_common::cast::{as_string_view_array, 
as_struct_array};
 use datafusion_common::config::ConfigOptions;
 use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue};
 use datafusion_expr::ColumnarValue;
-use sedona_common::{sedona_internal_datafusion_err, sedona_internal_err};
+use sedona_common::sedona_internal_err;
 use sedona_expr::item_crs::make_item_crs;
 use sedona_expr::scalar_udf::{ScalarKernelRef, SedonaScalarKernel};
 use sedona_functions::executor::WkbExecutor;
-use sedona_geometry::transform::{transform, CachingCrsEngine, CrsEngine, 
CrsTransform};
+use sedona_geometry::transform::{transform, CrsEngine, CrsTransform};
 use sedona_geometry::wkb_factory::WKB_MIN_PROBABLE_BYTES;
 use sedona_schema::crs::{deserialize_crs, Crs};
 use sedona_schema::datatypes::{Edges, SedonaType, WKB_GEOMETRY, 
WKB_GEOMETRY_ITEM_CRS};
 use sedona_schema::matchers::ArgMatcher;
-use std::cell::OnceCell;
 use std::io::Write;
 use std::iter::zip;
-use std::sync::{Arc, RwLock};
+use std::sync::Arc;
 use wkb::reader::Wkb;
 
+use crate::transform::with_global_proj_engine;
+
 /// ST_Transform() implementation using the proj crate
 pub fn st_transform_impl() -> ScalarKernelRef {
     Arc::new(STTransform {})
@@ -364,79 +365,6 @@ impl<'a> ArgInput<'a> {
     }
 }
 
-/// Configure the global PROJ engine
-///
-/// Provides an opportunity for a calling application to provide the
-/// [ProjCrsEngineBuilder] whose `build()` method will be used to create
-/// a set of thread local [CrsEngine]s which in turn will perform the actual
-/// computations. This provides an opportunity to configure locations of
-/// various files in addition to network CDN access preferences.
-///
-/// This configuration can be set more than once; however, once the engines
-/// are constructed they cannot currently be reconfigured. This code is 
structured
-/// deliberately to ensure that if an error occurs creating an engine that the
-/// configuration can be set again. Notably, this will occur if this crate was
-/// built without proj-sys the first time somebody calls st_transform.
-pub fn configure_global_proj_engine(builder: ProjCrsEngineBuilder) -> 
Result<()> {
-    let mut global_builder = PROJ_ENGINE_BUILDER.try_write().map_err(|_| {
-        DataFusionError::Configuration(
-            "Failed to acquire write lock for global PROJ 
configuration".to_string(),
-        )
-    })?;
-    global_builder.replace(builder);
-    Ok(())
-}
-
-/// Do something with the global thread-local PROJ engine, creating it if it 
has not
-/// already been created.
-pub(crate) fn with_global_proj_engine<
-    R,
-    F: FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<R>,
->(
-    mut func: F,
-) -> Result<R> {
-    PROJ_ENGINE.with(|engine_cell| {
-        // If there is already an engine, use it!
-        if let Some(engine) = engine_cell.get() {
-            return func(engine);
-        }
-
-        // Otherwise, attempt to get the builder
-        let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| {
-            // Highly unlikely (can only occur when a panic occurred during 
set)
-            sedona_internal_datafusion_err!(
-                "Failed to acquire read lock for global PROJ configuration"
-            )
-        })?;
-
-        // ...and build the engine. This will use a default configuration
-        // (i.e., proj_sys or error) if the builder was never set.
-        let proj_engine = maybe_builder
-            .as_ref()
-            .unwrap_or(&ProjCrsEngineBuilder::default())
-            .build()
-            .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
-        engine_cell
-            .set(CachingCrsEngine::new(proj_engine))
-            .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached 
PROJ transform"))?;
-        func(engine_cell.get().unwrap())
-    })
-}
-
-/// Global builder as a thread-safe RwLock. Normally set once on application 
start
-/// or never set to use all default settings.
-static PROJ_ENGINE_BUILDER: RwLock<Option<ProjCrsEngineBuilder>> =
-    RwLock::<Option<ProjCrsEngineBuilder>>::new(None);
-
-// CrsTransform backed by PROJ is not thread safe, so we define the cache as 
thread-local
-// to avoid race conditions.
-thread_local! {
-    static PROJ_ENGINE: OnceCell<CachingCrsEngine<ProjCrsEngine>> = const {
-        OnceCell::<CachingCrsEngine<ProjCrsEngine>>::new()
-    };
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/c/sedona-proj/src/transform.rs b/c/sedona-proj/src/transform.rs
index 09de1209..1790a51a 100644
--- a/c/sedona-proj/src/transform.rs
+++ b/c/sedona-proj/src/transform.rs
@@ -14,16 +14,20 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
+
 use crate::error::SedonaProjError;
 use crate::proj::{Proj, ProjContext};
+use datafusion_common::{exec_datafusion_err, DataFusionError, Result};
 use geo_traits::Dimensions;
+use sedona_common::sedona_internal_datafusion_err;
 use sedona_geometry::bounding_box::BoundingBox;
 use sedona_geometry::error::SedonaGeometryError;
 use sedona_geometry::interval::IntervalTrait;
-use sedona_geometry::transform::{CrsEngine, CrsTransform};
-use std::cell::RefCell;
+use sedona_geometry::transform::{CachingCrsEngine, CrsEngine, CrsTransform};
+use std::cell::{OnceCell, RefCell};
 use std::path::PathBuf;
 use std::rc::Rc;
+use std::sync::RwLock;
 
 /// Builder for a [ProjCrsEngine]
 ///
@@ -133,6 +137,79 @@ impl ProjCrsEngineBuilder {
     }
 }
 
+/// Configure the global PROJ engine
+///
+/// Provides an opportunity for a calling application to provide the
+/// [ProjCrsEngineBuilder] whose `build()` method will be used to create
+/// a set of thread local [CrsEngine]s which in turn will perform the actual
+/// computations. This provides an opportunity to configure locations of
+/// various files in addition to network CDN access preferences.
+///
+/// This configuration can be set more than once; however, once the engines
+/// are constructed they cannot currently be reconfigured. This code is 
structured
+/// deliberately to ensure that, if an error occurs creating an engine, the
+/// configuration can be set again. Notably, this will occur if this crate was
+/// built without proj-sys the first time somebody calls st_transform.
+pub fn configure_global_proj_engine(builder: ProjCrsEngineBuilder) -> 
Result<()> {
+    let mut global_builder = PROJ_ENGINE_BUILDER.try_write().map_err(|_| {
+        DataFusionError::Configuration(
+            "Failed to acquire write lock for global PROJ 
configuration".to_string(),
+        )
+    })?;
+    global_builder.replace(builder);
+    Ok(())
+}
+
+/// Do something with the global thread-local PROJ engine, creating it if it 
has not
+/// already been created.
+pub(crate) fn with_global_proj_engine<
+    R,
+    F: FnMut(&CachingCrsEngine<ProjCrsEngine>) -> Result<R>,
+>(
+    mut func: F,
+) -> Result<R> {
+    PROJ_ENGINE.with(|engine_cell| {
+        // If there is already an engine, use it!
+        if let Some(engine) = engine_cell.get() {
+            return func(engine);
+        }
+
+        // Otherwise, attempt to get the builder
+        let maybe_builder = PROJ_ENGINE_BUILDER.read().map_err(|_| {
+            // Highly unlikely (can only occur when a panic occurred during 
set)
+            sedona_internal_datafusion_err!(
+                "Failed to acquire read lock for global PROJ configuration"
+            )
+        })?;
+
+        // ...and build the engine. This will use a default configuration
+        // (i.e., proj_sys or error) if the builder was never set.
+        let proj_engine = maybe_builder
+            .as_ref()
+            .unwrap_or(&ProjCrsEngineBuilder::default())
+            .build()
+            .map_err(|e| DataFusionError::External(Box::new(e)))?;
+
+        engine_cell
+            .set(CachingCrsEngine::new(proj_engine))
+            .map_err(|_| sedona_internal_datafusion_err!("Failed to set cached 
PROJ transform"))?;
+        func(engine_cell.get().unwrap())
+    })
+}
+
+/// Global builder as a thread-safe RwLock. Normally set once on application 
start
+/// or never set to use all default settings.
+static PROJ_ENGINE_BUILDER: RwLock<Option<ProjCrsEngineBuilder>> =
+    RwLock::<Option<ProjCrsEngineBuilder>>::new(None);
+
+// CrsTransform backed by PROJ is not thread safe, so we define the cache as 
thread-local
+// to avoid race conditions.
+thread_local! {
+    static PROJ_ENGINE: OnceCell<CachingCrsEngine<ProjCrsEngine>> = const {
+        OnceCell::<CachingCrsEngine<ProjCrsEngine>>::new()
+    };
+}
+
 /// A [CrsEngine] implemented using PROJ
 ///
 /// Use the [ProjCrsEngineBuilder] to create this object.
@@ -141,6 +218,19 @@ pub struct ProjCrsEngine {
     ctx: Rc<ProjContext>,
 }
 
+impl ProjCrsEngine {
+    /// Resolve the CRS described by `crs_string` to a PROJJSON string
+    pub fn to_projjson(&self, crs_string: &str) -> Result<String> {
+        let source_crs = Proj::try_new(self.ctx.clone(), 
crs_string).map_err(|e| {
+            exec_datafusion_err!("Failed to create CRS from source 
'{crs_string}': {e}")
+        })?;
+
+        source_crs
+            .to_projjson()
+            .map_err(|e| exec_datafusion_err!("Failed to export '{crs_string}' 
as PROJJSON: {e}"))
+    }
+}
+
 impl CrsEngine for ProjCrsEngine {
     fn get_transform_crs_to_crs(
         &self,
@@ -256,6 +346,22 @@ mod test {
     use sedona_geometry::transform::transform;
     use wkb::reader::read_wkb;
 
+    #[test]
+    fn proj_as_projjson() {
+        let engine = ProjCrsEngineBuilder::default().build().unwrap();
+        let projjson = engine.to_projjson("EPSG:3857").unwrap();
+        assert!(
+            projjson.starts_with("{"),
+            "Unexpected PROJJSON output: {projjson}"
+        );
+
+        let err = engine.to_projjson("gazornenplat").unwrap_err();
+        assert_eq!(
+            err.message(),
+            "Failed to create CRS from source 'gazornenplat': Invalid PROJ 
string syntax"
+        );
+    }
+
     #[test]
     fn proj_crs_to_crs() {
         let engine = ProjCrsEngineBuilder::default().build().unwrap();
diff --git a/python/sedonadb/tests/io/test_parquet.py 
b/python/sedonadb/tests/io/test_parquet.py
index ccdbcd40..33300ac3 100644
--- a/python/sedonadb/tests/io/test_parquet.py
+++ b/python/sedonadb/tests/io/test_parquet.py
@@ -440,6 +440,22 @@ def test_write_geoparquet_1_1(con, geoarrow_data):
         assert "bbox" in df_roundtrip.columns
 
 
+def test_write_geoparquet_ensure_projjson_crs(con):
+    df = con.sql("SELECT ST_Point(1, 2, 'EPSG:3857') AS geometry")
+
+    with tempfile.TemporaryDirectory() as td:
+        tmp_parquet = Path(td) / "tmp.parquet"
+        df.to_parquet(tmp_parquet)
+
+        file_kv_metadata = parquet.ParquetFile(tmp_parquet).metadata.metadata
+        assert b"geo" in file_kv_metadata
+        geo_metadata = json.loads(file_kv_metadata[b"geo"])
+        crs = geo_metadata["columns"]["geometry"]["crs"]
+        assert crs != "EPSG:3857"
+        assert crs["id"]["authority"] == "EPSG"
+        assert crs["id"]["code"] == 3857
+
+
 def test_write_geoparquet_unknown(con):
     with pytest.raises(SedonaError, match="Unexpected GeoParquet version 
string"):
         con.sql("SELECT 1 as one").to_parquet(
diff --git a/python/sedonadb/tests/test_context.py 
b/python/sedonadb/tests/test_context.py
index 6b876eed..be370619 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -173,7 +173,7 @@ def test_read_parquet_geometry_columns_roundtrip(con, 
tmp_path):
 
     geom_meta = _geom_column_metadata(out_geo2)
     assert geom_meta["encoding"] == "WKB"
-    assert geom_meta["crs"] == "EPSG:3857"
+    assert geom_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
 
     # Test 5: overriding with a different CRS replaces the previous value.
     geometry_columns = json.dumps({"geom": {"encoding": "WKB", "crs": 
"EPSG:4326"}})
@@ -212,7 +212,7 @@ def test_read_parquet_geometry_columns_roundtrip(con, 
tmp_path):
     df.to_parquet(out_geo_multi)
     geom_meta = _geom_column_metadata(out_geo_multi)
     assert geom_meta["encoding"] == "WKB"
-    assert geom_meta["crs"] == "EPSG:3857"
+    assert geom_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
     assert geom_meta["edges"] == "spherical"
 
     # Test 8: specify a non-existent column raises error
@@ -261,7 +261,7 @@ def 
test_read_parquet_geometry_columns_multiple_columns(con, tmp_path):
     geom1_meta = _geom_column_metadata(out_geo2, "geom1")
     geom2_meta = _geom_column_metadata(out_geo2, "geom2")
     assert geom1_meta["encoding"] == "WKB"
-    assert geom1_meta["crs"] == "EPSG:3857"
+    assert geom1_meta["crs"]["id"] == {"authority": "EPSG", "code": 3857}
     assert geom2_meta["encoding"] == "WKB"
 
 
diff --git a/rust/sedona-common/src/option.rs b/rust/sedona-common/src/option.rs
index 280a4705..21b228d4 100644
--- a/rust/sedona-common/src/option.rs
+++ b/rust/sedona-common/src/option.rs
@@ -15,13 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.
 use std::fmt::Display;
+use std::sync::Arc;
 
 use datafusion::config::{ConfigEntry, ConfigExtension, ConfigField, 
ExtensionOptions, Visit};
 use datafusion::prelude::SessionConfig;
-use datafusion_common::config_namespace;
 use datafusion_common::Result;
+use datafusion_common::{config_err, config_namespace};
 use regex::Regex;
 
+use crate::sedona_internal_err;
+
 /// Default minimum number of analyzed geometries for speculative execution 
mode to select an
 /// optimal execution mode.
 pub const DEFAULT_SPECULATIVE_THRESHOLD: usize = 1000;
@@ -39,6 +42,9 @@ config_namespace! {
     pub struct SedonaOptions {
         /// Options for spatial join
         pub spatial_join: SpatialJoinOptions, default = 
SpatialJoinOptions::default()
+
+        /// Global [CrsProvider] for CRS metadata operations
+        pub crs_provider: CrsProviderOption, default = 
CrsProviderOption::default()
     }
 }
 
@@ -405,6 +411,67 @@ impl ConfigField for TgIndexType {
     }
 }
 
+/// Trait defining an abstract provider of Coordinate Reference System metadata
+///
+/// Unlike a CrsEngine, which provides concrete coordinate transformations for
+/// pairs of projections, a CrsProvider handles metadata-only operations.
+/// Currently this is only used to resolve an arbitrary CRS representation to
+/// PROJJSON (e.g., to write valid GeoParquet files from arbitrary CRSes), but
+/// could also be used to validate CRSes.
+pub trait CrsProvider: std::fmt::Debug + Send + Sync {
+    fn to_projjson(&self, crs_string: &str) -> Result<String>;
+}
+
+/// Wrapper type implementing [ConfigField] that allows a [CrsProvider]
+/// member in [SedonaOptions].
+#[derive(Debug, Clone)]
+pub struct CrsProviderOption(Arc<dyn CrsProvider>);
+
+impl CrsProviderOption {
+    /// Create a new option from a [CrsProvider] reference
+    pub fn new(inner: Arc<dyn CrsProvider>) -> Self {
+        CrsProviderOption(inner)
+    }
+
+    /// Convert an arbitrary string to a PROJJSON representation if possible
+    pub fn to_projjson(&self, crs_string: &str) -> Result<String> {
+        self.0.to_projjson(crs_string)
+    }
+}
+
+impl Default for CrsProviderOption {
+    fn default() -> Self {
+        Self(Arc::new(DefaultCrsProvider {}))
+    }
+}
+
+impl PartialEq for CrsProviderOption {
+    fn eq(&self, other: &Self) -> bool {
+        Arc::ptr_eq(&self.0, &other.0)
+    }
+}
+
+impl ConfigField for CrsProviderOption {
+    fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) 
{
+        v.some(key, format!("{:?}", self.0), description);
+    }
+
+    fn set(&mut self, key: &str, _value: &str) -> Result<()> {
+        config_err!("Can't set {key} from SQL")
+    }
+}
+
+#[derive(Debug)]
+struct DefaultCrsProvider {}
+
+impl CrsProvider for DefaultCrsProvider {
+    fn to_projjson(&self, crs_string: &str) -> Result<String> {
+        sedona_internal_err!(
+            "Can't convert {crs_string} to PROJJSON CRS (no CrsProvider 
registered)"
+        )
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -523,4 +590,32 @@ mod tests {
         assert!(config.set("", "invalid").is_err());
         assert!(config.set("", "fixed[10]").is_err());
     }
+
+    #[test]
+    fn test_default_crs_provider_returns_error() {
+        let provider = CrsProviderOption::default();
+        let result = provider.to_projjson("EPSG:4326");
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Can't convert EPSG:4326 to PROJJSON CRS"),
+            "Unexpected error message: {err_msg}"
+        );
+        assert!(
+            err_msg.contains("no CrsProvider registered"),
+            "Unexpected error message: {err_msg}"
+        );
+    }
+
+    #[test]
+    fn test_crs_provider_option_set_from_sql_returns_error() {
+        let mut option = CrsProviderOption::default();
+        let result = option.set("sedona.crs_provider", "some_value");
+        assert!(result.is_err());
+        let err_msg = result.unwrap_err().to_string();
+        assert!(
+            err_msg.contains("Can't set sedona.crs_provider from SQL"),
+            "Unexpected error message: {err_msg}"
+        );
+    }
 }
diff --git a/rust/sedona-geometry/src/transform.rs 
b/rust/sedona-geometry/src/transform.rs
index 2540535e..ba7b5367 100644
--- a/rust/sedona-geometry/src/transform.rs
+++ b/rust/sedona-geometry/src/transform.rs
@@ -152,6 +152,13 @@ pub struct CachingCrsEngine<T: CrsEngine> {
     pipeline_cache: RefCell<LruCache<PipelineCacheKey<'static>, Rc<dyn 
CrsTransform>>>,
 }
 
+impl<T: CrsEngine> CachingCrsEngine<T> {
+    /// Return a reference to the wrapped engine
+    pub fn engine(&self) -> &T {
+        &self.engine
+    }
+}
+
 /// Cache key for CRS to CRS transforms
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 struct CrsToCrsCacheKey<'a> {
diff --git a/rust/sedona-geoparquet/src/writer.rs 
b/rust/sedona-geoparquet/src/writer.rs
index 3ac62980..33be505e 100644
--- a/rust/sedona-geoparquet/src/writer.rs
+++ b/rust/sedona-geoparquet/src/writer.rs
@@ -45,7 +45,7 @@ use datafusion_physical_plan::{
 use float_next_after::NextAfter;
 use futures::StreamExt;
 use geo_traits::GeometryTrait;
-use sedona_common::sedona_internal_err;
+use sedona_common::{sedona_internal_err, SedonaOptions};
 use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
 use sedona_functions::executor::WkbExecutor;
 use sedona_geometry::{
@@ -58,6 +58,7 @@ use sedona_schema::{
     matchers::ArgMatcher,
     schema::SedonaSchema,
 };
+use serde_json::Value;
 
 use crate::{
     metadata::{GeoParquetColumnMetadata, GeoParquetCovering, 
GeoParquetMetadata},
@@ -147,9 +148,28 @@ pub fn create_geoparquet_writer_physical_plan(
         if crs == lnglat() {
             // Do nothing, lnglat is the meaning of an omitted CRS
         } else if let Some(crs) = crs {
-            column_metadata.crs = Some(crs.to_json().parse().map_err(|e| {
+            let mut crs_value: Value = crs.to_json().parse().map_err(|e| {
                 exec_datafusion_err!("Failed to parse CRS for column '{}' 
{e}", f.name())
-            })?);
+            })?;
+
+            // Convert the CRS to PROJJSON so that this file is not rejected by 
downstream readers
+            if let Value::String(string) = &crs_value {
+                if let Some(sedona_options) =
+                    session_config_options.extensions.get::<SedonaOptions>()
+                {
+                    let projjson_string = 
sedona_options.crs_provider.to_projjson(string)?;
+                    crs_value = projjson_string.parse().map_err(|e| {
+                        exec_datafusion_err!(
+                            "Failed to parse CRS for column '{}' from 
CrsProvider {e}",
+                            f.name()
+                        )
+                    })?;
+                } else {
+                    return sedona_internal_err!("SedonaOptions not available");
+                }
+            }
+
+            column_metadata.crs = Some(crs_value);
         } else {
             return exec_err!(
                 "Can't write GeoParquet from null CRS\nUse ST_SetSRID({}, ...) 
to assign it one",
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 0de88261..1664173a 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -46,7 +46,10 @@ use datafusion_expr::dml::InsertOp;
 use datafusion_expr::sqlparser::dialect::{dialect_from_str, Dialect};
 use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, SortExpr};
 use parking_lot::Mutex;
-use sedona_common::option::add_sedona_option_extension;
+use sedona_common::{
+    option::add_sedona_option_extension, sedona_internal_datafusion_err, 
CrsProviderOption,
+    SedonaOptions,
+};
 use sedona_datasource::provider::external_listing_table;
 use sedona_datasource::spec::ExternalFormatSpec;
 use sedona_expr::scalar_udf::IntoScalarKernelRefs;
@@ -103,7 +106,18 @@ impl SedonaContext {
         // and perhaps for all of these initializing them optionally from 
environment
         // variables.
         let session_config = 
SessionConfig::from_env()?.with_information_schema(true);
-        let session_config = add_sedona_option_extension(session_config);
+        let mut session_config = add_sedona_option_extension(session_config);
+
+        // Always register the PROJ CrsProvider by default (if PROJ is not 
configured
+        // before it is used an error will be raised).
+        let opts = session_config
+            .options_mut()
+            .extensions
+            .get_mut::<SedonaOptions>()
+            .ok_or_else(|| sedona_internal_datafusion_err!("SedonaOptions not 
available"))?;
+        opts.crs_provider =
+            
CrsProviderOption::new(Arc::new(sedona_proj::provider::ProjCrsProvider::default()));
+
         #[cfg(feature = "pointcloud")]
         let session_config = session_config.with_option_extension(
             PointcloudOptions::default()

Reply via email to