This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new ace5542a feat(rust/sedona-functions): Implement ST_AsEWKB with item 
CRS support (#535)
ace5542a is described below

commit ace5542add43a79b7eeceaf175287a9a8bcd5304
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Jan 23 11:56:26 2026 -0600

    feat(rust/sedona-functions): Implement ST_AsEWKB with item CRS support 
(#535)
    
    Co-authored-by: Copilot <[email protected]>
---
 python/sedonadb/python/sedonadb/testing.py  |   8 +-
 python/sedonadb/tests/functions/test_wkb.py |  93 +++++++
 rust/sedona-common/src/error.rs             |  17 +-
 rust/sedona-functions/src/executor.rs       |  14 ++
 rust/sedona-functions/src/lib.rs            |   1 +
 rust/sedona-functions/src/register.rs       |   1 +
 rust/sedona-functions/src/st_asewkb.rs      | 368 ++++++++++++++++++++++++++++
 rust/sedona-geometry/src/ewkb_factory.rs    | 353 ++++++++++++++++++++++++++
 rust/sedona-geometry/src/lib.rs             |   1 +
 rust/sedona-schema/src/crs.rs               |  11 +-
 10 files changed, 854 insertions(+), 13 deletions(-)

diff --git a/python/sedonadb/python/sedonadb/testing.py 
b/python/sedonadb/python/sedonadb/testing.py
index ec8b870a..eb271060 100644
--- a/python/sedonadb/python/sedonadb/testing.py
+++ b/python/sedonadb/python/sedonadb/testing.py
@@ -635,11 +635,15 @@ class PostGISSingleThread(PostGIS):
             cur.execute("SET max_parallel_workers_per_gather TO 0")
 
 
-def geom_or_null(arg):
+def geom_or_null(arg, srid=None):  # srid is optional; when given, the EWKT constructor below is used
     """Format SQL expression for a geometry object or NULL"""
     if arg is None:
         return "NULL"
-    return f"ST_GeomFromText('{arg}')"
+
+    if srid is None:
+        return f"ST_GeomFromText('{arg}')"  # no SRID requested: plain WKT constructor
+    else:
+        return f"ST_GeomFromEWKT('SRID={srid};{arg}')"  # EWKT form: the WKT prefixed with SRID=<srid>;
 
 
 def geog_or_null(arg):
diff --git a/python/sedonadb/tests/functions/test_wkb.py 
b/python/sedonadb/tests/functions/test_wkb.py
new file mode 100644
index 00000000..424d9a36
--- /dev/null
+++ b/python/sedonadb/tests/functions/test_wkb.py
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+import shapely
+from sedonadb.testing import PostGIS, SedonaDB, geom_or_null
+
+
[email protected]("eng", [SedonaDB, PostGIS])
[email protected]("srid", [None, 4326])
[email protected](
+    "geom",
+    [
+        # XY dimensions
+        "POINT (1 2)",
+        "LINESTRING (1 2, 3 4, 5 6)",
+        "POLYGON ((0 1, 2 0, 2 3, 0 3, 0 1))",
+        "MULTIPOINT ((1 2), (3 4))",
+        "MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))",
+        "MULTIPOLYGON (((0 1, 2 0, 2 3, 0 3, 0 1)))",
+        "GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))",
+        # XYZ dimensions
+        "POINT Z (1 2 3)",
+        "LINESTRING Z (1 2 3, 4 5 6)",
+        "POLYGON Z ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
+        "MULTIPOINT Z ((1 2 3), (4 5 6))",
+        "MULTILINESTRING Z ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
+        "MULTIPOLYGON Z (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
+        "GEOMETRYCOLLECTION Z (POINT Z (1 2 3))",
+        # XYM dimensions
+        "POINT M (1 2 3)",
+        "LINESTRING M (1 2 3, 4 5 6)",
+        "POLYGON M ((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2))",
+        "MULTIPOINT M ((1 2 3), (4 5 6))",
+        "MULTILINESTRING M ((1 2 3, 4 5 6), (7 8 9, 10 11 12))",
+        "MULTIPOLYGON M (((0 1 2, 3 0 2, 3 4 2, 0 4 2, 0 1 2)))",
+        "GEOMETRYCOLLECTION M (POINT M (1 2 3))",
+        # XYZM dimensions
+        "POINT ZM (1 2 3 4)",
+        "LINESTRING ZM (1 2 3 4, 5 6 7 8)",
+        "POLYGON ZM ((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3))",
+        "MULTIPOINT ZM ((1 2 3 4), (5 6 7 8))",
+        "MULTILINESTRING ZM ((1 2 3 4, 5 6 7 8), (9 10 11 12, 13 14 15 16))",
+        "MULTIPOLYGON ZM (((0 1 2 3, 4 0 2 3, 4 5 2 3, 0 5 2 3, 0 1 2 3)))",
+        "GEOMETRYCOLLECTION ZM (POINT ZM (1 2 3 4))",
+        # Empty geometries
+        "POINT EMPTY",
+        "LINESTRING EMPTY",
+        "POLYGON EMPTY",
+        "MULTIPOINT EMPTY",
+        "MULTILINESTRING EMPTY",
+        "MULTIPOLYGON EMPTY",
+        "GEOMETRYCOLLECTION EMPTY",
+        # NULL
+        None,
+    ],
+)
+def test_st_asewkb(eng, srid, geom):
+    eng = eng.create_or_skip()
+
+    if geom is not None:
+        shapely_geom = shapely.from_wkt(geom)
+        if srid is not None:
+            shapely_geom = shapely.set_srid(shapely_geom, srid)
+            write_srid = True
+        else:
+            write_srid = False
+
+        expected = shapely.to_wkb(
+            shapely_geom,
+            output_dimension=4,
+            byte_order=1,
+            flavor="extended",
+            include_srid=write_srid,
+        )
+    else:
+        expected = None
+
+    eng.assert_query_result(f"SELECT ST_AsEWKB({geom_or_null(geom, srid)})", 
expected)
diff --git a/rust/sedona-common/src/error.rs b/rust/sedona-common/src/error.rs
index 101db2c4..5b7c04b0 100644
--- a/rust/sedona-common/src/error.rs
+++ b/rust/sedona-common/src/error.rs
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-/// Macro to create Sedona Internal Error that avoids the misleading error 
message from
-/// DataFusionError::Internal.
+/// Macro to create Sedona Internal Error from places such as `map_err()` that
+/// require a DataFusionError instead of an Err
 #[macro_export]
-macro_rules! sedona_internal_err {
+macro_rules! sedona_internal_datafusion_err {
     ($($args:expr),*) => {{
         let msg = std::format!(
             "SedonaDB internal error: {}{}.\nThis issue was likely caused by a 
bug in SedonaDB's code. \
@@ -28,7 +28,16 @@ macro_rules! sedona_internal_err {
             datafusion_common::DataFusionError::get_back_trace(),
         );
         // We avoid using Internal to avoid the message suggesting it's 
internal to DataFusion
-        Err(datafusion_common::DataFusionError::External(msg.into()))
+        datafusion_common::DataFusionError::External(msg.into())
+    }};
+}
+
+/// Macro to create Sedona Internal Error that avoids the misleading error 
message from
+/// DataFusionError::Internal. Expands to `Err(..)` around `sedona_internal_datafusion_err!`.
+#[macro_export]
+macro_rules! sedona_internal_err {
+    ($($args:expr),*) => {{
+        Err($crate::sedona_internal_datafusion_err!($($args),*)) // forwards all arguments unchanged to the sibling macro
     }};
 }
 
diff --git a/rust/sedona-functions/src/executor.rs 
b/rust/sedona-functions/src/executor.rs
index f1d679d2..8809ac0e 100644
--- a/rust/sedona-functions/src/executor.rs
+++ b/rust/sedona-functions/src/executor.rs
@@ -383,6 +383,20 @@ impl ScalarGeo for ScalarValue {
             | ScalarValue::BinaryView(maybe_item)
             | ScalarValue::LargeBinary(maybe_item) => 
Ok(maybe_item.as_deref()),
             ScalarValue::Null => Ok(None),
+            ScalarValue::Struct(s)
+                if s.fields().len() == 2
+                    && s.fields()[0].name() == "item"
+                    && s.fields()[1].name() == "crs" =>
+            {
+                let item_type = 
SedonaType::from_storage_field(&s.fields()[0])?;
+                let mut out = None;
+                s.column(0).iter_as_wkb_bytes(&item_type, 1, |v| {
+                    out = v;
+                    Ok(())
+                })?;
+
+                Ok(out)
+            }
             _ => sedona_internal_err!("Can't iterate over {:?} ScalarValue as 
&[u8]", self),
         }
     }
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index 6e38e426..6e8f884b 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -28,6 +28,7 @@ mod st_affine_helpers;
 pub mod st_analyze_agg;
 mod st_area;
 mod st_asbinary;
+mod st_asewkb;
 mod st_asgeojson;
 mod st_astext;
 mod st_azimuth;
diff --git a/rust/sedona-functions/src/register.rs 
b/rust/sedona-functions/src/register.rs
index 8161923a..14405409 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -66,6 +66,7 @@ pub fn default_function_set() -> FunctionSet {
         crate::st_affine::st_affine_udf,
         crate::st_area::st_area_udf,
         crate::st_asbinary::st_asbinary_udf,
+        crate::st_asewkb::st_asewkb_udf,
         crate::st_asgeojson::st_asgeojson_udf,
         crate::st_astext::st_astext_udf,
         crate::st_azimuth::st_azimuth_udf,
diff --git a/rust/sedona-functions/src/st_asewkb.rs 
b/rust/sedona-functions/src/st_asewkb.rs
new file mode 100644
index 00000000..76cab162
--- /dev/null
+++ b/rust/sedona-functions/src/st_asewkb.rs
@@ -0,0 +1,368 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use std::{sync::Arc, vec};
+
+use arrow_array::builder::BinaryBuilder;
+use arrow_schema::DataType;
+use datafusion_common::{
+    cast::{as_string_view_array, as_struct_array},
+    error::Result,
+    exec_datafusion_err, exec_err, ScalarValue,
+};
+use datafusion_expr::{
+    scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, 
Volatility,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{ewkb_factory::write_ewkb_geometry, 
wkb_factory::WKB_MIN_PROBABLE_BYTES};
+use sedona_schema::{crs::deserialize_crs, datatypes::SedonaType, 
matchers::ArgMatcher};
+
+use crate::executor::WkbExecutor;
+
+/// ST_AsEWKB() scalar UDF implementation
+///
+/// An implementation of EWKB writing using Sedona's geometry EWKB/WKB 
facilities.
+pub fn st_asewkb_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "st_asewkb",
+        vec![Arc::new(STAsEWKBItemCrs {}), Arc::new(STAsEWKB {})], // two kernels: item-CRS input and geometry/geography input
+        Volatility::Immutable,
+        Some(st_asewkb_doc()),
+    )
+}
+
+fn st_asewkb_doc() -> Documentation { // builds the user-facing documentation attached to the UDF
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        r#"Return the Extended Well-Known Binary (EWKB) representation of a 
geometry or geography.
+
+Compared to ST_AsBinary(), this function embeds an integer SRID derived from 
the type or derived
+from the item-level CRS for item CRS types. This is particularly useful for 
integration with
+PostGIS"#,
+        "ST_AsEWKB (A: Geometry)", // syntax example shown to users
+    )
+    .with_argument("geom", "geometry: Input geometry or geography")
+    .with_sql_example("SELECT ST_AsEWKB(ST_Point(1.0, 2.0, 4326))")
+    .build()
+}
+
+#[derive(Debug)]
+struct STAsEWKB {} // kernel for geometry/geography input; the SRID comes from the argument type's CRS
+
+impl SedonaScalarKernel for STAsEWKB {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_geometry_or_geography()],
+            SedonaType::Arrow(DataType::Binary), // output is plain Arrow Binary
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), // initial byte-capacity estimate
+        );
+
+        let maybe_srid = match &arg_types[0] { // resolved once per batch from the argument type
+            SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => match crs 
{
+                Some(crs) => match crs.srid()? {
+                    Some(0) => None, // SRID 0 is treated the same as no SRID
+                    Some(srid) => Some(srid),
+                    _ => return exec_err!("CRS {crs} cannot be represented by 
a single SRID"),
+                },
+                None => None, // type carries no CRS: no SRID is written
+            },
+            SedonaType::Arrow(DataType::Null) => None,
+            _ => return sedona_internal_err!("Unexpected input to invoke_batch 
in ST_AsEWKB"),
+        };
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            match maybe_wkb {
+                Some(wkb) => {
+                    write_ewkb_geometry(&mut builder, &wkb, maybe_srid)
+                        .map_err(|e| exec_datafusion_err!("EWKB writer error 
{e}"))?;
+                    builder.append_value([]); // NOTE(review): presumably closes the bytes streamed into the builder above as one value - confirm against arrow's BinaryBuilder Write impl
+                }
+                None => builder.append_null(), // null input yields a null output slot
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+#[derive(Debug)]
+struct STAsEWKBItemCrs {} // kernel for item-CRS input; the SRID is resolved per row from the CRS column
+
+impl SedonaScalarKernel for STAsEWKBItemCrs {
+    fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+        let matcher = ArgMatcher::new(
+            vec![ArgMatcher::is_item_crs()],
+            SedonaType::Arrow(DataType::Binary), // output is plain Arrow Binary
+        );
+
+        matcher.match_args(args)
+    }
+
+    fn invoke_batch(
+        &self,
+        arg_types: &[SedonaType],
+        args: &[ColumnarValue],
+    ) -> Result<ColumnarValue> {
+        let executor = WkbExecutor::new(arg_types, args);
+        let mut builder = BinaryBuilder::with_capacity(
+            executor.num_iterations(),
+            WKB_MIN_PROBABLE_BYTES * executor.num_iterations(), // initial byte-capacity estimate
+        );
+
+        let crs_array_ref = match &args[0] { // pull out the struct's second column for both array and scalar input
+            ColumnarValue::Array(array) => {
+                let struct_array = as_struct_array(array)?;
+                struct_array.column(1).clone()
+            }
+            ColumnarValue::Scalar(ScalarValue::Struct(struct_array)) => {
+                struct_array.column(1).clone()
+            }
+            _ => return sedona_internal_err!("Unexpected item_crs type"),
+        };
+
+        let crs_array = as_string_view_array(&crs_array_ref)?; // the CRS column is read as a string view array
+        let mut srid_iter = crs_array // lazy: each CRS string is parsed only when the row is visited
+            .into_iter()
+            .map(|maybe_crs_str| match maybe_crs_str {
+                None => Ok(None), // null CRS: no SRID is written
+                Some(crs_str) => {
+                    match deserialize_crs(crs_str)
+                        .map_err(|e| exec_datafusion_err!("{}", e.message()))?
+                    {
+                        None => Ok(None),
+                        Some(crs) => match crs.srid()? {
+                            Some(0) => Ok(None), // SRID 0 is treated the same as no SRID
+                            Some(srid) => Ok(Some(srid)),
+                            _ => {
+                                exec_err!("CRS {crs} cannot be represented by 
a single SRID")
+                            }
+                        },
+                    }
+                }
+            });
+
+        executor.execute_wkb_void(|maybe_wkb| {
+            let maybe_srid = srid_iter.next().unwrap()?; // NOTE(review): unwrap assumes the CRS column yields one entry per executor iteration - confirm
+            match maybe_wkb {
+                Some(wkb) => {
+                    write_ewkb_geometry(&mut builder, &wkb, maybe_srid)
+                        .map_err(|e| exec_datafusion_err!("EWKB writer error 
{e}"))?;
+                    builder.append_value([]); // NOTE(review): presumably closes the bytes streamed into the builder above as one value - confirm against arrow's BinaryBuilder Write impl
+                }
+                None => builder.append_null(), // null item yields a null output slot
+            }
+
+            Ok(())
+        })?;
+
+        executor.finish(Arc::new(builder.finish()))
+    }
+}
+
+#[cfg(test)]
+mod tests { // unit tests for both ST_AsEWKB kernels
+    use arrow_array::{ArrayRef, BinaryArray};
+    use datafusion_common::scalar::ScalarValue;
+    use datafusion_expr::ScalarUDF;
+    use rstest::rstest;
+    use sedona_schema::{
+        crs::lnglat,
+        datatypes::{
+            Edges, WKB_GEOGRAPHY, WKB_GEOGRAPHY_ITEM_CRS, WKB_GEOMETRY, 
WKB_GEOMETRY_ITEM_CRS,
+            WKB_VIEW_GEOGRAPHY, WKB_VIEW_GEOMETRY,
+        },
+    };
+    use sedona_testing::{
+        create::{create_array_item_crs, create_scalar_item_crs},
+        testers::ScalarUdfTester,
+    };
+
+    use super::*;
+
+    const POINT12: [u8; 21] = [ // bytes the tests expect for POINT (1 2) when no SRID is written
+        0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0xf0, 0x3f, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
+    ];
+
+    const POINT12_LNGLAT: [u8; 25] = [ // same point with the SRID flag byte 0x20 and SRID bytes e6 10 00 00 (4326)
+        0x01, 0x01, 0x00, 0x00, 0x20, 0xe6, 0x10, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00,
+        0xf0, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
+    ];
+
+    #[test]
+    fn udf_metadata() {
+        let udf: ScalarUDF = st_asewkb_udf().into();
+        assert_eq!(udf.name(), "st_asewkb");
+        assert!(udf.documentation().is_some())
+    }
+
+    #[rstest]
+    fn udf_no_srid( // types without a CRS must produce output with no SRID
+        #[values(
+            WKB_GEOMETRY,
+            WKB_GEOGRAPHY,
+            WKB_VIEW_GEOMETRY,
+            WKB_VIEW_GEOGRAPHY,
+            WKB_GEOMETRY_ITEM_CRS.clone(),
+            WKB_GEOGRAPHY_ITEM_CRS.clone(),
+        )]
+        sedona_type: SedonaType,
+    ) {
+        let udf = st_asewkb_udf();
+        let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+
+        assert_eq!(
+            tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(),
+            ScalarValue::Binary(Some(POINT12.to_vec()))
+        );
+
+        assert_eq!(
+            tester.invoke_wkb_scalar(None).unwrap(),
+            ScalarValue::Binary(None)
+        );
+
+        let expected_array: BinaryArray = [Some(POINT12), None, 
Some(POINT12)].iter().collect();
+        assert_eq!(
+            &tester
+                .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT 
(1 2)")])
+                .unwrap(),
+            &(Arc::new(expected_array) as ArrayRef)
+        );
+    }
+
+    #[rstest]
+    fn udf_srid_from_type( // a type-level lnglat() CRS must produce the SRID-bearing bytes
+        #[values(
+            SedonaType::Wkb(Edges::Planar, lnglat()),
+            SedonaType::Wkb(Edges::Spherical, lnglat())
+        )]
+        sedona_type: SedonaType,
+    ) {
+        let udf = st_asewkb_udf();
+        let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+
+        assert_eq!(
+            tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap(),
+            ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec()))
+        );
+
+        assert_eq!(
+            tester.invoke_wkb_scalar(None).unwrap(),
+            ScalarValue::Binary(None)
+        );
+
+        let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, 
Some(POINT12_LNGLAT)]
+            .iter()
+            .collect();
+        assert_eq!(
+            &tester
+                .invoke_wkb_array(vec![Some("POINT (1 2)"), None, Some("POINT 
(1 2)")])
+                .unwrap(),
+            &(Arc::new(expected_array) as ArrayRef)
+        );
+    }
+
+    #[test]
+    fn udf_srid_from_item_crs() { // a per-item "EPSG:4326" CRS must produce the SRID-bearing bytes
+        let udf = st_asewkb_udf();
+        let tester = ScalarUdfTester::new(udf.into(), 
vec![WKB_GEOMETRY_ITEM_CRS.clone()]);
+
+        let scalar_with_srid =
+            create_scalar_item_crs(Some("POINT (1 2)"), Some("EPSG:4326"), 
&WKB_GEOMETRY);
+        assert_eq!(
+            tester.invoke_scalar(scalar_with_srid).unwrap(),
+            ScalarValue::Binary(Some(POINT12_LNGLAT.to_vec()))
+        );
+
+        let array_with_srid = create_array_item_crs(
+            &[Some("POINT (1 2)"), None, Some("POINT (1 2)")],
+            [Some("EPSG:4326"), None, Some("EPSG:4326")],
+            &WKB_GEOMETRY,
+        );
+        let expected_array: BinaryArray = [Some(POINT12_LNGLAT), None, 
Some(POINT12_LNGLAT)]
+            .iter()
+            .collect();
+        assert_eq!(
+            &tester.invoke_array(array_with_srid).unwrap(),
+            &(Arc::new(expected_array) as ArrayRef)
+        );
+    }
+
+    #[test]
+    fn udf_invalid_type_crs() { // a type-level CRS with no usable SRID must fail with the message asserted below
+        let udf = st_asewkb_udf();
+
+        let crs_where_srid_returns_none = 
deserialize_crs("EPSG:9999999999").unwrap();
+        let sedona_type = SedonaType::Wkb(Edges::Planar, 
crs_where_srid_returns_none);
+
+        let tester = ScalarUdfTester::new(udf.into(), vec![sedona_type]);
+        let err = tester.invoke_wkb_scalar(Some("POINT (1 2)")).unwrap_err();
+        assert_eq!(
+            err.message(),
+            "CRS epsg:9999999999 cannot be represented by a single SRID"
+        );
+    }
+
+    #[test]
+    fn udf_invalid_item_crs() { // per-item CRS failures: oversized code and unparsable text
+        let udf = st_asewkb_udf();
+        let tester = ScalarUdfTester::new(udf.into(), 
vec![WKB_GEOMETRY_ITEM_CRS.clone()]);
+
+        // Very large SRID
+        let scalar_with_srid_outside_u32 = create_scalar_item_crs(
+            Some("POINT (1 2)"),
+            Some("EPSG:999999999999"),
+            &WKB_GEOMETRY,
+        );
+        let err = tester
+            .invoke_scalar(scalar_with_srid_outside_u32)
+            .unwrap_err();
+        assert_eq!(
+            err.message(),
+            "CRS epsg:999999999999 cannot be represented by a single SRID"
+        );
+
+        // CRS that fails to parse in deserialize_crs()
+        let scalar_with_unparsable_crs = create_scalar_item_crs(
+            Some("POINT (1 2)"),
+            Some("This is invalid JSON and also not auth:code"),
+            &WKB_GEOMETRY,
+        );
+        let err = tester
+            .invoke_scalar(scalar_with_unparsable_crs)
+            .unwrap_err();
+        assert_eq!(
+            err.message(),
+            "Error deserializing PROJJSON Crs: expected value at line 1 column 
1"
+        );
+    }
+}
diff --git a/rust/sedona-geometry/src/ewkb_factory.rs 
b/rust/sedona-geometry/src/ewkb_factory.rs
new file mode 100644
index 00000000..9a54fa73
--- /dev/null
+++ b/rust/sedona-geometry/src/ewkb_factory.rs
@@ -0,0 +1,353 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::io::Write;
+
+use geo_traits::{
+    GeometryCollectionTrait, GeometryTrait, LineStringTrait, 
MultiLineStringTrait, MultiPointTrait,
+    MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use wkb::reader::Wkb;
+
+use crate::{error::SedonaGeometryError, wkb_factory::write_wkb_coord_trait};
+
+const EWKB_Z_BIT: u32 = 0x80000000; // OR-ed into the geometry type word for Xyz and Xyzm input
+const EWKB_M_BIT: u32 = 0x40000000; // OR-ed into the geometry type word for Xym and Xyzm input
+const EWKB_SRID_BIT: u32 = 0x20000000; // OR-ed into the geometry type word when an SRID follows it
+
+pub fn write_ewkb_geometry( // writes `geom` to `buf` as EWKB by dispatching on its geometry type
+    buf: &mut impl Write,
+    geom: &Wkb,
+    srid: Option<u32>, // forwarded to the per-type writer; None means no SRID is written
+) -> Result<(), SedonaGeometryError> {
+    match geom.as_type() {
+        geo_traits::GeometryType::Point(p) => write_ewkb_point(buf, p, srid),
+        geo_traits::GeometryType::LineString(ls) => 
write_ewkb_line_string(buf, ls, srid),
+        geo_traits::GeometryType::Polygon(poly) => write_ewkb_polygon(buf, 
poly, srid),
+        geo_traits::GeometryType::MultiPoint(mp) => 
write_ewkb_multi_point(buf, mp, srid),
+        geo_traits::GeometryType::MultiLineString(mls) => {
+            write_ewkb_multi_line_string(buf, mls, srid)
+        }
+        geo_traits::GeometryType::MultiPolygon(mpoly) => 
write_ewkb_multi_polygon(buf, mpoly, srid),
+        geo_traits::GeometryType::GeometryCollection(gc) => {
+            write_ewkb_geometry_collection(buf, gc, srid)
+        }
+        _ => Err(SedonaGeometryError::Invalid( // any other geometry type is rejected
+            "Unsupported EWKB geometry type".to_string(),
+        )),
+    }
+}
+
+fn write_ewkb_point( // writes the point header (type code 1) followed by its coordinate
+    buf: &mut impl Write,
+    geom: &wkb::reader::Point,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(1, geom.dimension(), srid, buf)?;
+    match geom.byte_order() {
+        wkb::Endianness::BigEndian => match geom.coord() { // big-endian source: go through the coord API rather than copying bytes
+            Some(c) => write_wkb_coord_trait(buf, &c)?,
+            None => {
+                for _ in 0..geom.dim().size() { // no coordinate available: one NaN per dimension
+                    buf.write_all(&f64::NAN.to_le_bytes())?;
+                }
+            }
+        },
+        wkb::Endianness::LittleEndian => buf.write_all(geom.coord_slice())?, // little-endian source: copy the coordinate bytes as-is
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_line_string( // writes the linestring header (type code 2), coordinate count, then coordinates
+    buf: &mut impl Write,
+    geom: &wkb::reader::LineString,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(2, geom.dimension(), srid, buf)?;
+    let num_coords = geom.num_coords() as u32;
+    buf.write_all(&num_coords.to_le_bytes())?;
+    match geom.byte_order() {
+        wkb::Endianness::BigEndian => { // big-endian source: write coordinate by coordinate
+            for c in geom.coords() {
+                write_wkb_coord_trait(buf, &c)?;
+            }
+        }
+        wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, // little-endian source: copy the coordinate bytes as-is
+    }
+
+    Ok(())
+}
+
+fn write_linearring( // writes one polygon ring: coordinate count then coordinates, with no type header
+    buf: &mut impl Write,
+    geom: &wkb::reader::LinearRing,
+) -> Result<(), SedonaGeometryError> {
+    let num_coords = geom.num_coords() as u32;
+    buf.write_all(&num_coords.to_le_bytes())?;
+    match geom.byte_order() {
+        wkb::Endianness::BigEndian => { // big-endian source: write coordinate by coordinate
+            for c in geom.coords() {
+                write_wkb_coord_trait(buf, &c)?;
+            }
+        }
+        wkb::Endianness::LittleEndian => buf.write_all(geom.coords_slice())?, // little-endian source: copy the coordinate bytes as-is
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_polygon( // writes the polygon header (type code 3), ring count, then each ring
+    buf: &mut impl Write,
+    geom: &wkb::reader::Polygon,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(3, geom.dimension(), srid, buf)?;
+    let num_rings = geom.num_interiors() as u32 + geom.exterior().is_some() as 
u32;
+    buf.write_all(&num_rings.to_le_bytes())?;
+
+    if let Some(exterior) = geom.exterior() { // exterior ring first, when there is one
+        write_linearring(buf, exterior)?;
+    }
+
+    for interior in geom.interiors() {
+        write_linearring(buf, interior)?;
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_multi_point( // writes the multipoint header (type code 4), child count, then each point
+    buf: &mut impl Write,
+    geom: &wkb::reader::MultiPoint,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(4, geom.dimension(), srid, buf)?;
+    let num_children = geom.num_points() as u32;
+    buf.write_all(&num_children.to_le_bytes())?;
+
+    for child in geom.points() {
+        write_ewkb_point(buf, &child, None)?; // children are written without an SRID
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_multi_line_string( // writes the multilinestring header (type code 5), child count, then each linestring
+    buf: &mut impl Write,
+    geom: &wkb::reader::MultiLineString,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(5, geom.dimension(), srid, buf)?;
+    let num_children = geom.num_line_strings() as u32;
+    buf.write_all(&num_children.to_le_bytes())?;
+
+    for child in geom.line_strings() {
+        write_ewkb_line_string(buf, child, None)?; // children are written without an SRID
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_multi_polygon( // writes the multipolygon header (type code 6), child count, then each polygon
+    buf: &mut impl Write,
+    geom: &wkb::reader::MultiPolygon,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(6, geom.dimension(), srid, buf)?;
+    let num_children = geom.num_polygons() as u32;
+    buf.write_all(&num_children.to_le_bytes())?;
+
+    for child in geom.polygons() {
+        write_ewkb_polygon(buf, child, None)?; // children are written without an SRID
+    }
+
+    Ok(())
+}
+
+fn write_ewkb_geometry_collection( // writes the collection header (type code 7), child count, then each child
+    buf: &mut impl Write,
+    geom: &wkb::reader::GeometryCollection,
+    srid: Option<u32>,
+) -> Result<(), SedonaGeometryError> {
+    write_geometry_type_and_srid(7, geom.dimension(), srid, buf)?;
+    let num_children = geom.num_geometries() as u32;
+    buf.write_all(&num_children.to_le_bytes())?;
+
+    for child in geom.geometries() {
+        write_ewkb_geometry(buf, child, None)?; // recurses through the top-level dispatcher; children get no SRID
+    }
+
+    Ok(())
+}
+
+fn write_geometry_type_and_srid( // writes the common header: a 0x01 byte, the flagged type word, and optionally the SRID
+    mut base_type: u32, // plain geometry type code; flag bits are OR-ed in below
+    dimensions: wkb::reader::Dimension,
+    srid: Option<u32>,
+    buf: &mut impl Write,
+) -> Result<(), SedonaGeometryError> {
+    buf.write_all(&[0x01])?; // leading byte is always 0x01; every integer after it is written with to_le_bytes
+
+    match dimensions {
+        wkb::reader::Dimension::Xy => {}
+        wkb::reader::Dimension::Xyz => base_type |= EWKB_Z_BIT,
+        wkb::reader::Dimension::Xym => base_type |= EWKB_M_BIT,
+        wkb::reader::Dimension::Xyzm => {
+            base_type |= EWKB_Z_BIT;
+            base_type |= EWKB_M_BIT;
+        }
+    }
+
+    if let Some(srid) = srid {
+        base_type |= EWKB_SRID_BIT; // the flag must be set before the type word is written
+        buf.write_all(&base_type.to_le_bytes())?;
+        buf.write_all(&srid.to_le_bytes())?; // SRID goes immediately after the type word
+    } else {
+        buf.write_all(&base_type.to_le_bytes())?;
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test { // roundtrip tests for the EWKB writer
+
+    use rstest::rstest;
+    use wkb::{writer::WriteOptions, Endianness};
+
+    use super::*;
+
+    #[rstest]
+    fn test_roundtrip( // runs every case in ROUNDTRIP_CASES for both source byte orders
+        #[values(Endianness::LittleEndian, Endianness::BigEndian)] endianness: 
Endianness,
+    ) {
+        for wkt_str in ROUNDTRIP_CASES {
+            let wkt: wkt::Wkt<f64> = wkt_str.parse().unwrap();
+
+            let mut iso_wkb = Vec::new();
+            wkb::writer::write_geometry(&mut iso_wkb, &wkt, &WriteOptions { 
endianness }).unwrap();
+            let wkb_geom = wkb::reader::read_wkb(&iso_wkb).unwrap();
+
+            let mut ewkb_no_srid = Vec::new();
+            write_ewkb_geometry(&mut ewkb_no_srid, &wkb_geom, None).unwrap();
+
+            let mut ewkb_with_srid = Vec::new();
+            write_ewkb_geometry(&mut ewkb_with_srid, &wkb_geom, 
Some(4326)).unwrap();
+
+            // Check that the ewkbs have the correct number of bytes
+            assert_eq!(
+                ewkb_no_srid.len(),
+                iso_wkb.len(), // without an SRID the output must be the same size as the input WKB
+                "incorrect number of bytes for case {wkt_str} without srid"
+            );
+            assert_eq!(
+                ewkb_with_srid.len(),
+                ewkb_no_srid.len() + size_of::<u32>(), // the SRID adds exactly one u32
+                "incorrect number of bytes for case {wkt_str} with srid"
+            );
+
+            // Check the rendered WKT of the no srid EWKB
+            let wkb_geom_roundtrip_no_srid = 
wkb::reader::read_wkb(&ewkb_no_srid).unwrap();
+            let mut wkt_roundtrip_no_srid = String::new();
+            wkt::to_wkt::write_geometry(&mut wkt_roundtrip_no_srid, 
&wkb_geom_roundtrip_no_srid)
+                .unwrap();
+            assert_eq!(wkt_roundtrip_no_srid, wkt_str);
+
+            // Check the rendered WKT of the srid EWKB
+            let wkb_geom_roundtrip_with_srid = 
wkb::reader::read_wkb(&ewkb_with_srid).unwrap();
+            let mut wkt_roundtrip_with_srid = String::new();
+            wkt::to_wkt::write_geometry(
+                &mut wkt_roundtrip_with_srid,
+                &wkb_geom_roundtrip_with_srid,
+            )
+            .unwrap();
+            assert_eq!(wkt_roundtrip_with_srid, wkt_str);
+        }
+    }
+
+    const ROUNDTRIP_CASES: [&str; 60] = [ // WKT is spelled exactly as the writer renders it, since the test compares strings
+        // XY dimensions
+        "POINT(1 2)",
+        "LINESTRING(1 2,3 4,5 6)",
+        "POLYGON((0 1,2 0,2 3,0 3,0 1))",
+        "MULTIPOINT((1 2),(3 4))",
+        "MULTILINESTRING((1 2,3 4),(5 6,7 8))",
+        "MULTIPOLYGON(((0 1,2 0,2 3,0 3,0 1)))",
+        "GEOMETRYCOLLECTION(POINT(1 2),LINESTRING(3 4,5 6))",
+        "GEOMETRYCOLLECTION(GEOMETRYCOLLECTION(POINT(1 2)))",
+        // XYZ dimensions
+        "POINT Z(1 2 3)",
+        "LINESTRING Z(1 2 3,4 5 6)",
+        "POLYGON Z((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))",
+        "MULTIPOINT Z((1 2 3),(4 5 6))",
+        "MULTILINESTRING Z((1 2 3,4 5 6),(7 8 9,10 11 12))",
+        "MULTIPOLYGON Z(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))",
+        "GEOMETRYCOLLECTION Z(POINT Z(1 2 3))",
+        "GEOMETRYCOLLECTION Z(GEOMETRYCOLLECTION Z(POINT Z(1 2 3)))",
+        // XYM dimensions
+        "POINT M(1 2 3)",
+        "LINESTRING M(1 2 3,4 5 6)",
+        "POLYGON M((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2))",
+        "MULTIPOINT M((1 2 3),(4 5 6))",
+        "MULTILINESTRING M((1 2 3,4 5 6),(7 8 9,10 11 12))",
+        "MULTIPOLYGON M(((0 1 2,3 0 2,3 4 2,0 4 2,0 1 2)))",
+        "GEOMETRYCOLLECTION M(POINT M(1 2 3))",
+        "GEOMETRYCOLLECTION M(GEOMETRYCOLLECTION M(POINT M(1 2 3)))",
+        // XYZM dimensions
+        "POINT ZM(1 2 3 4)",
+        "LINESTRING ZM(1 2 3 4,5 6 7 8)",
+        "POLYGON ZM((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3))",
+        "MULTIPOINT ZM((1 2 3 4),(5 6 7 8))",
+        "MULTILINESTRING ZM((1 2 3 4,5 6 7 8),(9 10 11 12,13 14 15 16))",
+        "MULTIPOLYGON ZM(((0 1 2 3,4 0 2 3,4 5 2 3,0 5 2 3,0 1 2 3)))",
+        "GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4))",
+        "GEOMETRYCOLLECTION ZM(GEOMETRYCOLLECTION ZM(POINT ZM(1 2 3 4)))",
+        // Empty geometries
+        "POINT EMPTY",
+        "LINESTRING EMPTY",
+        "POLYGON EMPTY",
+        "MULTIPOINT EMPTY",
+        "MULTILINESTRING EMPTY",
+        "MULTIPOLYGON EMPTY",
+        "GEOMETRYCOLLECTION EMPTY",
+        // Empty geometries Z
+        "POINT Z EMPTY",
+        "LINESTRING Z EMPTY",
+        "POLYGON Z EMPTY",
+        "MULTIPOINT Z EMPTY",
+        "MULTILINESTRING Z EMPTY",
+        "MULTIPOLYGON Z EMPTY",
+        "GEOMETRYCOLLECTION Z EMPTY",
+        // Empty geometries M
+        "POINT M EMPTY",
+        "LINESTRING M EMPTY",
+        "POLYGON M EMPTY",
+        "MULTIPOINT M EMPTY",
+        "MULTILINESTRING M EMPTY",
+        "MULTIPOLYGON M EMPTY",
+        "GEOMETRYCOLLECTION M EMPTY",
+        // Empty geometries ZM
+        "POINT ZM EMPTY",
+        "LINESTRING ZM EMPTY",
+        "POLYGON ZM EMPTY",
+        "MULTIPOINT ZM EMPTY",
+        "MULTILINESTRING ZM EMPTY",
+        "MULTIPOLYGON ZM EMPTY",
+        "GEOMETRYCOLLECTION ZM EMPTY",
+    ];
+}
diff --git a/rust/sedona-geometry/src/lib.rs b/rust/sedona-geometry/src/lib.rs
index f189ec7b..5de9f65e 100644
--- a/rust/sedona-geometry/src/lib.rs
+++ b/rust/sedona-geometry/src/lib.rs
@@ -18,6 +18,7 @@ pub mod analyze;
 pub mod bounding_box;
 pub mod bounds;
 pub mod error;
+pub mod ewkb_factory;
 pub mod interval;
 pub mod is_empty;
 pub mod point_count;
diff --git a/rust/sedona-schema/src/crs.rs b/rust/sedona-schema/src/crs.rs
index a4649b15..3813df11 100644
--- a/rust/sedona-schema/src/crs.rs
+++ b/rust/sedona-schema/src/crs.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use datafusion_common::{DataFusionError, Result};
+use datafusion_common::{plan_datafusion_err, plan_err, DataFusionError, 
Result};
 use lru::LruCache;
 use std::cell::RefCell;
 use std::fmt::{Debug, Display};
@@ -318,9 +318,8 @@ impl FromStr for ProjJSON {
     type Err = DataFusionError;
 
     fn from_str(s: &str) -> Result<Self> {
-        let value: Value = serde_json::from_str(s).map_err(|err| {
-            DataFusionError::Internal(format!("Error deserializing PROJJSON 
Crs: {err}"))
-        })?;
+        let value: Value = serde_json::from_str(s)
+            .map_err(|err| plan_datafusion_err!("Error deserializing PROJJSON 
Crs: {err}"))?;
 
         Self::try_new(value)
     }
@@ -329,9 +328,7 @@ impl FromStr for ProjJSON {
 impl ProjJSON {
     pub fn try_new(value: Value) -> Result<Self> {
         if !value.is_object() {
-            return Err(DataFusionError::Internal(format!(
-                "Can't create PROJJSON from non-object: {value}"
-            )));
+            return plan_err!("Can't create PROJJSON from non-object: {value}");
         }
 
         Ok(Self { value })


Reply via email to