paleolimbot commented on code in PR #90:
URL: https://github.com/apache/sedona-db/pull/90#discussion_r2355657323


##########
rust/sedona-functions/src/executor.rs:
##########
@@ -246,6 +246,58 @@ impl GeometryFactory for WkbGeometryFactory {
     }
 }
 
+#[derive(Default)]
+pub(crate) struct WkbBytesFactory {}
+
+impl GeometryFactory for WkbBytesFactory {
+    type Geom<'a> = &'a [u8];
+
+    fn try_from_wkb<'a>(&self, wkb_bytes: &'a [u8]) -> Result<Self::Geom<'a>> {
+        Ok(wkb_bytes)
+    }
+}
+
+/// Alias for an executor that iterates over geometries in their raw [Wkb] 
bytes.
+///
+/// This [GenericExecutor] implementation provides more optimization 
opportunities,
+/// but it requires additional manual processing of the raw [Wkb] bytes 
compared to
+/// the [WkbExecutor].
+pub(crate) type WkbBytesExecutor<'a, 'b> =
+    GenericExecutor<'a, 'b, WkbBytesFactory, WkbBytesFactory>;
+
+impl<'b> GenericExecutor<'_, 'b, WkbBytesFactory, WkbBytesFactory> {
+    /// Execute a function by iterating over [Wkb]'s raw binary representation.
+    /// The provided `func` can assume its input bytes is a valid [Wkb] binary.
+    pub fn execute_wkb_bytes_void<F: FnMut(Option<&'b [u8]>) -> Result<()>>(
+        &self,
+        mut func: F,
+    ) -> Result<()> {
+        // Ensure the first argument of the executor is either Wkb, WkbView, or
+        // a Null type (to support columns of all-null values)
+        match &self.arg_types[0] {
+            SedonaType::Wkb(_, _)
+            | SedonaType::WkbView(_, _)
+            | SedonaType::Arrow(DataType::Null) => {}
+            other => {
+                return sedona_internal_err!(
+                    "Expected SedonaType::Wkb or SedonaType::WkbView or 
SedonaType::Arrow(DataType::Null) for the first arg, got {}",
+                    other
+                )
+            }
+        }
+
+        match &self.args[0] {
+            ColumnarValue::Array(array) => {
+                array.iter_as_wkb_bytes(&self.arg_types[0], 
self.num_iterations, func)
+            }
+            ColumnarValue::Scalar(scalar_value) => {
+                let maybe_bytes = scalar_value.scalar_as_wkb_bytes()?;
+                func(maybe_bytes)
+            }
+        }
+    }
+}
+

Review Comment:
   (See the below change...I am almost positive that 
`executor.execute_wkb_void()` does exactly this already unless I'm missing 
something!)
   
   ```suggestion
   
   ```



##########
rust/sedona-functions/src/st_geometrytype.rs:
##########
@@ -67,16 +65,16 @@ impl SedonaScalarKernel for STGeometryType {
         arg_types: &[SedonaType],
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
-        let executor = WkbExecutor::new(arg_types, args);
-        let min_output_size = "POINT".len() * executor.num_iterations();
+        let executor = WkbBytesExecutor::new(arg_types, args);
+        let min_output_size = "ST_POINT".len() * executor.num_iterations();
         let mut builder = 
StringBuilder::with_capacity(executor.num_iterations(), min_output_size);
 
-        // We can do quite a lot better than this with some vectorized WKB 
processing,
-        // but for now we just do a slow iteration
-        executor.execute_wkb_void(|maybe_item| {
-            match maybe_item {
-                Some(item) => {
-                    builder.append_option(invoke_scalar(&item)?);
+        // Iterate over raw WKB bytes for faster type inference
+        executor.execute_wkb_bytes_void(|maybe_bytes| {

Review Comment:
   ```suggestion
           executor.execute_wkb_void(|maybe_bytes| {
   ```



##########
rust/sedona-functions/src/executor.rs:
##########
@@ -246,6 +246,58 @@ impl GeometryFactory for WkbGeometryFactory {
     }
 }
 
+#[derive(Default)]
+pub(crate) struct WkbBytesFactory {}

Review Comment:
   Optional, but this will probably be useful in a number of places!
   
   ```suggestion
   /// A [GeometryFactory] whose geometry type are raw WKB bytes
   ///
   /// Using this geometry factory iterates over items as references to the raw 
underlying
   /// bytes, which is useful for writing optimized kernels that do not need 
the full buffer to
   /// be validated and/or parsed.
   #[derive(Default)]
   pub struct WkbBytesFactory {}
   ```



##########
rust/sedona-functions/src/st_geometrytype.rs:
##########
@@ -87,20 +85,35 @@ impl SedonaScalarKernel for STGeometryType {
     }
 }
 
-fn invoke_scalar(item: &Wkb) -> Result<Option<String>> {
-    match item.as_type() {
-        geo_traits::GeometryType::Point(_) => Ok(Some("ST_Point".to_string())),
-        geo_traits::GeometryType::LineString(_) => 
Ok(Some("ST_LineString".to_string())),
-        geo_traits::GeometryType::Polygon(_) => 
Ok(Some("ST_Polygon".to_string())),
-        geo_traits::GeometryType::MultiPoint(_) => 
Ok(Some("ST_MultiPoint".to_string())),
-        geo_traits::GeometryType::MultiLineString(_) => 
Ok(Some("ST_MultiLineString".to_string())),
-        geo_traits::GeometryType::MultiPolygon(_) => 
Ok(Some("ST_MultiPolygon".to_string())),
-        geo_traits::GeometryType::GeometryCollection(_) => {
-            Ok(Some("ST_GeometryCollection".to_string()))
-        }
-
-        // Other geometry types in geo that we should not get here: Rect, 
Triangle, Line
-        _ => sedona_internal_err!("unexpected geometry type"),
+/// Fast-path inference of geometry type name from raw WKB bytes
+/// An error will be thrown for invalid WKB bytes input
+///
+/// Spec: https://libgeos.org/specifications/wkb/
+///
+/// TODO: Move it to `Wkb` crate

Review Comment:
   ```suggestion
   ```
   
   (We have a lot of stuff like this, both in this crate and the internal 
sedona-geometry)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@sedona.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to