paleolimbot commented on code in PR #241:
URL: https://github.com/apache/sedona-db/pull/241#discussion_r2462420329


##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -41,41 +54,37 @@ struct STBuffer {}
 
 impl SedonaScalarKernel for STBuffer {
     fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
-        let matcher = ArgMatcher::new(
-            vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
-            WKB_GEOMETRY,
-        );
+        if !(2..=4).contains(&args.len()) {
+            return Err(DataFusionError::Plan(format!(
+                "ST_Buffer expects 2-4 arguments, got {}",
+                args.len()
+            )));
+        }
+
+        let mut matchers = vec![ArgMatcher::is_geometry(), 
ArgMatcher::is_numeric()];
 
-        matcher.match_args(args)
+        if args.len() >= 3 {
+            matchers.push(ArgMatcher::is_boolean());
+        }
+        if args.len() == 4 {
+            matchers.push(ArgMatcher::is_string());
+        }
+
+        ArgMatcher::new(matchers, WKB_GEOMETRY).match_args(args)
     }
 
     fn invoke_batch(
         &self,
         arg_types: &[SedonaType],
         args: &[ColumnarValue],
     ) -> Result<ColumnarValue> {
-        // Default params
-        let params_builder = BufferParams::builder();
+        // Extract Args
+        let distance: Option<f64> = extract_optional_f64(&args[1])?;

Review Comment:
   For the distance parameter, I think that it is worth the effort to allow 
this to be an array (and I have confidence in your ability to do so 🙂 ). The 
pattern we use for this in other places is something like:
   
   ```rust
   let distance_value = 
args[1].cast_to(&DataType::Float64)?.to_array(executor.num_iterations());
   let distance_array = if let ColumnarValue::Array(array_ref) = distance_value 
{
     as_float64_array(array_ref)?
   } else {
     return sedona_internal_err!("Unexpected scalar value");
   };
   
   let mut distance_iter = distance_array.iter();
   
   // ...
   
   executor.execute_wkb_void(|wkb| {
      match (wkb, distance_iter.next().unwrap() {
          (Some(wkb_item), Some(distance_item)) => { invoke_scalar(...) }
          _ => { /* append null */ }
      }
   })
   ```



##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -41,41 +54,37 @@ struct STBuffer {}
 
 impl SedonaScalarKernel for STBuffer {
     fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
-        let matcher = ArgMatcher::new(
-            vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
-            WKB_GEOMETRY,
-        );
+        if !(2..=4).contains(&args.len()) {
+            return Err(DataFusionError::Plan(format!(
+                "ST_Buffer expects 2-4 arguments, got {}",
+                args.len()
+            )));

Review Comment:
   The kernel system is really intended have multiple `SedonaScalarKernel`s 
that handle the matching of the various numbers/types of arguments. For example:
   
   ```rust
   struct STBuffer{};
   
   impl SedonaScalarKernel for STBufferStyle {
       fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> 
{
           let matcher = ArgMatcher::new(
               vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric()],
               WKB_GEOMETRY,
           );
           matcher.match_args(args)
       }
   
       fn invoke_batch(...) { invoke_batch_impl(...) }
   }
   
   struct STBufferStyle{};
   
   impl SedonaScalarKernel for STBuffer {
       fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> 
{
           let matcher = ArgMatcher::new(
               vec![ArgMatcher::is_geometry(), ArgMatcher::is_numeric(), 
ArgMatcher::is_string()],
               WKB_GEOMETRY,
           );
           matcher.match_args(args)
       }
   
       fn invoke_batch(...) { invoke_batch_impl(...) }
   }
   
   fn invoke_batch_impl(...) { ... }
   ```



##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +124,136 @@ fn invoke_scalar(
     Ok(())
 }
 
+fn extract_optional_f64(arg: &ColumnarValue) -> Result<Option<f64>> {
+    let casted = arg.cast_to(&DataType::Float64, None)?;
+    match &casted {
+        ColumnarValue::Scalar(scalar) if !scalar.is_null() => {
+            Ok(Some(f64::try_from(scalar.clone())?))
+        }
+        ColumnarValue::Scalar(_) => Ok(None),
+        _ => Err(DataFusionError::Execution(format!(
+            "Expected scalar distance, got: {:?}",
+            arg
+        ))),
+    }
+}
+
+fn extract_optional_bool(arg: Option<&ColumnarValue>) -> Result<Option<bool>> {
+    let Some(arg) = arg else { return Ok(None) };
+    let casted = arg.cast_to(&DataType::Boolean, None)?;
+    match &casted {
+        ColumnarValue::Scalar(scalar) if !scalar.is_null() => {
+            Ok(Some(bool::try_from(scalar.clone())?))
+        }
+        ColumnarValue::Scalar(_) => Ok(None),
+        _ => Err(DataFusionError::Execution(format!(
+            "Expected scalar useSpheroid parameter, got: {:?}",
+            arg
+        ))),
+    }
+}
+
+fn extract_optional_string(arg: Option<&ColumnarValue>) -> 
Result<Option<String>> {
+    let Some(arg) = arg else { return Ok(None) };
+    let casted = arg.cast_to(&DataType::Utf8, None)?;
+    match &casted {
+        ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) | 
ScalarValue::LargeUtf8(Some(s))) => {
+            Ok(Some(s.clone()))
+        }
+        ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+        ColumnarValue::Scalar(_) => Ok(None),
+        _ => Err(DataFusionError::Execution(format!(
+            "Expected scalar bufferStyleParameters, got: {:?}",
+            arg
+        ))),
+    }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {

Review Comment:
   Given the complexity here, I think it is worth writing dedicated tests for 
this function below, including the error messages. (In many places we do `let 
err = parse_buffer_params(...).unwrap_err(); assert_eq!(err.message(), "...")`).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to