martin-g commented on code in PR #241:
URL: https://github.com/apache/sedona-db/pull/241#discussion_r2464631356
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -54,50 +67,73 @@ impl SedonaScalarKernel for STBuffer {
arg_types: &[SedonaType],
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
- // Default params
- let params_builder = BufferParams::builder();
+ invoke_batch_impl(arg_types, args)
+ }
+}
- let params = params_builder
- .build()
- .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
- // Extract the constant scalar value before looping over the input
geometries
- let distance: Option<f64>;
- let arg1 = args[1].cast_to(&DataType::Float64, None)?;
- if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
- if scalar_arg.is_null() {
- distance = None;
- } else {
- distance = Some(f64::try_from(scalar_arg.clone())?);
- }
- } else {
- return Err(DataFusionError::Execution(format!(
- "Invalid distance: {:?}",
- args[1]
- )));
- }
+pub fn st_buffer_style_impl() -> ScalarKernelRef {
+ Arc::new(STBufferStyle {})
+}
+#[derive(Debug)]
+struct STBufferStyle {}
Review Comment:
I wonder whether the second struct is really needed.
Could you use an optional ArgMatcher
(https://github.com/apache/sedona-db/blob/8e6234c7401fc72895752558cc6aeaedd1ff470a/rust/sedona-schema/src/matchers.rs#L202)
for the third parameter and keep all the logic in `STBuffer`?
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -54,50 +67,73 @@ impl SedonaScalarKernel for STBuffer {
arg_types: &[SedonaType],
args: &[ColumnarValue],
) -> Result<ColumnarValue> {
- // Default params
- let params_builder = BufferParams::builder();
+ invoke_batch_impl(arg_types, args)
+ }
+}
- let params = params_builder
- .build()
- .map_err(|e| DataFusionError::External(Box::new(e)))?;
-
- // Extract the constant scalar value before looping over the input
geometries
- let distance: Option<f64>;
- let arg1 = args[1].cast_to(&DataType::Float64, None)?;
- if let ColumnarValue::Scalar(scalar_arg) = &arg1 {
- if scalar_arg.is_null() {
- distance = None;
- } else {
- distance = Some(f64::try_from(scalar_arg.clone())?);
- }
- } else {
- return Err(DataFusionError::Execution(format!(
- "Invalid distance: {:?}",
- args[1]
- )));
- }
+pub fn st_buffer_style_impl() -> ScalarKernelRef {
+ Arc::new(STBufferStyle {})
+}
+#[derive(Debug)]
+struct STBufferStyle {}
- let executor = GeosExecutor::new(arg_types, args);
- let mut builder = BinaryBuilder::with_capacity(
- executor.num_iterations(),
- WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
- );
- executor.execute_wkb_void(|wkb| {
- match (wkb, distance) {
- (Some(wkb), Some(distance)) => {
- invoke_scalar(&wkb, distance, &params, &mut builder)?;
- builder.append_value([]);
- }
- _ => builder.append_null(),
- }
+impl SedonaScalarKernel for STBufferStyle {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(
+ vec![
+ ArgMatcher::is_geometry(),
+ ArgMatcher::is_numeric(),
+ ArgMatcher::is_string(),
+ ],
+ WKB_GEOMETRY,
+ );
- Ok(())
- })?;
+ matcher.match_args(args)
+ }
- executor.finish(Arc::new(builder.finish()))
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ invoke_batch_impl(arg_types, args)
}
}
+fn invoke_batch_impl(arg_types: &[SedonaType], args: &[ColumnarValue]) ->
Result<ColumnarValue> {
+ let executor = GeosExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+ );
+
+ // Extract Args
+ let distance_value = args[1]
+ .cast_to(&DataType::Float64, None)?
+ .to_array(executor.num_iterations())?;
+ let distance_array = as_float64_array(&distance_value)?;
+ let mut distance_iter = distance_array.iter();
+
+ let buffer_style_params = extract_optional_string(args.get(2))?;
+
+ // Build BufferParams based on style parameters
+ let params = parse_buffer_params(buffer_style_params.as_deref())?;
+
+ executor.execute_wkb_void(|wkb| {
+ match (wkb, distance_iter.next().unwrap()) {
Review Comment:
Is it safe to call `.unwrap()` here?
I.e. do you want to panic when there is a problem, or could an `Err` be
returned so that the caller can decide what to do with it?
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
+ "endcap" => {
+ params_builder =
params_builder.end_cap_style(parse_cap_style(value)?);
+ }
+ "join" => {
+ params_builder =
params_builder.join_style(parse_join_style(value)?);
+ }
+ "side" => {
+ params_builder =
params_builder.single_sided(parse_side(value)?);
+ }
+ "mitre_limit" | "miter_limit" => {
+ let limit: f64 = parse_number(value, "mitre_limit")?;
+ params_builder = params_builder.mitre_limit(limit);
+ }
+ "quad_segs" | "quadrant_segments" => {
+ let segs = parse_number(value, "quadrant_segments")?;
+ params_builder = params_builder.quadrant_segments(segs);
+ }
+ _ => {
+ return Err(DataFusionError::Execution(format!(
+ "Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs' and 'side')",
+ key.to_lowercase()
Review Comment:
IMO it would be clearer for the developer to see the original key here
instead of its lowercase version.
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
+ "endcap" => {
+ params_builder =
params_builder.end_cap_style(parse_cap_style(value)?);
+ }
+ "join" => {
+ params_builder =
params_builder.join_style(parse_join_style(value)?);
+ }
+ "side" => {
+ params_builder =
params_builder.single_sided(parse_side(value)?);
+ }
+ "mitre_limit" | "miter_limit" => {
+ let limit: f64 = parse_number(value, "mitre_limit")?;
+ params_builder = params_builder.mitre_limit(limit);
+ }
+ "quad_segs" | "quadrant_segments" => {
+ let segs = parse_number(value, "quadrant_segments")?;
+ params_builder = params_builder.quadrant_segments(segs);
+ }
+ _ => {
+ return Err(DataFusionError::Execution(format!(
+ "Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs' and 'side')",
+ key.to_lowercase()
+ )));
+ }
+ }
+ }
+
+ params_builder
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)))
+}
+
+fn parse_cap_style(value: &str) -> Result<CapStyle> {
+ match value.to_lowercase().as_str() {
+ "round" => Ok(CapStyle::Round),
+ "flat" | "butt" => Ok(CapStyle::Flat),
+ "square" => Ok(CapStyle::Square),
+ _ => Err(DataFusionError::Execution(format!(
+ "Invalid endcap style: '{}'. Valid options: round, flat, butt,
square",
+ value
+ ))),
+ }
+}
+
+fn parse_join_style(value: &str) -> Result<JoinStyle> {
+ match value.to_lowercase().as_str() {
Review Comment:
nit: `to_lowercase()` allocates a new `String` on every call.
You may want to compare with `eq_ignore_ascii_case()` instead.
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
+ "endcap" => {
+ params_builder =
params_builder.end_cap_style(parse_cap_style(value)?);
+ }
+ "join" => {
+ params_builder =
params_builder.join_style(parse_join_style(value)?);
+ }
+ "side" => {
+ params_builder =
params_builder.single_sided(parse_side(value)?);
+ }
+ "mitre_limit" | "miter_limit" => {
+ let limit: f64 = parse_number(value, "mitre_limit")?;
+ params_builder = params_builder.mitre_limit(limit);
+ }
+ "quad_segs" | "quadrant_segments" => {
+ let segs = parse_number(value, "quadrant_segments")?;
+ params_builder = params_builder.quadrant_segments(segs);
+ }
+ _ => {
+ return Err(DataFusionError::Execution(format!(
+ "Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs' and 'side')",
+ key.to_lowercase()
+ )));
+ }
+ }
+ }
+
+ params_builder
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)))
+}
+
+fn parse_cap_style(value: &str) -> Result<CapStyle> {
+ match value.to_lowercase().as_str() {
+ "round" => Ok(CapStyle::Round),
+ "flat" | "butt" => Ok(CapStyle::Flat),
+ "square" => Ok(CapStyle::Square),
+ _ => Err(DataFusionError::Execution(format!(
+ "Invalid endcap style: '{}'. Valid options: round, flat, butt,
square",
+ value
+ ))),
+ }
+}
+
+fn parse_join_style(value: &str) -> Result<JoinStyle> {
+ match value.to_lowercase().as_str() {
+ "round" => Ok(JoinStyle::Round),
+ "mitre" | "miter" => Ok(JoinStyle::Mitre),
+ "bevel" => Ok(JoinStyle::Bevel),
+ _ => Err(DataFusionError::Execution(format!(
+ "Invalid join style: '{}'. Valid options: round, mitre, miter,
bevel",
+ value
+ ))),
+ }
+}
+
+fn parse_side(value: &str) -> Result<bool> {
Review Comment:
```suggestion
fn is_single_sided(value: &str) -> Result<bool> {
```
nit: I think `is_single_sided` would be a better name here.
`parse_side` would be more appropriate if it returned a `Side` enum
(with variants BOTH, LEFT and RIGHT), and that enum could have a method
`is_single_sided`.
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
+ "endcap" => {
+ params_builder =
params_builder.end_cap_style(parse_cap_style(value)?);
+ }
+ "join" => {
+ params_builder =
params_builder.join_style(parse_join_style(value)?);
+ }
+ "side" => {
+ params_builder =
params_builder.single_sided(parse_side(value)?);
+ }
+ "mitre_limit" | "miter_limit" => {
+ let limit: f64 = parse_number(value, "mitre_limit")?;
+ params_builder = params_builder.mitre_limit(limit);
+ }
+ "quad_segs" | "quadrant_segments" => {
+ let segs = parse_number(value, "quadrant_segments")?;
+ params_builder = params_builder.quadrant_segments(segs);
+ }
+ _ => {
+ return Err(DataFusionError::Execution(format!(
+ "Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs' and 'side')",
+ key.to_lowercase()
+ )));
+ }
+ }
+ }
+
+ params_builder
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)))
+}
+
+fn parse_cap_style(value: &str) -> Result<CapStyle> {
+ match value.to_lowercase().as_str() {
Review Comment:
nit: `to_lowercase()` allocates a new `String` on every call.
You may want to compare with `eq_ignore_ascii_case()` instead.
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
Review Comment:
nit: `to_lowercase()` allocates a new `String` on every call.
You may want to compare with `eq_ignore_ascii_case()` instead.
##########
c/sedona-geos/src/st_buffer.rs:
##########
@@ -115,6 +151,115 @@ fn invoke_scalar(
Ok(())
}
+fn extract_optional_string(arg: Option<&ColumnarValue>) ->
Result<Option<String>> {
+ let Some(arg) = arg else { return Ok(None) };
+ let casted = arg.cast_to(&DataType::Utf8, None)?;
+ match &casted {
+ ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)) |
ScalarValue::LargeUtf8(Some(s))) => {
+ Ok(Some(s.clone()))
+ }
+ ColumnarValue::Scalar(scalar) if scalar.is_null() => Ok(None),
+ ColumnarValue::Scalar(_) => Ok(None),
+ _ => Err(DataFusionError::Execution(format!(
+ "Expected scalar bufferStyleParameters, got: {:?}",
+ arg
+ ))),
+ }
+}
+
+fn parse_buffer_params(params_str: Option<&str>) -> Result<BufferParams> {
+ let Some(params_str) = params_str else {
+ return BufferParams::builder()
+ .build()
+ .map_err(|e| DataFusionError::External(Box::new(e)));
+ };
+
+ let mut params_builder = BufferParams::builder();
+
+ for param in params_str.split_whitespace() {
+ let Some((key, value)) = param.split_once('=') else {
+ return Err(DataFusionError::Execution(format!(
+ "Missing value for buffer parameter: {}",
+ param
+ )));
+ };
+
+ match key.to_lowercase().as_str() {
+ "endcap" => {
+ params_builder =
params_builder.end_cap_style(parse_cap_style(value)?);
+ }
+ "join" => {
+ params_builder =
params_builder.join_style(parse_join_style(value)?);
+ }
+ "side" => {
+ params_builder =
params_builder.single_sided(parse_side(value)?);
+ }
+ "mitre_limit" | "miter_limit" => {
+ let limit: f64 = parse_number(value, "mitre_limit")?;
+ params_builder = params_builder.mitre_limit(limit);
+ }
+ "quad_segs" | "quadrant_segments" => {
+ let segs = parse_number(value, "quadrant_segments")?;
+ params_builder = params_builder.quadrant_segments(segs);
+ }
+ _ => {
+ return Err(DataFusionError::Execution(format!(
+ "Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs' and 'side')",
Review Comment:
```suggestion
"Invalid buffer parameter: {} (accept: 'endcap', 'join',
'mitre_limit', 'miter_limit', 'quad_segs', 'quadrant_segments' and 'side')",
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]