paleolimbot commented on code in PR #790: URL: https://github.com/apache/sedona-db/pull/790#discussion_r3261632797
########## docs/reference/sql/st_hausdorffdistance.qmd: ########## @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +--- +title: ST_HausdorffDistance +description: Returns the Hausdorff distance between two geometries. This is a measure of how similar two geometries are. +kernels: + - returns: double + args: + - geom1: geometry + - geom2: geometry + - returns: double + args: + - geom1: geometry + - geom2: geometry + - density_frac: double Review Comment: We have funny hand-rolled syntax for these, but this should work: ```suggestion - returns: double args: - geometry - geometry - returns: double args: - geometry - geometry - name: density_frac type: float64 description: Densification to apply before calculation ``` ########## docs/reference/sql/st_hausdorffdistance.qmd: ########## @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +--- +title: ST_HausdorffDistance +description: Returns the Hausdorff distance between two geometries. This is a measure of how similar two geometries are. Review Comment: ```suggestion description: Returns the Hausdorff distance between two geometries. ``` ########## python/sedonadb/tests/functions/st_hausdorffdistance.py: ########## @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from sedonadb.testing import geom_or_null, PostGIS, SedonaDB + + [email protected]("eng", [SedonaDB, PostGIS]) [email protected]( + ("geom1", "geom2", "density_frac", "expected"), + [ + (None, None, None, None), + ("POINT (0 0)", None, None, None), Review Comment: Can you add `(None, "POINT (0 1)", None, ...)`, `("POINT (0 1)", "POINT (0 1)", None)`, and checks for EMTPY behaviour (empty linestring + non empty linestring and reversed) ########## python/sedonadb/tests/functions/st_hausdorffdistance.py: ########## Review Comment: This can be added next to wherever `st_distance` is tested (I'm not sure it needs its own file) ########## c/sedona-geos/src/st_hausdorffdistance.rs: ########## @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +use arrow_array::builder::Float64Builder; +use arrow_schema::DataType; +use datafusion_common::{error::Result, DataFusionError}; +use datafusion_expr::{ColumnarValue, Volatility}; + +use ::geos::Geom; + +use sedona_expr::{ + item_crs::ItemCrsKernel, + scalar_udf::{SedonaScalarKernel, SedonaScalarUDF}, +}; +use sedona_schema::{ + datatypes::{SedonaType, WKB_GEOMETRY}, + matchers::ArgMatcher, +}; +use std::sync::Arc; +use sedona_functions::executor::WkbExecutor; +use crate::executor::GeosExecutor; + +pub fn st_hausdorff_distance_impl() -> SedonaScalarUDF { + SedonaScalarUDF::new( + "st_hausdorff_distance", + ItemCrsKernel::wrap_impl(vec![Arc::new(STHausdorffDistance)]), + Volatility::Immutable, + ) +} + +#[derive(Debug)] +struct STHausdorffDistance; + +impl SedonaScalarKernel for STHausdorffDistance { + fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> { + let matcher = ArgMatcher::new( + vec![ArgMatcher::is_geometry(), ArgMatcher::is_geometry()], + SedonaType::Arrow(DataType::Float64), + ); + matcher.match_args(args) + } + + fn invoke_batch( + &self, + arg_types: &[SedonaType], + args: &[ColumnarValue], + ) -> Result<ColumnarValue> { + let mut executor = GeosExecutor::new(arg_types, args); + let mut builder = Float64Builder::with_capacity(executor.num_iterations()); + + + executor.execute_wkb_void(|maybe_g1, maybe_g2| { + match (maybe_g1, maybe_g2) { + (Some(wkb1), Some(wkb2)) => { + + let g1 = ::geos::Geometry::try_from(wkb1) + .map_err(|e| DataFusionError::Execution(format!("GEOS conversion error: {}", e)))?; + let g2 = ::geos::Geometry::try_from(wkb2) + .map_err(|e| DataFusionError::Execution(format!("GEOS conversion error: {}", e)))?; + + let dist = g1.hausdorff_distance(&g2) + .map_err(|e| DataFusionError::Execution(format!("ST_HausdorffDistance error: {}", e)))?; + + builder.append_value(dist); + } + _ => builder.append_null(), + } + Ok(()) + })?; + + executor.finish(Arc::new(builder.finish())) + } Review Comment: Can you add Rust tests (you can use the tests for st_distance as a template). The rust tests check for basic functionality and ensure that scalar and array arguments work everywhere. ########## python/sedonadb/tests/functions/st_hausdorffdistance.py: ########## @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +from sedonadb.testing import geom_or_null, PostGIS, SedonaDB + + [email protected]("eng", [SedonaDB, PostGIS]) [email protected]( + ("geom1", "geom2", "density_frac", "expected"), + [ + (None, None, None, None), + ("POINT (0 0)", None, None, None), + ("LINESTRING (0 0, 2 0)", "LINESTRING (0 1, 1 2, 2 1)", None, 1.4142135623730951), + # Case with density fraction + ("LINESTRING (0 0, 100 0)", "LINESTRING (0 0, 50 1, 100 0)", 0.5, 1.0), + # Identical geometries + ("POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))", None, 0.0), + # Disjoint geometries + ("POINT (0 0)", "POINT (10 10)", None, 14.142135623730951), + ], +) +def test_st_hausdorff_distance(eng, geom1, geom2, density_frac, expected): + eng = eng.create_or_skip() + + if density_frac is None: + query = f"SELECT ST_HausdorffDistance({geom_or_null(geom1)}, {geom_or_null(geom2)})" + else: + query = f"SELECT ST_HausdorffDistance({geom_or_null(geom1)}, {geom_or_null(geom2)}, {density_frac})" + Review Comment: Can you separate the tests for the version with and without `density_frac` (i.e., `def test_st_hasdorff_distance_density_frac()`? This will let us separate the "null" from "not specified" case. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
