This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch SEDONA-591 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 4aa920280f82f0259cf4d12d963227b04c41c0b1 Author: Furqaan Khan <[email protected]> AuthorDate: Sat Apr 27 00:57:21 2024 -0400 [TASK-277] Add ST_MaxDistance (#170) * temp commit * Revert "temp commit" This reverts commit 8cd8ecc6b3ae533379b779ba08c23b12df47e2fc. * feat: Add ST_LongestLine * feat: Add ST_MaxDistance * docs: add alias information --- .../java/org/apache/sedona/common/Functions.java | 4 ++++ docs/api/flink/Function.md | 23 ++++++++++++++++++++++ docs/api/snowflake/vector-data/Function.md | 21 ++++++++++++++++++++ docs/api/sql/Function.md | 23 ++++++++++++++++++++++ .../main/java/org/apache/sedona/flink/Catalog.java | 1 + .../apache/sedona/flink/expressions/Functions.java | 10 ++++++++++ .../java/org/apache/sedona/flink/FunctionTest.java | 9 +++++++++ python/sedona/sql/st_functions.py | 13 ++++++++++++ python/tests/sql/test_dataframe_api.py | 4 ++++ python/tests/sql/test_function.py | 6 ++++++ .../sedona/snowflake/snowsql/TestFunctions.java | 9 +++++++++ .../sedona/snowflake/snowsql/TestFunctionsV2.java | 10 ++++++++++ .../org/apache/sedona/snowflake/snowsql/UDFs.java | 9 +++++++++ .../apache/sedona/snowflake/snowsql/UDFsV2.java | 8 ++++++++ .../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 + .../sql/sedona_sql/expressions/Functions.scala | 8 ++++++++ .../sql/sedona_sql/expressions/st_functions.scala | 3 +++ .../apache/sedona/sql/dataFrameAPITestScala.scala | 7 +++++++ .../org/apache/sedona/sql/functionTestScala.scala | 7 +++++++ 19 files changed, 176 insertions(+) diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java b/common/src/main/java/org/apache/sedona/common/Functions.java index c583a3592..dcd5fbf6a 100644 --- a/common/src/main/java/org/apache/sedona/common/Functions.java +++ b/common/src/main/java/org/apache/sedona/common/Functions.java @@ -1058,6 +1058,10 @@ public class Functions { return isExteriorRingCCW && isInteriorRingCCW; } + public static double maxDistance(Geometry geom1, Geometry geom2) { + return 
longestLine(geom1, geom2).getLength(); + } + public static Geometry longestLine(Geometry geom1, Geometry geom2) { double maxLength = - Double.MAX_VALUE; Coordinate longestStart = null; diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 01da7ce93..1e026ad6f 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -2449,6 +2449,29 @@ Result: +------------------+------------------------+ ``` +## ST_MaxDistance + +Introduction: Calculates and returns the length value representing the maximum distance between any two points across the input geometries. The result equals the length of the line returned by `ST_LongestLine` for the same inputs. + +Format: `ST_MaxDistance(geom1: Geometry, geom2: Geometry)` + +Since: `vTBD` + +SQL Example: + +```sql +SELECT ST_MaxDistance( + ST_GeomFromText("POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"), + ST_GeomFromText("POLYGON ((10 20, 30 30, 40 20, 30 10, 10 20))") +) +``` + +Output: + +``` +36.05551275463989 +``` + ## ST_MinimumBoundingCircle Introduction: Returns the smallest circle polygon that contains a geometry. The optional quadrantSegments parameter determines how many segments to use per quadrant and the default number of segments is 48. diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index 95e41d2ca..3674661cc 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -1765,6 +1765,27 @@ Result: The previous implementation only worked for (multi)polygons and had a different interpretation of the second, boolean, argument. It would also sometimes return multiple geometries for a single geometry input. +## ST_MaxDistance + +Introduction: Calculates and returns the length value representing the maximum distance between any two points across the input geometries. The result equals the length of the line returned by `ST_LongestLine` for the same inputs. 
+ +Format: `ST_MaxDistance(geom1: Geometry, geom2: Geometry)` + +SQL Example: + +```sql +SELECT ST_MaxDistance( + ST_GeomFromText("POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"), + ST_GeomFromText("POLYGON ((10 20, 30 30, 40 20, 30 10, 10 20))") +) +``` + +Output: + +``` +36.05551275463989 +``` + ## ST_MinimumBoundingCircle Introduction: Returns the smallest circle polygon that contains a geometry. diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index 12b598d0d..a278ee312 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -2461,6 +2461,29 @@ Result: Be sure to check you code when upgrading. The previous implementation only worked for (multi)polygons and had a different interpretation of the second, boolean, argument. It would also sometimes return multiple geometries for a single geometry input. +## ST_MaxDistance + +Introduction: Calculates and returns the length value representing the maximum distance between any two points across the input geometries. The result equals the length of the line returned by `ST_LongestLine` for the same inputs. + +Format: `ST_MaxDistance(geom1: Geometry, geom2: Geometry)` + +Since: `vTBD` + +SQL Example: + +```sql +SELECT ST_MaxDistance( + ST_GeomFromText("POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"), + ST_GeomFromText("POLYGON ((10 20, 30 30, 40 20, 30 10, 10 20))") +) +``` + +Output: + +``` +36.05551275463989 +``` + ## ST_MinimumBoundingCircle Introduction: Returns the smallest circle polygon that contains a geometry. The optional quadrantSegments parameter determines how many segments to use per quadrant and the default number of segments has been changed to 48 since v1.5.0. 
diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 591b783d0..84b394f8f 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -144,6 +144,7 @@ public class Catalog { new Functions.ST_Polygonize(), new Functions.ST_MakePolygon(), new Functions.ST_MakeValid(), + new Functions.ST_MaxDistance(), new Functions.ST_MinimumBoundingCircle(), new Functions.ST_MinimumBoundingRadius(), new Functions.ST_Multi(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java index e30769cc7..cad1be4bc 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java @@ -902,6 +902,16 @@ public class Functions { } } + public static class ST_MaxDistance extends ScalarFunction { + @DataTypeHint(value = "Double") + public Double eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g1, + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object g2) { + Geometry geom1 = (Geometry) g1; + Geometry geom2 = (Geometry) g2; + return org.apache.sedona.common.Functions.maxDistance(geom1, geom2); + } + } + public static class ST_MinimumBoundingCircle extends ScalarFunction { @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o, diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java index 8105827e1..8afd16012 100644 --- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java 
@@ -1076,6 +1076,15 @@ public class FunctionTest extends TestBase{ assertEquals("MULTIPOLYGON (((1 5, 3 3, 1 1, 1 5)), ((5 3, 7 5, 7 1, 5 3)))", result.toString()); } + @Test + public void testMaxDistnace() { + Table tbl = tableEnv.sqlQuery( + "SELECT ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))') as geom"); + Double actual = (Double) first(tbl.select(call(Functions.ST_MaxDistance.class.getSimpleName(), $("geom"), $("geom")))).getField(0); + Double expected = 206.15528128088303; + assertEquals(expected, actual); + } + @Test public void testMinimumBoundingCircle() { Table table = tableEnv.sqlQuery("SELECT ST_GeomFromWKT('LINESTRING (0 0, 1 0)') AS geom"); diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py index 9b65f6ad0..f12d6a9f2 100644 --- a/python/sedona/sql/st_functions.py +++ b/python/sedona/sql/st_functions.py @@ -1029,6 +1029,19 @@ def ST_MakeValid(geometry: ColumnOrName, keep_collapsed: Optional[Union[ColumnOr args = (geometry,) if keep_collapsed is None else (geometry, keep_collapsed) return _call_st_function("ST_MakeValid", args) +@validate_argument_types +def ST_MaxDistance(geom1: ColumnOrName, geom2: ColumnOrName) -> Column: + """Calculate the maximum distance between two furthest points in the geometries + + :param geom1: + :type geom1: ColumnOrName + :param geom2: + :type geom2: ColumnOrName + :return: Maximum distance between the geometries + :rtype: Column + """ + return _call_st_function("ST_MaxDistance", (geom1, geom2)) + @validate_argument_types def ST_MinimumBoundingCircle(geometry: ColumnOrName, quadrant_segments: Optional[Union[ColumnOrName, int]] = None) -> Column: diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index 0426c7414..9a502a52e 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -164,6 +164,7 @@ test_configurations 
= [ (stf.ST_MMax, ("line",), "4D_line", "", 3.0), (stf.ST_MakeValid, ("geom",), "invalid_geom", "", "MULTIPOLYGON (((1 5, 3 3, 1 1, 1 5)), ((5 3, 7 5, 7 1, 5 3)))"), (stf.ST_MakeLine, ("line1", "line2"), "two_lines", "", "LINESTRING (0 0, 1 1, 0 0, 3 2)"), + (stf.ST_MaxDistance, ("a", "b"), "overlapping_polys", "", 3.1622776601683795), (stf.ST_Points, ("line",), "linestring_geom", "ST_Normalize(geom)", "MULTIPOINT (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)"), (stf.ST_Polygon, ("geom", 4236), "closed_linestring_geom", "", "POLYGON ((0 0, 1 0, 1 1, 0 0))"), (stf.ST_Polygonize, ("geom",), "noded_linework", "ST_Normalize(geom)", "GEOMETRYCOLLECTION (POLYGON ((0 2, 1 3, 2 4, 2 3, 2 2, 1 2, 0 2)), POLYGON ((2 2, 2 3, 2 4, 3 3, 4 2, 3 2, 2 2)))"), @@ -352,6 +353,9 @@ wrong_type_configurations = [ (stf.ST_MMax, (None,)), (stf.ST_MakeValid, (None,)), (stf.ST_MakePolygon, (None,)), + (stf.ST_MaxDistance, (None, None)), + (stf.ST_MaxDistance, (None, "")), + (stf.ST_MaxDistance, ("", None)), (stf.ST_MinimumBoundingCircle, (None,)), (stf.ST_MinimumBoundingRadius, (None,)), (stf.ST_Multi, (None,)), diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py index 064e30277..5d08c5de4 100644 --- a/python/tests/sql/test_function.py +++ b/python/tests/sql/test_function.py @@ -1392,6 +1392,12 @@ class TestPredicateJoin(TestBase): expected = "LINESTRING (180 180, 20 50)" assert expected == actual + def test_st_max_distance(self): + basedf = self.spark.sql("SELECT ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))') as geom") + actual = basedf.selectExpr("ST_MaxDistance(geom, geom)").take(1)[0][0] + expected = 206.15528128088303 + assert expected == actual + def test_st_s2_cell_ids(self): test_cases = [ "'POLYGON((-1 0, 1 0, 0 0, 0 1, -1 0))'", diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java 
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java index 7d0e0e19a..94d8ac97a 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java @@ -613,6 +613,15 @@ public class TestFunctions extends TestBase { ); } + @Test + public void test_ST_MaxDistance() { + registerUDF("ST_MaxDistance", byte[].class, byte[].class); + verifySqlSingleRes( + "SELECT sedona.ST_MaxDistance(sedona.ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))'), sedona.ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))'))", + 206.15528128088303 + ); + } + @Test public void test_ST_LineSubstring() { registerUDF("ST_LineSubstring", byte[].class, double.class, double.class); diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java index 2154027f2..cc998e835 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java @@ -618,6 +618,16 @@ public class TestFunctionsV2 "MULTIPOLYGON(((1 5,3 3,1 1,1 5)),((5 3,7 5,7 1,5 3)))" ); } + + @Test + public void test_ST_MaxDistance() { + registerUDFV2("ST_MaxDistance", String.class, String.class); + verifySqlSingleRes( + "SELECT sedona.ST_MaxDistance(ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))'), ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))'))", + 206.15528128088303 + ); 
+ } + @Test public void test_ST_MinimumBoundingCircle() { registerUDFV2("ST_MinimumBoundingCircle", String.class, int.class); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index a184cc6ec..657890e8a 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -809,6 +809,15 @@ public class UDFs { Constructors.makePoint(x, y, z, m) ); } + + @UDFAnnotations.ParamMeta(argNames = {"geom1", "geom2"}) + public static double ST_MaxDistance(byte[] geom1, byte[] geom2) { + return Functions.maxDistance( + GeometrySerde.deserialize(geom1), + GeometrySerde.deserialize(geom2) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"wkt", "srid"}) public static byte[] ST_MLineFromText(String wkt, int srid) throws ParseException { return GeometrySerde.serialize( diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java index 720f106b3..155b29b79 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java @@ -748,6 +748,14 @@ public class UDFsV2 ); } + @UDFAnnotations.ParamMeta(argNames = {"geom1", "geom2"}, argTypes = {"Geometry", "Geometry"}) + public static double ST_MaxDistance(String geom1, String geom2) { + return Functions.maxDistance( + GeometrySerde.deserGeoJson(geom1), + GeometrySerde.deserGeoJson(geom2) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry", "quadrantSegments"}, argTypes = {"Geometry", "int"}, returnTypes = "Geometry") public static String ST_MinimumBoundingCircle(String geometry, int quadrantSegments) { return GeometrySerde.serGeoJson( diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala 
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index f3b46011d..8c12246a3 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -157,6 +157,7 @@ object Catalog { function[ST_Polygon](), function[ST_Polygonize](), function[ST_MakePolygon](null), + function[ST_MaxDistance](), function[ST_GeoHash](), function[ST_GeomFromGeoHash](null), function[ST_PointFromGeoHash](null), diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index 2e1d5062d..8d5b67bdf 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -855,6 +855,14 @@ case class ST_MakePolygon(inputExpressions: Seq[Expression]) } } +case class ST_MaxDistance(inputExpressions: Seq[Expression]) + extends InferredExpression(Functions.maxDistance _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + case class ST_GeoHash(inputExpressions: Seq[Expression]) extends InferredExpression(Functions.geohash _) { diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala index 374b5aaa4..6550fe575 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala @@ -263,6 +263,9 @@ object st_functions extends DataFrameAPI { def ST_MakeValid(geometry: Column, keepCollapsed: Column): Column = wrapExpression[ST_MakeValid](geometry, keepCollapsed) def ST_MakeValid(geometry: String, 
keepCollapsed: Boolean): Column = wrapExpression[ST_MakeValid](geometry, keepCollapsed) + def ST_MaxDistance(geom1: Column, geom2: Column): Column = wrapExpression[ST_MaxDistance](geom1, geom2) + def ST_MaxDistance(geom1: String, geom2: String): Column = wrapExpression[ST_MaxDistance](geom1, geom2) + def ST_MinimumBoundingCircle(geometry: Column): Column = wrapExpression[ST_MinimumBoundingCircle](geometry, BufferParameters.DEFAULT_QUADRANT_SEGMENTS * 6) def ST_MinimumBoundingCircle(geometry: String): Column = wrapExpression[ST_MinimumBoundingCircle](geometry, BufferParameters.DEFAULT_QUADRANT_SEGMENTS * 6) def ST_MinimumBoundingCircle(geometry: Column, quadrantSegments: Column): Column = wrapExpression[ST_MinimumBoundingCircle](geometry, quadrantSegments) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 1fa25d270..6975295fb 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -935,6 +935,13 @@ class dataFrameAPITestScala extends TestBaseScala { assert(expected.equals(actual)) } + it("Passed ST_MaxDistance()") { + val baseDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))') as geom") + val actual = baseDf.select(ST_MaxDistance("geom", "geom")).first().get(0) + val expected = 206.15528128088303 + assert(expected == actual) + } + it("Passed ST_FlipCoordinates") { val baseDf = sparkSession.sql("SELECT ST_Point(0.0, 1.0) AS geom") val df = baseDf.select(ST_FlipCoordinates("geom")) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala index 3f08784a3..174af005d 100644 --- 
a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala @@ -1633,6 +1633,13 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample assert(expected.equals(actual)) } + it("Should pass ST_MaxDistance") { + val baseDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON ((40 180, 110 160, 180 180, 180 120, 140 90, 160 40, 80 10, 70 40, 20 50, 40 180),(60 140, 99 77.5, 90 140, 60 140))') as geom") + val actual = baseDf.selectExpr("ST_MaxDistance(geom, geom)").first().get(0) + val expected = 206.15528128088303 + assert(expected == actual) + } + it("Should pass ST_FlipCoordinates") { val pointDF = createSamplePointDf(5, "geom") val oldX = pointDF.take(1)(0).get(0).asInstanceOf[Geometry].getCoordinate.x
