This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch SEDONA-583 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit e4f769cfd52b6671d056502bf93efcf4b2bf8fa3 Author: Pranav Toggi <[email protected]> AuthorDate: Wed Apr 10 21:09:06 2024 -0400 [TASK-23] Adds ST_Length2D (#160) * Implement for Flink, Scala * Implement for python, snowflake * Add python test * Update docs * Update docs * fix test * fix test * fix test * fix test --- docs/api/flink/Function.md | 22 +++++++++++++++++++++- docs/api/snowflake/vector-data/Function.md | 14 +++++++++++++- docs/api/sql/Function.md | 22 +++++++++++++++++++++- .../main/java/org/apache/sedona/flink/Catalog.java | 1 + .../apache/sedona/flink/expressions/Functions.java | 8 ++++++++ .../java/org/apache/sedona/flink/FunctionTest.java | 9 +++++++++ python/sedona/sql/st_functions.py | 11 +++++++++++ python/tests/sql/test_dataframe_api.py | 2 ++ python/tests/sql/test_function.py | 13 +++++++++++++ .../sedona/snowflake/snowsql/TestFunctions.java | 9 +++++++++ .../sedona/snowflake/snowsql/TestFunctionsV2.java | 9 +++++++++ .../org/apache/sedona/snowflake/snowsql/UDFs.java | 7 +++++++ .../apache/sedona/snowflake/snowsql/UDFsV2.java | 7 +++++++ .../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 + .../sql/sedona_sql/expressions/Functions.scala | 13 +++++++++++++ .../sql/sedona_sql/expressions/st_functions.scala | 3 +++ .../apache/sedona/sql/dataFrameAPITestScala.scala | 8 ++++++++ .../org/apache/sedona/sql/functionTestScala.scala | 9 +++++++++ 18 files changed, 165 insertions(+), 3 deletions(-) diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md index 8139a090f..8e55dfc6f 100644 --- a/docs/api/flink/Function.md +++ b/docs/api/flink/Function.md @@ -1955,7 +1955,7 @@ gid | validity_info ## ST_Length -Introduction: Return the perimeter of A +Introduction: Returns the perimeter of A. Format: `ST_Length (A: Geometry)` @@ -1973,6 +1973,26 @@ Output: 123.0147027033899 ``` +## ST_Length2D + +Introduction: Returns the perimeter of A. This function is an alias of [ST_Length](#st_length). 
+ +Format: `ST_Length2D (A: Geometry)` + +Since: `vTBD` + +Example: + +```SQL +SELECT ST_Length2D(ST_GeomFromWKT('LINESTRING(38 16,38 50,65 50,66 16,38 16)')) +``` + +Output: + +``` +123.0147027033899 +``` + ## ST_LengthSpheroid Introduction: Return the geodesic perimeter of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Length` + `ST_Transform`. It is equivalent to PostGIS `ST_Length(geography, use_spheroid=true)` and `ST_LengthSpheroid` function and produces nearly identical results. diff --git a/docs/api/snowflake/vector-data/Function.md b/docs/api/snowflake/vector-data/Function.md index 676cf3736..a0d0bb679 100644 --- a/docs/api/snowflake/vector-data/Function.md +++ b/docs/api/snowflake/vector-data/Function.md @@ -1448,7 +1448,7 @@ gid | validity_info ## ST_Length -Introduction: Return the perimeter of A +Introduction: Returns the perimeter of A. Format: ST_Length (A:geometry) @@ -1459,6 +1459,18 @@ SELECT ST_Length(polygondf.countyshape) FROM polygondf ``` +## ST_Length2D + +Introduction: Returns the perimeter of A. This function is an alias of [ST_Length](#st_length). + +Format: ST_Length2D (A:geometry) + +SQL example: +```SQL +SELECT ST_Length2D(polygondf.countyshape) +FROM polygondf +``` + ## ST_LengthSpheroid Introduction: Return the geodesic perimeter of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Length` + `ST_Transform`. It is equivalent to PostGIS `ST_Length(geography, use_spheroid=true)` and `ST_LengthSpheroid` function and produces nearly identical results. diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md index 182d2afbf..69bfb4def 100644 --- a/docs/api/sql/Function.md +++ b/docs/api/sql/Function.md @@ -1961,7 +1961,7 @@ gid | validity_info ## ST_Length -Introduction: Return the perimeter of A +Introduction: Returns the perimeter of A. 
Format: `ST_Length (A: Geometry)` @@ -1979,6 +1979,26 @@ Output: 123.0147027033899 ``` +## ST_Length2D + +Introduction: Returns the perimeter of A. This function is an alias of [ST_Length](#st_length). + +Format: `ST_Length2D (A: Geometry)` + +Since: `vTBD` + +SQL Example: + +```SQL +SELECT ST_Length2D(ST_GeomFromWKT('LINESTRING(38 16,38 50,65 50,66 16,38 16)')) +``` + +Output: + +``` +123.0147027033899 +``` + ## ST_LengthSpheroid Introduction: Return the geodesic perimeter of A using WGS84 spheroid. Unit is meter. Works better for large geometries (country level) compared to `ST_Length` + `ST_Transform`. It is equivalent to PostGIS `ST_Length(geography, use_spheroid=true)` and `ST_LengthSpheroid` function and produces nearly identical results. diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 443fbe932..234b1f435 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -77,6 +77,7 @@ public class Catalog { new Functions.ST_GeometryType(), new Functions.ST_Intersection(), new Functions.ST_Length(), + new Functions.ST_Length2D(), new Functions.ST_LengthSpheroid(), new Functions.ST_LineInterpolatePoint(), new Functions.ST_LineLocatePoint(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java index 0f7593181..bd8735c42 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java @@ -320,6 +320,14 @@ public class Functions { } } + public static class ST_Length2D extends ScalarFunction { + @DataTypeHint("Double") + public Double eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o) { + Geometry geom = (Geometry) o; + return 
org.apache.sedona.common.Functions.length(geom); + } + } + public static class ST_LengthSpheroid extends ScalarFunction { @DataTypeHint("Double") public Double eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o) { diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java index 57dc92ece..02ddfe568 100644 --- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java @@ -426,6 +426,15 @@ public class FunctionTest extends TestBase{ assertEquals(4, result, 0); } + @Test + public void testLength2D() { + Table polygonTable = createPolygonTable(1); + Table resultTable = polygonTable.select(call(Functions.ST_Length2D.class.getSimpleName(), $(polygonColNames[0]))); + assertNotNull(first(resultTable).getField(0)); + double result = (double) first(resultTable).getField(0); + assertEquals(4, result, 0); + } + @Test public void testLengthSpheroid() { Table tbl = tableEnv.sqlQuery( diff --git a/python/sedona/sql/st_functions.py b/python/sedona/sql/st_functions.py index 02a3001c8..46c2fd064 100644 --- a/python/sedona/sql/st_functions.py +++ b/python/sedona/sql/st_functions.py @@ -792,6 +792,17 @@ def ST_Length(geometry: ColumnOrName) -> Column: """ return _call_st_function("ST_Length", geometry) +@validate_argument_types +def ST_Length2D(geometry: ColumnOrName) -> Column: + """Calculate the length of a linestring geometry. + + :param geometry: Linestring geometry column to calculate length for. + :type geometry: ColumnOrName + :return: Length of geometry as a double column. + :rtype: Column + """ + return _call_st_function("ST_Length2D", geometry) + @validate_argument_types def ST_LengthSpheroid(geometry: ColumnOrName) -> Column: """Calculate the perimeter of a geometry using WGS84 spheroid. 
diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index bd75bf16a..814dcf4a3 100644 --- a/python/tests/sql/test_dataframe_api.py +++ b/python/tests/sql/test_dataframe_api.py @@ -143,6 +143,7 @@ test_configurations = [ (stf.ST_IsValid, ("geom", 1), "triangle_geom", "", True), (stf.ST_IsValid, ("geom", 0), "triangle_geom", "", True), (stf.ST_Length, ("line",), "linestring_geom", "", 5.0), + (stf.ST_Length2D, ("line",), "linestring_geom", "", 5.0), (stf.ST_LengthSpheroid, ("point",), "point_geom", "", 0.0), (stf.ST_LineFromMultiPoint, ("multipoint",), "multipoint_geom", "", "LINESTRING (10 40, 40 30, 20 20, 30 10)"), (stf.ST_LineInterpolatePoint, ("line", 0.5), "linestring_geom", "", "POINT (2.5 0)"), @@ -317,6 +318,7 @@ wrong_type_configurations = [ (stf.ST_IsValid, (None,)), (stf.ST_IsValidReason, (None,)), (stf.ST_Length, (None,)), + (stf.ST_Length2D, (None,)), (stf.ST_LineFromMultiPoint, (None,)), (stf.ST_LineInterpolatePoint, (None, 0.5)), (stf.ST_LineInterpolatePoint, ("", None)), diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py index 2bf755825..391f0070e 100644 --- a/python/tests/sql/test_function.py +++ b/python/tests/sql/test_function.py @@ -214,6 +214,19 @@ class TestPredicateJoin(TestBase): function_df = self.spark.sql("select ST_Length(polygondf.countyshape) from polygondf") function_df.show() + def test_st_length2d(self): + polygon_wkt_df = self.spark.read.format("csv"). \ + option("delimiter", "\t"). 
\ + option("header", "false").load(mixed_wkt_geometry_input_location) + + polygon_wkt_df.createOrReplaceTempView("polygontable") + + polygon_df = self.spark.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable") + polygon_df.createOrReplaceTempView("polygondf") + + function_df = self.spark.sql("select ST_Length2D(polygondf.countyshape) from polygondf") + assert function_df.take(1)[0][0] == 1.6244272911181594 + def test_st_area(self): polygon_wkt_df = self.spark.read.format("csv"). \ option("delimiter", "\t"). \ diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java index 48f890e9e..1c1ad3e2d 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctions.java @@ -546,6 +546,15 @@ public class TestFunctions extends TestBase { 2.8284271247461903 ); } + + @Test + public void test_ST_Length2D() { + registerUDF("ST_Length2D", byte[].class); + verifySqlSingleRes( + "select sedona.ST_Length2D(sedona.ST_GeomFromText('LINESTRING(0 0, 2 2)'))", + 2.8284271247461903 + ); + } @Test public void test_ST_LineFromMultiPoint() { registerUDF("ST_LineFromMultiPoint", byte[].class); diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java index 3806c0d52..c018b1d3e 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestFunctionsV2.java @@ -539,6 +539,15 @@ public class TestFunctionsV2 2.8284271247461903 ); } + + @Test + public void test_ST_Length2D() { + registerUDFV2("ST_Length2D", String.class); + verifySqlSingleRes( + "select 
sedona.ST_Length2D(ST_GeometryFromWKT('LINESTRING(0 0, 2 2)'))", + 2.8284271247461903 + ); + } @Test public void test_ST_LineFromMultiPoint() { registerUDFV2("ST_LineFromMultiPoint", String.class); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index 7dc314e9f..eb93f1fa1 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -699,6 +699,13 @@ public class UDFs { ); } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}) + public static double ST_Length2D(byte[] geometry) { + return Functions.length( + GeometrySerde.deserialize(geometry) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}) public static byte[] ST_LineFromMultiPoint(byte[] geometry) { return GeometrySerde.serialize( diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java index a49d1ba7b..bbcf8669d 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFsV2.java @@ -630,6 +630,13 @@ public class UDFsV2 ); } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}, argTypes = {"Geometry"}) + public static double ST_Length2D(String geometry) { + return Functions.length( + GeometrySerde.deserGeoJson(geometry) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"geometry"}, argTypes = {"Geometry"}, returnTypes = "Geometry") public static String ST_LineFromMultiPoint(String geometry) { return GeometrySerde.serGeoJson( diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 92c9ac807..b3355894f 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ 
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -78,6 +78,7 @@ object Catalog { function[ST_ShiftLongitude](), function[ST_Envelope](), function[ST_Length](), + function[ST_Length2D](), function[ST_Area](), function[ST_Centroid](), function[ST_Transform](true), diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala index 2db5e6166..36f878b12 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala @@ -210,6 +210,19 @@ case class ST_Length(inputExpressions: Seq[Expression]) } } +/** + * Return the length measurement of a Geometry + * + * @param inputExpressions + */ +case class ST_Length2D(inputExpressions: Seq[Expression]) + extends InferredExpression(Functions.length _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + /** * Return the area measurement of a Geometry. 
* diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala index 6dbde20c9..86176a834 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala @@ -203,6 +203,9 @@ object st_functions extends DataFrameAPI { def ST_Length(geometry: Column): Column = wrapExpression[ST_Length](geometry) def ST_Length(geometry: String): Column = wrapExpression[ST_Length](geometry) + def ST_Length2D(geometry: Column): Column = wrapExpression[ST_Length2D](geometry) + def ST_Length2D(geometry: String): Column = wrapExpression[ST_Length2D](geometry) + def ST_LineFromMultiPoint(geometry: Column): Column = wrapExpression[ST_LineFromMultiPoint](geometry) def ST_LineFromMultiPoint(geometry: String): Column = wrapExpression[ST_LineFromMultiPoint](geometry) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 2548b1d3b..b1b9b78eb 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -408,6 +408,14 @@ class dataFrameAPITestScala extends TestBaseScala { assert(actualResult == expectedResult) } + it("Passed ST_Length2D") { + val lineDf = sparkSession.sql("SELECT ST_GeomFromWKT('LINESTRING (0 0, 1 0)') AS geom") + val df = lineDf.select(ST_Length2D("geom")) + val actualResult = df.take(1)(0).get(0).asInstanceOf[Double] + val expectedResult = 1.0 + assert(actualResult == expectedResult) + } + it("Passed ST_Area") { val polygonDf = sparkSession.sql("SELECT ST_GeomFromWKT('POLYGON ((0 0, 1 0, 1 1, 0 0))') AS geom") val df = polygonDf.select(ST_Area("geom")) diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala index 8711b3fb2..81bc10d09 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala @@ -173,6 +173,15 @@ class functionTestScala extends TestBaseScala with Matchers with GeometrySample assert(functionDf.count() > 0); } + it("Passed ST_Length2D") { + var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) + polygonWktDf.createOrReplaceTempView("polygontable") + var polygonDf = sparkSession.sql("select ST_GeomFromWKT(polygontable._c0) as countyshape from polygontable") + polygonDf.createOrReplaceTempView("polygondf") + var functionDf = sparkSession.sql("select ST_Length2D(polygondf.countyshape) from polygondf") + assert(functionDf.count() > 0); + } + it("Passed ST_Area") { var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) polygonWktDf.createOrReplaceTempView("polygontable")
