This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch SEDONA-575 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit f2fda0444d8cefd8a954c25e195eb347a0de2569 Author: Furqaan Khan <[email protected]> AuthorDate: Mon Apr 8 15:15:28 2024 -0400 [TASK-156] Add ST_GeometryFromText (#155) * feat: add ST_GeometryFromText * feat: add ST_GeometryFromText optional parameter --- docs/api/flink/Constructor.md | 24 ++++++++++++++++++++++ docs/api/snowflake/vector-data/Constructor.md | 22 ++++++++++++++++++++ docs/api/sql/Constructor.md | 24 ++++++++++++++++++++++ .../main/java/org/apache/sedona/flink/Catalog.java | 1 + .../sedona/flink/expressions/Constructors.java | 13 ++++++++++++ .../org/apache/sedona/flink/ConstructorTest.java | 17 +++++++++++++++ python/sedona/sql/st_constructors.py | 14 +++++++++++++ python/tests/sql/test_constructor_test.py | 13 ++++++++++++ python/tests/sql/test_dataframe_api.py | 2 ++ .../sedona/snowflake/snowsql/TestConstructors.java | 15 ++++++++++++++ .../org/apache/sedona/snowflake/snowsql/UDFs.java | 14 +++++++++++++ .../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 + .../sql/sedona_sql/expressions/Constructors.scala | 13 ++++++++++++ .../sedona_sql/expressions/st_constructors.scala | 7 +++++++ .../apache/sedona/sql/constructorTestScala.scala | 13 ++++++++++++ .../apache/sedona/sql/dataFrameAPITestScala.scala | 14 +++++++++++++ 16 files changed, 207 insertions(+) diff --git a/docs/api/flink/Constructor.md b/docs/api/flink/Constructor.md index bbad2a7e1..e2920261b 100644 --- a/docs/api/flink/Constructor.md +++ b/docs/api/flink/Constructor.md @@ -291,6 +291,30 @@ Output: POINT(40.7128 -74.006) ``` +## ST_GeometryFromText + +Introduction: Construct a Geometry from WKT. If SRID is not set, it defaults to 0 (unknown). 
Alias of [ST_GeomFromWKT](#st_geomfromwkt) + +Format: + +`ST_GeometryFromText (Wkt: String)` + +`ST_GeometryFromText (Wkt: String, srid: Integer)` + +Since: `vTBD` + +SQL Example + +```sql +SELECT ST_GeometryFromText('POINT(40.7128 -74.0060)') +``` + +Output: + +``` +POINT(40.7128 -74.006) +``` + ## ST_LineFromText Introduction: Construct a LineString from Text diff --git a/docs/api/snowflake/vector-data/Constructor.md b/docs/api/snowflake/vector-data/Constructor.md index 3ba2433e2..581773359 100644 --- a/docs/api/snowflake/vector-data/Constructor.md +++ b/docs/api/snowflake/vector-data/Constructor.md @@ -266,6 +266,28 @@ Output: POINT(40.7128 -74.006) ``` +## ST_GeometryFromText + +Introduction: Construct a Geometry from WKT. If SRID is not set, it defaults to 0 (unknown). Alias of [ST_GeomFromWKT](#st_geomfromwkt) + +Format: + +`ST_GeometryFromText (Wkt: String)` + +`ST_GeometryFromText (Wkt: String, srid: Integer)` + +SQL Example + +```sql +SELECT ST_GeometryFromText('POINT(40.7128 -74.0060)') +``` + +Output: + +``` +POINT(40.7128 -74.006) +``` + ## ST_LineFromText Introduction: Construct a Line from Wkt text diff --git a/docs/api/sql/Constructor.md b/docs/api/sql/Constructor.md index ca88f1915..327d8c2be 100644 --- a/docs/api/sql/Constructor.md +++ b/docs/api/sql/Constructor.md @@ -341,6 +341,30 @@ Output: POINT(40.7128 -74.006) ``` +## ST_GeometryFromText + +Introduction: Construct a Geometry from WKT. If SRID is not set, it defaults to 0 (unknown). 
Alias of [ST_GeomFromWKT](#st_geomfromwkt) + +Format: + +`ST_GeometryFromText (Wkt: String)` + +`ST_GeometryFromText (Wkt: String, srid: Integer)` + +Since: `vTBD` + +SQL Example + +```sql +SELECT ST_GeometryFromText('POINT(40.7128 -74.0060)') +``` + +Output: + +``` +POINT(40.7128 -74.006) +``` + ## ST_LineFromText Introduction: Construct a Line from Wkt text diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java b/flink/src/main/java/org/apache/sedona/flink/Catalog.java index 8411b4ded..234e58b59 100644 --- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java +++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java @@ -37,6 +37,7 @@ public class Catalog { new Constructors.ST_GeomFromWKT(), new Constructors.ST_GeomFromEWKT(), new Constructors.ST_GeomFromText(), + new Constructors.ST_GeometryFromText(), new Constructors.ST_GeomFromWKB(), new Constructors.ST_GeomFromEWKB(), new Constructors.ST_GeomFromGeoJSON(), diff --git a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java index 5b66e41dd..cd53ac09d 100644 --- a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java +++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java @@ -180,6 +180,19 @@ public class Constructors { } } + public static class ST_GeometryFromText extends ScalarFunction { + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) + public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException { + return org.apache.sedona.common.Constructors.geomFromWKT(wktString, 0); + } + + + @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) + public Geometry eval(@DataTypeHint("String") String wktString, @DataTypeHint("Int") Integer srid) throws ParseException { + return org.apache.sedona.common.Constructors.geomFromWKT(wktString, srid); + } + } + public static class 
ST_GeomFromText extends ScalarFunction { @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) public Geometry eval(@DataTypeHint("String") String wktString) throws ParseException { diff --git a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java index 236a0cd61..b92d92100 100644 --- a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java +++ b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java @@ -19,6 +19,7 @@ import org.apache.flink.table.api.Table; import org.apache.flink.types.Row; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.sedona.flink.expressions.Constructors; +import org.apache.sedona.flink.expressions.Functions; import org.junit.BeforeClass; import org.junit.Test; import org.locationtech.jts.geom.Coordinate; @@ -256,6 +257,22 @@ public class ConstructorTest extends TestBase{ assertEquals(data.get(data.size() - 1).getField(0).toString(), result.getField(0).toString()); } + @Test + public void testGeometryFromText() { + List<Row> data = createPolygonWKT(testDataSize); + Table wktTable = createTextTable(data, polygonColNames); + Table geomTable = wktTable.select(call(Constructors.ST_GeometryFromText.class.getSimpleName(), + $(polygonColNames[0])).as(polygonColNames[0]), + $(polygonColNames[1])); + Row result = last(geomTable); + assertEquals(data.get(data.size() - 1).getField(0).toString(), result.getField(0).toString()); + + geomTable = wktTable.select(call(Constructors.ST_GeometryFromText.class.getSimpleName(), + $(polygonColNames[0]), 4326)); + int actual = (int) last(geomTable.select(call(Functions.ST_SRID.class.getSimpleName(), $("_c0")))).getField(0); + assertEquals(4326, actual); + } + @Test public void testPolygonFromEnvelope() { Double minX = 1.0; diff --git a/python/sedona/sql/st_constructors.py b/python/sedona/sql/st_constructors.py index 9289dc734..c0fb0fd26 100644 --- 
a/python/sedona/sql/st_constructors.py +++ b/python/sedona/sql/st_constructors.py @@ -97,6 +97,20 @@ def ST_GeomFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = No return _call_constructor_function("ST_GeomFromText", args) +@validate_argument_types +def ST_GeometryFromText(wkt: ColumnOrName, srid: Optional[ColumnOrNameOrNumber] = None) -> Column: + """Generate a geometry column from a Well-Known Text (WKT) string column. + This is an alias of ST_GeomFromWKT. + + :param wkt: WKT string column to generate from. + :type wkt: ColumnOrName + :return: Geometry column representing the WKT string. + :rtype: Column + """ + args = (wkt) if srid is None else (wkt, srid) + + return _call_constructor_function("ST_GeometryFromText", args) + @validate_argument_types def ST_GeomFromWKB(wkb: ColumnOrName) -> Column: diff --git a/python/tests/sql/test_constructor_test.py b/python/tests/sql/test_constructor_test.py index cdcf114da..c90c5db7a 100644 --- a/python/tests/sql/test_constructor_test.py +++ b/python/tests/sql/test_constructor_test.py @@ -115,6 +115,19 @@ class TestConstructors(TestBase): polygon_df.show(10) assert polygon_df.count() == 100 + def test_st_geometry_from_text(self): + polygon_wkt_df = self.spark.read.format("csv").\ + option("delimiter", "\t").\ + option("header", "false").\ + load(mixed_wkt_geometry_input_location) + + polygon_wkt_df.createOrReplaceTempView("polygontable") + polygon_df = self.spark.sql("select ST_GeometryFromText(polygontable._c0) as countyshape from polygontable") + assert polygon_df.count() == 100 + + polygon_df = self.spark.sql("select ST_GeomFromText(polygontable._c0, 4326) as countyshape from polygontable") + assert polygon_df.count() == 100 + def test_st_geom_from_wkb(self): polygon_wkb_df = self.spark.read.format("csv").\ option("delimiter", "\t").\ diff --git a/python/tests/sql/test_dataframe_api.py b/python/tests/sql/test_dataframe_api.py index 3fb6e0724..420a26ed1 100644 --- a/python/tests/sql/test_dataframe_api.py 
+++ b/python/tests/sql/test_dataframe_api.py @@ -44,6 +44,7 @@ test_configurations = [ (stc.ST_GeomFromKML, ("kml",), "constructor", "", "LINESTRING (-71.16 42.26, -71.17 42.26)"), (stc.ST_GeomFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_GeomFromText, ("wkt",4326), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), + (stc.ST_GeometryFromText, ("wkt", 4326), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), (stc.ST_GeomFromWKB, ("wkbLine",), "constructor", "ST_ReducePrecision(geom, 2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"), (stc.ST_GeomFromEWKB, ("wkbLine",), "constructor", "ST_ReducePrecision(geom, 2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"), (stc.ST_GeomFromWKT, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 4)"), @@ -238,6 +239,7 @@ wrong_type_configurations = [ (stc.ST_GeomFromWKB, (None,)), (stc.ST_GeomFromEWKB, (None,)), (stc.ST_GeomFromWKT, (None,)), + (stc.ST_GeometryFromText, (None,)), (stc.ST_LineFromText, (None,)), (stc.ST_LineStringFromText, (None, "")), (stc.ST_LineStringFromText, ("", None)), diff --git a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java index e0664774d..941b31fc6 100644 --- a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java +++ b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java @@ -81,6 +81,21 @@ public class TestConstructors extends TestBase{ "POINT (0 1)" ); } + + @Test + public void test_ST_GeometryFromText() { + registerUDF("ST_GeometryFromText", String.class); + verifySqlSingleRes( + "select sedona.ST_AsText(sedona.ST_GeometryFromText('POINT (0 1)'))", + "POINT (0 1)" + ); + registerUDF("ST_GeometryFromText", String.class, int.class); + verifySqlSingleRes( + "select sedona.ST_AsText(sedona.ST_GeometryFromText('POINT (0 1)', 4326))", + "POINT (0 1)" + ); + } + @Test public void 
test_ST_GeomFromWKB() { registerUDF("ST_GeomFromWKB", byte[].class); diff --git a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java index 4981573cc..a4ed232b6 100644 --- a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java +++ b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/UDFs.java @@ -492,6 +492,20 @@ public class UDFs { ); } + @UDFAnnotations.ParamMeta(argNames = {"wkt"}) + public static byte[] ST_GeometryFromText(String geomString) throws ParseException { + return GeometrySerde.serialize( + Constructors.geomFromWKT(geomString, 0) + ); + } + + @UDFAnnotations.ParamMeta(argNames = {"wkt", "srid"}) + public static byte[] ST_GeometryFromText(String geomString, int srid) throws ParseException { + return GeometrySerde.serialize( + Constructors.geomFromWKT(geomString, srid) + ); + } + @UDFAnnotations.ParamMeta(argNames = {"wkb"}) public static byte[] ST_GeomFromWKB(byte[] wkb) throws ParseException { return wkb; diff --git a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala index 6144735b8..a411f67b7 100644 --- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala +++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala @@ -43,6 +43,7 @@ object Catalog { function[ST_PolygonFromText](), function[ST_LineStringFromText](), function[ST_GeomFromText](0), + function[ST_GeometryFromText](0), function[ST_LineFromText](), function[ST_GeomFromWKT](0), function[ST_GeomFromEWKT](), diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala index a1c9ba575..a7a7811e4 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala +++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala @@ -104,6 +104,19 @@ case class ST_GeomFromEWKT(inputExpressions: Seq[Expression]) } } +/** + * Return a Geometry from a WKT string. Alias to ST_GeomFromWKT + * + * @param inputExpressions This function takes a geometry string and a srid. The string format must be WKT. + */ +case class ST_GeometryFromText(inputExpressions: Seq[Expression]) + extends InferredExpression(Constructors.geomFromWKT _) { + + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = { + copy(inputExpressions = newChildren) + } +} + /** * Return a Geometry from a WKT string diff --git a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala index 15f417a7d..eabf1429d 100644 --- a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala +++ b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala @@ -45,6 +45,13 @@ object st_constructors extends DataFrameAPI { def ST_GeomFromText(wkt: String, srid: Int): Column = wrapExpression[ST_GeomFromText](wkt, srid) + def ST_GeometryFromText(wkt: Column): Column = wrapExpression[ST_GeometryFromText](wkt, 0) + def ST_GeometryFromText(wkt: String): Column = wrapExpression[ST_GeometryFromText](wkt, 0) + + def ST_GeometryFromText(wkt: Column, srid: Column): Column = wrapExpression[ST_GeometryFromText](wkt, srid) + + def ST_GeometryFromText(wkt: String, srid: Int): Column = wrapExpression[ST_GeometryFromText](wkt, srid) + def ST_GeomFromWKB(wkb: Column): Column = wrapExpression[ST_GeomFromWKB](wkb) def ST_GeomFromWKB(wkb: String): Column = wrapExpression[ST_GeomFromWKB](wkb) diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala 
index 995640790..564e9e6d6 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala @@ -234,6 +234,19 @@ class constructorTestScala extends TestBaseScala { } } + it("Passed ST_GeometryFromText") { + var polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation) + polygonWktDf.createOrReplaceTempView("polygontable") + var polygonDf = sparkSession.sql("select ST_GeometryFromText(polygontable._c0, 4326) as countyshape from polygontable") + assert(polygonDf.count() == 100) + val nullGeom = sparkSession.sql("select ST_GeometryFromText(null)") + assert(nullGeom.first().isNullAt(0)) + // Fail on wrong input type + intercept[Exception] { + sparkSession.sql("SELECT ST_GeometryFromText(0)").collect() + } + } + it("Passed ST_GeomFromWKT multipolygon read as polygon bug") { val multipolygon = """'MULTIPOLYGON (((-97.143362 27.84948, -97.14051 27.849375, -97.13742 27.849375, -97.13647 27.851056, -97.136945 27.853788, -97.138728 27.855784, -97.141223 27.853158, -97.143362 27.84948)), diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala index 6df29042e..70e7201dd 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala @@ -174,6 +174,20 @@ class dataFrameAPITestScala extends TestBaseScala { assert(actualResult.getSRID == 4269) } + it("passed ST_GeometryFromText") { + val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS wkt").select(ST_GeometryFromText("wkt")) + val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText() + val expectedResult = "POINT (0 1)" + assert(actualResult == expectedResult) + } + + it("passed ST_GeometryFromText with srid") 
{ + val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS wkt").select(ST_GeometryFromText("wkt", 4326)) + val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry] + assert(actualResult.toText == "POINT (0 1)") + assert(actualResult.getSRID == 4326) + } + it("passed st_geomfromtext") { val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS wkt").select(ST_GeomFromText("wkt")) val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText()
