This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new c340a976 [SEDONA-333] Implement ST_GeomFromEWKT (#937)
c340a976 is described below

commit c340a9763f2d580ec59473da8e97fc18955b0ae4
Author: Junhao Liu <[email protected]>
AuthorDate: Wed Aug 2 11:51:18 2023 +0800

    [SEDONA-333] Implement ST_GeomFromEWKT (#937)
---
 .../org/apache/sedona/common/Constructors.java     | 22 +++++++++++++++++++++
 .../org/apache/sedona/common/ConstructorsTest.java | 21 ++++++++++++++++++++
 docs/api/flink/Constructor.md                      | 20 +++++++++++++++++++
 docs/api/sql/Constructor.md                        | 20 +++++++++++++++++++
 .../main/java/org/apache/sedona/flink/Catalog.java |  1 +
 .../sedona/flink/expressions/Constructors.java     |  7 +++++++
 .../org/apache/sedona/flink/ConstructorTest.java   | 17 ++++++++++++++++
 python/sedona/sql/st_constructors.py               | 12 +++++++++++
 python/tests/sql/test_constructor_test.py          |  6 ++++++
 python/tests/sql/test_dataframe_api.py             |  3 +++
 .../scala/org/apache/sedona/sql/UDF/Catalog.scala  |  1 +
 .../sql/sedona_sql/expressions/Constructors.scala  | 13 ++++++++++++
 .../sedona_sql/expressions/st_constructors.scala   |  3 +++
 .../apache/sedona/sql/constructorTestScala.scala   | 23 ++++++++++++++++++++++
 .../apache/sedona/sql/dataFrameAPITestScala.scala  |  7 +++++++
 15 files changed, 176 insertions(+)

diff --git a/common/src/main/java/org/apache/sedona/common/Constructors.java 
b/common/src/main/java/org/apache/sedona/common/Constructors.java
index c391d77f..b82ba014 100644
--- a/common/src/main/java/org/apache/sedona/common/Constructors.java
+++ b/common/src/main/java/org/apache/sedona/common/Constructors.java
@@ -43,6 +43,28 @@ public class Constructors {
         return new WKTReader(geometryFactory).read(wkt);
     }
 
+    public static Geometry geomFromEWKT(String ewkt) throws ParseException {
+        if (ewkt == null) {
+            return null;
+        }
+        int SRID = 0;
+        String wkt = ewkt;
+        
+        int index = ewkt.indexOf("SRID=");
+        if (index != -1) {
+            int semicolonIndex = ewkt.indexOf(';', index);
+            if (semicolonIndex != -1) {
+                SRID = Integer.parseInt(ewkt.substring(index + 5, 
semicolonIndex));
+                wkt = ewkt.substring(semicolonIndex + 1);
+            }
+            else {
+                throw new ParseException("Invalid EWKT string");
+            }
+        }
+        GeometryFactory geometryFactory = new GeometryFactory(new 
PrecisionModel(), SRID);
+        return new WKTReader(geometryFactory).read(wkt);
+    }
+
     public static Geometry geomFromWKB(byte[] wkb) throws ParseException {
         return new WKBReader().read(wkb);
     }
diff --git 
a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java 
b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
index 1caa93aa..0e2da3fc 100644
--- a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
@@ -37,6 +37,27 @@ public class ConstructorsTest {
         ParseException invalid = assertThrows(ParseException.class, () -> 
Constructors.geomFromWKT("not valid", 0));
         assertEquals("Unknown geometry type: NOT (line 1)", 
invalid.getMessage());
     }
+
+    @Test
+    public void geomFromEWKT() throws ParseException {
+        // A null input yields a null geometry.
+        assertNull(Constructors.geomFromEWKT(null));
+
+        // Plain WKT without an SRID prefix: SRID is 0.
+        Geometry geom = Constructors.geomFromEWKT("POINT (1 1)");
+        assertEquals(0, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        // SRID prefix followed by a space before the WKT part.
+        geom = Constructors.geomFromEWKT("SRID=4269; POINT (1 1)");
+        assertEquals(4269, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        // SRID prefix immediately followed by the WKT part.
+        geom = Constructors.geomFromEWKT("SRID=4269;POINT (1 1)");
+        assertEquals(4269, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        // Text that is not WKT at all is rejected with a ParseException.
+        ParseException invalid = assertThrows(ParseException.class, () -> 
Constructors.geomFromEWKT("not valid"));
+        assertEquals("Unknown geometry type: NOT (line 1)", 
invalid.getMessage());
+    }
+
     @Test
     public void mLineFromWKT() throws ParseException {
         assertNull(Constructors.mLineFromText(null, 0));
diff --git a/docs/api/flink/Constructor.md b/docs/api/flink/Constructor.md
index d76fe44c..2015ffc3 100644
--- a/docs/api/flink/Constructor.md
+++ b/docs/api/flink/Constructor.md
@@ -230,6 +230,26 @@ Output:
 POINT(40.7128 -74.006)
 ```
 
+## ST_GeomFromEWKT
+
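+Introduction: Construct a Geometry from an OGC Extended WKT (EWKT) string. An optional leading `SRID=<integer>;` prefix sets the SRID of the resulting geometry.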
+
+Format:
+`ST_GeomFromEWKT (EWkt:string)`
+
+Since: `v1.5.0`
+
+SQL example:
+```sql
+SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
+```
+
+Output:
+
+```
+POINT(40.7128 -74.006)
+```
+
 ## ST_LineFromText
 
 Introduction: Construct a LineString from Text
diff --git a/docs/api/sql/Constructor.md b/docs/api/sql/Constructor.md
index 92bc14d8..d023ec2f 100644
--- a/docs/api/sql/Constructor.md
+++ b/docs/api/sql/Constructor.md
@@ -274,6 +274,26 @@ Output:
 POINT(40.7128 -74.006)
 ```
 
+## ST_GeomFromEWKT
+
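+Introduction: Construct a Geometry from an OGC Extended WKT (EWKT) string. An optional leading `SRID=<integer>;` prefix sets the SRID of the resulting geometry.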
+
+Format:
+`ST_GeomFromEWKT (EWkt:string)`
+
+Since: `v1.5.0`
+
+SQL example:
+```sql
+SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
+```
+
+Output:
+
+```
+POINT(40.7128 -74.006)
+```
+
 ## ST_LineFromText
 
 Introduction: Construct a Line from Wkt text
diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java 
b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
index 89c350e4..7cc1f1f9 100644
--- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java
+++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
@@ -32,6 +32,7 @@ public class Catalog {
                 new Constructors.ST_PolygonFromText(),
                 new Constructors.ST_PolygonFromEnvelope(),
                 new Constructors.ST_GeomFromWKT(),
+                new Constructors.ST_GeomFromEWKT(),
                 new Constructors.ST_GeomFromText(),
                 new Constructors.ST_GeomFromWKB(),
                 new Constructors.ST_GeomFromGeoJSON(),
diff --git 
a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java 
b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
index fcc426a0..ec9f8766 100644
--- a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
+++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
@@ -134,6 +134,13 @@ public class Constructors {
         }
     }
 
+    /**
+     * Flink scalar function that builds a geometry from an EWKT string by delegating to
+     * {@code org.apache.sedona.common.Constructors.geomFromEWKT}.
+     */
+    public static class ST_GeomFromEWKT extends ScalarFunction {
+        @DataTypeHint(value = "RAW", bridgedTo = 
org.locationtech.jts.geom.Geometry.class)
+        public Geometry eval(@DataTypeHint("String") String wktString) throws 
ParseException {
+            return 
org.apache.sedona.common.Constructors.geomFromEWKT(wktString);
+        }
+    }
+
     public static class ST_GeomFromText extends ScalarFunction {
         @DataTypeHint(value = "RAW", bridgedTo = 
org.locationtech.jts.geom.Geometry.class)
         public Geometry eval(@DataTypeHint("String") String wktString) throws 
ParseException {
diff --git a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java 
b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
index a39a8a04..12b33aa4 100644
--- a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
+++ b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
@@ -148,6 +148,23 @@ public class ConstructorTest extends TestBase{
         assertEquals(data.get(data.size() - 1).getField(0).toString(), 
result.getField(0).toString());
     }
 
+    @Test
+    public void testGeomFromEWKT() {
+        // Single input row whose first field is an EWKT string with an SRID prefix.
+        List<Row> data = new ArrayList<>();
+        data.add(Row.of("SRID=123;MULTILINESTRING((1 2, 3 4), (4 5, 6 7))", 
"multiline", 0L));
+
+        Table geomTable = createTextTable(data, multilinestringColNames);
+        geomTable = geomTable
+                
.select(call(Constructors.ST_GeomFromEWKT.class.getSimpleName(),
+                        $(multilinestringColNames[0]))
+                        .as(multilinestringColNames[0]), 
$(multilinestringColNames[1]));
+        String result = first(geomTable)
+                .getFieldAs(0)
+                .toString();
+        // Only the geometry's text form is compared; the SRID (123) is not asserted here.
+        String expectedGeom = "MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))";
+        assertEquals(expectedGeom, result);
+    }
+
     @Test
     public void testGeomFromText() {
         List<Row> data = createPolygonWKT(testDataSize);
diff --git a/python/sedona/sql/st_constructors.py 
b/python/sedona/sql/st_constructors.py
index 4981894e..87725103 100644
--- a/python/sedona/sql/st_constructors.py
+++ b/python/sedona/sql/st_constructors.py
@@ -31,6 +31,7 @@ __all__ = [
     "ST_GeomFromText",
     "ST_GeomFromWKB",
     "ST_GeomFromWKT",
+    "ST_GeomFromEWKT",
     "ST_LineFromText",
     "ST_LineStringFromText",
     "ST_Point",
@@ -132,6 +133,17 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column:
     """
     return _call_constructor_function("ST_GeomFromWKT", wkt)
 
+@validate_argument_types
+def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column:
+    """Generate a geometry column from an OGC Extended Well-Known Text (EWKT) string column.
+
+    :param ewkt: OGC Extended WKT string column to generate from.
+    :type ewkt: ColumnOrName
+    :return: Geometry column representing the EWKT string.
+    :rtype: Column
+    """
+    return _call_constructor_function("ST_GeomFromEWKT", ewkt)
+
 
 @validate_argument_types
 def ST_LineFromText(wkt: ColumnOrName) -> Column:
diff --git a/python/tests/sql/test_constructor_test.py 
b/python/tests/sql/test_constructor_test.py
index 2ed0d6ad..93af3d7b 100644
--- a/python/tests/sql/test_constructor_test.py
+++ b/python/tests/sql/test_constructor_test.py
@@ -60,6 +60,12 @@ class TestConstructors(TestBase):
         polygon_df.show(10)
         assert polygon_df.count() == 100
 
+    def test_st_geom_from_ewkt(self):
+        # Smoke test: one EWKT row with an SRID prefix goes through ST_GeomFromEWKT
+        # and one row comes out. The geometry and its SRID are not inspected here.
+        input_df = self.spark.createDataFrame([("SRID=4269;LineString(1 2, 3 
4)",)], ["ewkt"])
+        input_df.createOrReplaceTempView("input_ewkt")
+        line_df = self.spark.sql("select ST_GeomFromEWKT(ewkt) as geom from 
input_ewkt")
+        assert line_df.count() == 1
+
     def test_st_geom_from_wkt_3d(self):
         input_df = self.spark.createDataFrame([
             ("Point(21 52 87)",),
diff --git a/python/tests/sql/test_dataframe_api.py 
b/python/tests/sql/test_dataframe_api.py
index 2c84e147..82aff413 100644
--- a/python/tests/sql/test_dataframe_api.py
+++ b/python/tests/sql/test_dataframe_api.py
@@ -40,6 +40,7 @@ test_configurations = [
     (stc.ST_GeomFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 
4)"),
     (stc.ST_GeomFromWKB, ("wkb",), "constructor", "ST_ReducePrecision(geom, 
2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"),
     (stc.ST_GeomFromWKT, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 
4)"),
+    (stc.ST_GeomFromEWKT, ("ewkt",), "linestring_ewkt", "", "LINESTRING (1 2, 
3 4)"),
     (stc.ST_LineFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3 
4)"),
     (stc.ST_LineStringFromText, ("multiple_point", lambda: f.lit(',')), 
"constructor", "", "LINESTRING (0 0, 1 0, 1 1, 0 0)"),
     (stc.ST_Point, ("x", "y"), "constructor", "", "POINT (0 1)"),
@@ -364,6 +365,8 @@ class TestDataFrameAPI(TestBase):
             return TestDataFrameAPI.spark.sql("SELECT 
ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS line")
         elif request.param == "linestring_wkt":
             return TestDataFrameAPI.spark.sql("SELECT 'LINESTRING (1 2, 3 4)' 
AS wkt")
+        elif request.param == "linestring_ewkt":
+            return TestDataFrameAPI.spark.sql("SELECT 'SRID=4269;LINESTRING (1 
2, 3 4)' AS ewkt")
         elif request.param == "min_max_x_y":
             return TestDataFrameAPI.spark.sql("SELECT 0.0 AS minx, 1.0 AS 
miny, 2.0 AS maxx, 3.0 AS maxy")
         elif request.param == "multipoint_geom":
diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala 
b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index b392399a..19510e1b 100644
--- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -43,6 +43,7 @@ object Catalog {
     function[ST_GeomFromText](0),
     function[ST_LineFromText](),
     function[ST_GeomFromWKT](0),
+    function[ST_GeomFromEWKT](),
     function[ST_GeomFromWKB](),
     function[ST_GeomFromGeoJSON](),
     function[ST_GeomFromGML](),
diff --git 
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
 
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
index ccc7ffe9..79d1fb92 100644
--- 
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
+++ 
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
@@ -91,6 +91,19 @@ case class ST_GeomFromWKT(inputExpressions: Seq[Expression])
   }
 }
 
+/**
+  * Return a Geometry from an OGC Extended WKT (EWKT) string
+  *
+  * @param inputExpressions This function takes a geometry string. The string format must be the OGC Extended Well-Known Text (EWKT) representation.
+  */
+case class ST_GeomFromEWKT(inputExpressions: Seq[Expression])
+  extends InferredExpression(Constructors.geomFromEWKT _) {
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = 
{
+    copy(inputExpressions = newChildren)
+  }
+}
+
 
 /**
   * Return a Geometry from a WKT string
diff --git 
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
 
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
index aa9eada8..900c6f97 100644
--- 
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
+++ 
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
@@ -55,6 +55,9 @@ object st_constructors extends DataFrameAPI {
 
   def ST_GeomFromWKT(wkt: String, srid: Int): Column = 
wrapExpression[ST_GeomFromWKT](wkt, srid)
 
+  // DataFrame API wrappers for the ST_GeomFromEWKT expression; the input column is
+  // passed either as a Column or as a String.
+  def ST_GeomFromEWKT(wkt: Column): Column = 
wrapExpression[ST_GeomFromEWKT](wkt)
+  def ST_GeomFromEWKT(wkt: String): Column = 
wrapExpression[ST_GeomFromEWKT](wkt)
+
   def ST_LineFromText(wkt: Column): Column = 
wrapExpression[ST_LineFromText](wkt)
   def ST_LineFromText(wkt: String): Column = 
wrapExpression[ST_LineFromText](wkt)
 
diff --git 
a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala 
b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
index 69b1a166..66c92da0 100644
--- a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
+++ b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
@@ -93,6 +93,29 @@ class constructorTestScala extends TestBaseScala {
       assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)")
     }
 
+    it("Passed ST_GeomFromEWKT") {
+      // Parse the first column of the tab-delimited input file with ST_GeomFromEWKT.
+      var polygonWktDf = sparkSession.read.format("csv").option("delimiter", 
"\t").option("header", "false").load(mixedWktGeometryInputLocation)
+      polygonWktDf.createOrReplaceTempView("polygontable")
+      var polygonDf = sparkSession.sql("select 
ST_GeomFromEWKT(polygontable._c0) as countyshape from polygontable")
+      assert(polygonDf.count() == 100)
+      // A null input yields a null result.
+      val nullGeom = sparkSession.sql("select ST_GeomFromEWKT(null)")
+      assert(nullGeom.first().isNullAt(0))
+      // An EWKT literal with an SRID prefix is accepted.
+      val pointDf = sparkSession.sql("select 
ST_GeomFromEWKT('SRID=4269;POINT(-71.064544 42.28787)')")
+      assert(pointDf.count() == 1)
+      // Fail on wrong input type
+      intercept[Exception] {
+        sparkSession.sql("SELECT ST_GeomFromEWKT(0)").collect()
+      }
+    }
+
+    it("Passed ST_GeomFromEWKT invalid input") {
+      // A string that is neither WKT nor EWKT must fail, and the exception message
+      // must be exactly the one asserted below.
+      val thrown = intercept[Exception] {
+        sparkSession.sql("SELECT ST_GeomFromEWKT('not wkt')").collect()
+      }
+      assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)")
+    }
+
     it("Passed ST_LineFromText") {
       val geometryDf = Seq("Linestring(1 2, 3 4)").map(wkt => 
Tuple1(wkt)).toDF("geom")
       geometryDf.createOrReplaceTempView("linetable")
diff --git 
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala 
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 44664529..2651c33a 100644
--- 
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++ 
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -94,6 +94,13 @@ class dataFrameAPITestScala extends TestBaseScala {
       assert(actualResult.getSRID == 4326)
     }
 
+    it("passed st_geomfromewkt") {
+      // The SRID prefix must not appear in the geometry text and must be set as the
+      // geometry's SRID.
+      val df = sparkSession.sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS 
wkt").select(ST_GeomFromEWKT("wkt"))
+      val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry]
+      assert(actualResult.toText == "POINT (0 1)")
+      assert(actualResult.getSRID == 4269)
+    }
+
     it("passed st_geomfromtext") {
       val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS 
wkt").select(ST_GeomFromText("wkt"))
       val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText()

Reply via email to