This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new c340a976 [SEDONA-333] Implement ST_GeomFromEWKT (#937)
c340a976 is described below
commit c340a9763f2d580ec59473da8e97fc18955b0ae4
Author: Junhao Liu <[email protected]>
AuthorDate: Wed Aug 2 11:51:18 2023 +0800
[SEDONA-333] Implement ST_GeomFromEWKT (#937)
---
.../org/apache/sedona/common/Constructors.java | 22 +++++++++++++++++++++
.../org/apache/sedona/common/ConstructorsTest.java | 21 ++++++++++++++++++++
docs/api/flink/Constructor.md | 20 +++++++++++++++++++
docs/api/sql/Constructor.md | 20 +++++++++++++++++++
.../main/java/org/apache/sedona/flink/Catalog.java | 1 +
.../sedona/flink/expressions/Constructors.java | 7 +++++++
.../org/apache/sedona/flink/ConstructorTest.java | 17 ++++++++++++++++
python/sedona/sql/st_constructors.py | 12 +++++++++++
python/tests/sql/test_constructor_test.py | 6 ++++++
python/tests/sql/test_dataframe_api.py | 3 +++
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 +
.../sql/sedona_sql/expressions/Constructors.scala | 13 ++++++++++++
.../sedona_sql/expressions/st_constructors.scala | 3 +++
.../apache/sedona/sql/constructorTestScala.scala | 23 ++++++++++++++++++++++
.../apache/sedona/sql/dataFrameAPITestScala.scala | 7 +++++++
15 files changed, 176 insertions(+)
diff --git a/common/src/main/java/org/apache/sedona/common/Constructors.java
b/common/src/main/java/org/apache/sedona/common/Constructors.java
index c391d77f..b82ba014 100644
--- a/common/src/main/java/org/apache/sedona/common/Constructors.java
+++ b/common/src/main/java/org/apache/sedona/common/Constructors.java
@@ -43,6 +43,28 @@ public class Constructors {
return new WKTReader(geometryFactory).read(wkt);
}
+ public static Geometry geomFromEWKT(String ewkt) throws ParseException {
+ if (ewkt == null) {
+ return null;
+ }
+ int SRID = 0;
+ String wkt = ewkt;
+
+ int index = ewkt.indexOf("SRID=");
+ if (index != -1) {
+ int semicolonIndex = ewkt.indexOf(';', index);
+ if (semicolonIndex != -1) {
+ SRID = Integer.parseInt(ewkt.substring(index + 5,
semicolonIndex));
+ wkt = ewkt.substring(semicolonIndex + 1);
+ }
+ else {
+ throw new ParseException("Invalid EWKT string");
+ }
+ }
+ GeometryFactory geometryFactory = new GeometryFactory(new
PrecisionModel(), SRID);
+ return new WKTReader(geometryFactory).read(wkt);
+ }
+
public static Geometry geomFromWKB(byte[] wkb) throws ParseException {
return new WKBReader().read(wkb);
}
diff --git
a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
index 1caa93aa..0e2da3fc 100644
--- a/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/ConstructorsTest.java
@@ -37,6 +37,27 @@ public class ConstructorsTest {
ParseException invalid = assertThrows(ParseException.class, () ->
Constructors.geomFromWKT("not valid", 0));
assertEquals("Unknown geometry type: NOT (line 1)",
invalid.getMessage());
}
+
+    /** Exercises null input, plain WKT, SRID-prefixed EWKT and both failure modes. */
+    @Test
+    public void geomFromEWKT() throws ParseException {
+        assertNull(Constructors.geomFromEWKT(null));
+
+        // Without a prefix the input is read as plain WKT and the SRID stays 0.
+        Geometry geom = Constructors.geomFromEWKT("POINT (1 1)");
+        assertEquals(0, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        // Whitespace between the ';' and the WKT body is tolerated.
+        geom = Constructors.geomFromEWKT("SRID=4269; POINT (1 1)");
+        assertEquals(4269, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        geom = Constructors.geomFromEWKT("SRID=4269;POINT (1 1)");
+        assertEquals(4269, geom.getSRID());
+        assertEquals("POINT (1 1)", geom.toText());
+
+        // Text without an SRID tag goes straight to the WKT reader, which rejects it.
+        ParseException invalid = assertThrows(ParseException.class, () -> Constructors.geomFromEWKT("not valid"));
+        assertEquals("Unknown geometry type: NOT (line 1)", invalid.getMessage());
+
+        // An SRID tag that is never closed by ';' takes the explicit error branch.
+        ParseException unterminated = assertThrows(ParseException.class, () -> Constructors.geomFromEWKT("SRID=4269 POINT (1 1)"));
+        assertEquals("Invalid EWKT string", unterminated.getMessage());
+    }
+
@Test
public void mLineFromWKT() throws ParseException {
assertNull(Constructors.mLineFromText(null, 0));
diff --git a/docs/api/flink/Constructor.md b/docs/api/flink/Constructor.md
index d76fe44c..2015ffc3 100644
--- a/docs/api/flink/Constructor.md
+++ b/docs/api/flink/Constructor.md
@@ -230,6 +230,26 @@ Output:
POINT(40.7128 -74.006)
```
+## ST_GeomFromEWKT
+
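+Introduction: Construct a Geometry from OGC Extended WKT (EWKT). An optional `SRID=<id>;` prefix sets the SRID of the resulting geometry; without the prefix the SRID is 0.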
+
+Format:
+`ST_GeomFromEWKT (EWkt:string)`
+
+Since: `v1.5.0`
+
+SQL example:
+```sql
+SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
+```
+
+Output:
+
+```
+POINT(40.7128 -74.006)
+```
+
## ST_LineFromText
Introduction: Construct a LineString from Text
diff --git a/docs/api/sql/Constructor.md b/docs/api/sql/Constructor.md
index 92bc14d8..d023ec2f 100644
--- a/docs/api/sql/Constructor.md
+++ b/docs/api/sql/Constructor.md
@@ -274,6 +274,26 @@ Output:
POINT(40.7128 -74.006)
```
+## ST_GeomFromEWKT
+
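+Introduction: Construct a Geometry from OGC Extended WKT (EWKT). An optional `SRID=<id>;` prefix sets the SRID of the resulting geometry; without the prefix the SRID is 0.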
+
+Format:
+`ST_GeomFromEWKT (EWkt:string)`
+
+Since: `v1.5.0`
+
+SQL example:
+```sql
+SELECT ST_AsText(ST_GeomFromEWKT('SRID=4269;POINT(40.7128 -74.0060)'))
+```
+
+Output:
+
+```
+POINT(40.7128 -74.006)
+```
+
## ST_LineFromText
Introduction: Construct a Line from Wkt text
diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java
b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
index 89c350e4..7cc1f1f9 100644
--- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java
+++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
@@ -32,6 +32,7 @@ public class Catalog {
new Constructors.ST_PolygonFromText(),
new Constructors.ST_PolygonFromEnvelope(),
new Constructors.ST_GeomFromWKT(),
+ new Constructors.ST_GeomFromEWKT(),
new Constructors.ST_GeomFromText(),
new Constructors.ST_GeomFromWKB(),
new Constructors.ST_GeomFromGeoJSON(),
diff --git
a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
index fcc426a0..ec9f8766 100644
--- a/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
+++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Constructors.java
@@ -134,6 +134,13 @@ public class Constructors {
}
}
+ public static class ST_GeomFromEWKT extends ScalarFunction {
+ @DataTypeHint(value = "RAW", bridgedTo =
org.locationtech.jts.geom.Geometry.class)
+ public Geometry eval(@DataTypeHint("String") String wktString) throws
ParseException {
+ return
org.apache.sedona.common.Constructors.geomFromEWKT(wktString);
+ }
+ }
+
public static class ST_GeomFromText extends ScalarFunction {
@DataTypeHint(value = "RAW", bridgedTo =
org.locationtech.jts.geom.Geometry.class)
public Geometry eval(@DataTypeHint("String") String wktString) throws
ParseException {
diff --git a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
index a39a8a04..12b33aa4 100644
--- a/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
+++ b/flink/src/test/java/org/apache/sedona/flink/ConstructorTest.java
@@ -148,6 +148,23 @@ public class ConstructorTest extends TestBase{
assertEquals(data.get(data.size() - 1).getField(0).toString(),
result.getField(0).toString());
}
+    @Test
+    public void testGeomFromEWKT() {
+        // Single-row input: an EWKT multilinestring tagged with SRID 123.
+        List<Row> rows = new ArrayList<>();
+        rows.add(Row.of("SRID=123;MULTILINESTRING((1 2, 3 4), (4 5, 6 7))", "multiline", 0L));
+        Table inputTable = createTextTable(rows, multilinestringColNames);
+
+        String functionName = Constructors.ST_GeomFromEWKT.class.getSimpleName();
+        Table geomTable = inputTable.select(
+                call(functionName, $(multilinestringColNames[0])).as(multilinestringColNames[0]),
+                $(multilinestringColNames[1]));
+
+        Object firstField = first(geomTable).getFieldAs(0);
+        // The expected text form carries no SRID prefix.
+        assertEquals("MULTILINESTRING ((1 2, 3 4), (4 5, 6 7))", firstField.toString());
+    }
+
@Test
public void testGeomFromText() {
List<Row> data = createPolygonWKT(testDataSize);
diff --git a/python/sedona/sql/st_constructors.py
b/python/sedona/sql/st_constructors.py
index 4981894e..87725103 100644
--- a/python/sedona/sql/st_constructors.py
+++ b/python/sedona/sql/st_constructors.py
@@ -31,6 +31,7 @@ __all__ = [
"ST_GeomFromText",
"ST_GeomFromWKB",
"ST_GeomFromWKT",
+ "ST_GeomFromEWKT",
"ST_LineFromText",
"ST_LineStringFromText",
"ST_Point",
@@ -132,6 +133,17 @@ def ST_GeomFromWKT(wkt: ColumnOrName) -> Column:
"""
return _call_constructor_function("ST_GeomFromWKT", wkt)
+@validate_argument_types
+def ST_GeomFromEWKT(ewkt: ColumnOrName) -> Column:
+    """Generate a geometry column from an OGC Extended Well-Known Text (EWKT) string column.
+
+    :param ewkt: OGC Extended WKT string column to generate from.
+    :type ewkt: ColumnOrName
+    :return: Geometry column representing the EWKT string.
+    :rtype: Column
+    """
+    return _call_constructor_function("ST_GeomFromEWKT", ewkt)
+
@validate_argument_types
def ST_LineFromText(wkt: ColumnOrName) -> Column:
diff --git a/python/tests/sql/test_constructor_test.py
b/python/tests/sql/test_constructor_test.py
index 2ed0d6ad..93af3d7b 100644
--- a/python/tests/sql/test_constructor_test.py
+++ b/python/tests/sql/test_constructor_test.py
@@ -60,6 +60,12 @@ class TestConstructors(TestBase):
polygon_df.show(10)
assert polygon_df.count() == 100
+ def test_st_geom_from_ewkt(self):
+ input_df = self.spark.createDataFrame([("SRID=4269;LineString(1 2, 3
4)",)], ["ewkt"])
+ input_df.createOrReplaceTempView("input_ewkt")
+ line_df = self.spark.sql("select ST_GeomFromEWKT(ewkt) as geom from
input_ewkt")
+ assert line_df.count() == 1
+
def test_st_geom_from_wkt_3d(self):
input_df = self.spark.createDataFrame([
("Point(21 52 87)",),
diff --git a/python/tests/sql/test_dataframe_api.py
b/python/tests/sql/test_dataframe_api.py
index 2c84e147..82aff413 100644
--- a/python/tests/sql/test_dataframe_api.py
+++ b/python/tests/sql/test_dataframe_api.py
@@ -40,6 +40,7 @@ test_configurations = [
(stc.ST_GeomFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3
4)"),
(stc.ST_GeomFromWKB, ("wkb",), "constructor", "ST_ReducePrecision(geom,
2)", "LINESTRING (-2.1 -0.35, -1.5 -0.67)"),
(stc.ST_GeomFromWKT, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3
4)"),
+ (stc.ST_GeomFromEWKT, ("ewkt",), "linestring_ewkt", "", "LINESTRING (1 2,
3 4)"),
(stc.ST_LineFromText, ("wkt",), "linestring_wkt", "", "LINESTRING (1 2, 3
4)"),
(stc.ST_LineStringFromText, ("multiple_point", lambda: f.lit(',')),
"constructor", "", "LINESTRING (0 0, 1 0, 1 1, 0 0)"),
(stc.ST_Point, ("x", "y"), "constructor", "", "POINT (0 1)"),
@@ -364,6 +365,8 @@ class TestDataFrameAPI(TestBase):
return TestDataFrameAPI.spark.sql("SELECT
ST_GeomFromWKT('LINESTRING (0 0, 1 0, 2 0, 3 0, 4 0, 5 0)') AS line")
elif request.param == "linestring_wkt":
return TestDataFrameAPI.spark.sql("SELECT 'LINESTRING (1 2, 3 4)'
AS wkt")
+ elif request.param == "linestring_ewkt":
+ return TestDataFrameAPI.spark.sql("SELECT 'SRID=4269;LINESTRING (1
2, 3 4)' AS ewkt")
elif request.param == "min_max_x_y":
return TestDataFrameAPI.spark.sql("SELECT 0.0 AS minx, 1.0 AS
miny, 2.0 AS maxx, 3.0 AS maxy")
elif request.param == "multipoint_geom":
diff --git a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index b392399a..19510e1b 100644
--- a/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -43,6 +43,7 @@ object Catalog {
function[ST_GeomFromText](0),
function[ST_LineFromText](),
function[ST_GeomFromWKT](0),
+ function[ST_GeomFromEWKT](),
function[ST_GeomFromWKB](),
function[ST_GeomFromGeoJSON](),
function[ST_GeomFromGML](),
diff --git
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
index ccc7ffe9..79d1fb92 100644
---
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
+++
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Constructors.scala
@@ -91,6 +91,19 @@ case class ST_GeomFromWKT(inputExpressions: Seq[Expression])
}
}
+/**
+  * Return a Geometry from an OGC Extended WKT (EWKT) string, i.e. WKT optionally prefixed with `SRID=<id>;`.
+  *
+  * @param inputExpressions This function takes a geometry string. The string format must be OGC Extended Well-Known text (EWKT) representation.
+  */
+case class ST_GeomFromEWKT(inputExpressions: Seq[Expression])
+  extends InferredExpression(Constructors.geomFromEWKT _) {
+
+  // Returns a copy of this expression built around the replacement child expressions.
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = {
+    copy(inputExpressions = newChildren)
+  }
+}
+
/**
* Return a Geometry from a WKT string
diff --git
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
index aa9eada8..900c6f97 100644
---
a/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
+++
b/sql/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
@@ -55,6 +55,9 @@ object st_constructors extends DataFrameAPI {
def ST_GeomFromWKT(wkt: String, srid: Int): Column =
wrapExpression[ST_GeomFromWKT](wkt, srid)
+  // EWKT constructor for the DataFrame API; the input may be given as a Column or as a String.
+  def ST_GeomFromEWKT(wkt: Column): Column = wrapExpression[ST_GeomFromEWKT](wkt)
+  def ST_GeomFromEWKT(wkt: String): Column = wrapExpression[ST_GeomFromEWKT](wkt)
+
def ST_LineFromText(wkt: Column): Column =
wrapExpression[ST_LineFromText](wkt)
def ST_LineFromText(wkt: String): Column =
wrapExpression[ST_LineFromText](wkt)
diff --git
a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
index 69b1a166..66c92da0 100644
--- a/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
+++ b/sql/common/src/test/scala/org/apache/sedona/sql/constructorTestScala.scala
@@ -93,6 +93,29 @@ class constructorTestScala extends TestBaseScala {
assert(thrown.getMessage == "Unknown geometry type: NOT (line 1)")
}
+  it("Passed ST_GeomFromEWKT") {
+    // Load the tab-separated fixture and run the function over its first column.
+    val polygonWktDf = sparkSession.read.format("csv").option("delimiter", "\t").option("header", "false").load(mixedWktGeometryInputLocation)
+    polygonWktDf.createOrReplaceTempView("polygontable")
+    val polygonDf = sparkSession.sql("select ST_GeomFromEWKT(polygontable._c0) as countyshape from polygontable")
+    assert(polygonDf.count() == 100)
+    // A null argument yields a null geometry rather than an error.
+    val nullGeom = sparkSession.sql("select ST_GeomFromEWKT(null)")
+    assert(nullGeom.first().isNullAt(0))
+    val pointDf = sparkSession.sql("select ST_GeomFromEWKT('SRID=4269;POINT(-71.064544 42.28787)')")
+    assert(pointDf.count() == 1)
+    // The SRID prefix is what distinguishes EWKT from WKT, so check that it reaches the result.
+    val point = pointDf.first().get(0).asInstanceOf[org.locationtech.jts.geom.Geometry]
+    assert(point.getSRID == 4269)
+    // Fail on wrong input type
+    intercept[Exception] {
+      sparkSession.sql("SELECT ST_GeomFromEWKT(0)").collect()
+    }
+  }
+
+  it("Passed ST_GeomFromEWKT invalid input") {
+    // A string that is not WKT at all must fail with the reader's parse error.
+    val query = "SELECT ST_GeomFromEWKT('not wkt')"
+    val failure = intercept[Exception] {
+      sparkSession.sql(query).collect()
+    }
+    assert(failure.getMessage == "Unknown geometry type: NOT (line 1)")
+  }
+
it("Passed ST_LineFromText") {
val geometryDf = Seq("Linestring(1 2, 3 4)").map(wkt =>
Tuple1(wkt)).toDF("geom")
geometryDf.createOrReplaceTempView("linetable")
diff --git
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 44664529..2651c33a 100644
---
a/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++
b/sql/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -94,6 +94,13 @@ class dataFrameAPITestScala extends TestBaseScala {
assert(actualResult.getSRID == 4326)
}
+  it("passed st_geomfromewkt") {
+    // The input frame has one column, wkt, holding an SRID-tagged point literal.
+    val input = sparkSession.sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS wkt")
+    val geom = input.select(ST_GeomFromEWKT("wkt")).take(1)(0).get(0).asInstanceOf[Geometry]
+    // Both the coordinates and the SRID from the prefix must come through.
+    assert(geom.toText == "POINT (0 1)")
+    assert(geom.getSRID == 4269)
+  }
+
it("passed st_geomfromtext") {
val df = sparkSession.sql("SELECT 'POINT(0.0 1.0)' AS
wkt").select(ST_GeomFromText("wkt"))
val actualResult = df.take(1)(0).get(0).asInstanceOf[Geometry].toText()