This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 16e6a254 [SEDONA-120] Add ST_BuildArea function and tests (#624)
16e6a254 is described below
commit 16e6a254cd9e6c5c0802da57e7c78d7e6501737d
Author: R B Krishna <[email protected]>
AuthorDate: Tue Apr 26 08:45:27 2022 -0700
[SEDONA-120] Add ST_BuildArea function and tests (#624)
---
.../org/apache/sedona/core/utils/GeomUtils.java | 69 +++++++++++++++++++++-
docs/api/flink/Function.md | 19 ++++++
docs/api/sql/Function.md | 28 ++++++++-
.../main/java/org/apache/sedona/flink/Catalog.java | 1 +
.../apache/sedona/flink/expressions/Functions.java | 8 +++
.../java/org/apache/sedona/flink/FunctionTest.java | 9 +++
python/tests/sql/test_function.py | 36 +++++++++--
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 +
.../sql/sedona_sql/expressions/Functions.scala | 35 ++++++++++-
.../org/apache/sedona/sql/functionTestScala.scala | 37 +++++++++++-
10 files changed, 232 insertions(+), 11 deletions(-)
diff --git a/core/src/main/java/org/apache/sedona/core/utils/GeomUtils.java
b/core/src/main/java/org/apache/sedona/core/utils/GeomUtils.java
index edc7c3c0..f5b9b119 100644
--- a/core/src/main/java/org/apache/sedona/core/utils/GeomUtils.java
+++ b/core/src/main/java/org/apache/sedona/core/utils/GeomUtils.java
@@ -20,7 +20,10 @@ import org.locationtech.jts.geom.CoordinateSequence;
import org.locationtech.jts.geom.CoordinateSequenceFilter;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.io.WKTWriter;
-import java.util.Objects;
+import org.locationtech.jts.operation.polygonize.Polygonizer;
+import org.locationtech.jts.operation.union.UnaryUnionOp;
+
+import java.util.*;
import static org.locationtech.jts.geom.Coordinate.NULL_ORDINATE;
@@ -154,4 +157,66 @@ public class GeomUtils
geom.geometryChanged();
return geom;
}
-}
+
+    /**
+     * Builds an areal geometry from the linework of the given geometry.
+     * <p>
+     * The input is polygonized; when more than one face results, only faces with an even
+     * number of ancestors (as determined by {@code findFaceHoles}/{@code countParents}) are
+     * kept and unioned, so that faces nested inside a hole alternate between void and area.
+     * The SRID of the input is carried over to the result on every non-null return path.
+     *
+     * @param geom the geometry whose linework should be turned into polygons; may be null
+     * @return {@code geom} itself when it is null or empty; {@code null} when the linework
+     *         forms no polygon; otherwise the resulting polygon or union of polygons
+     */
+    public static Geometry buildArea(Geometry geom) {
+        if (geom == null || geom.isEmpty()) {
+            return geom;
+        }
+        Polygonizer polygonizer = new Polygonizer();
+        polygonizer.add(geom);
+        List<Polygon> polygons = (List<Polygon>) polygonizer.getPolygons();
+        if (polygons.isEmpty()) {
+            return null;
+        }
+        // Read the SRID before branching so that every result path can propagate it.
+        int srid = geom.getSRID();
+        if (polygons.size() == 1) {
+            // Previously this path returned the face without the input SRID, unlike the union path below.
+            Polygon onlyFace = polygons.get(0);
+            onlyFace.setSRID(srid);
+            return onlyFace;
+        }
+        // findFaceHoles reorders `polygons` in place while building the child -> parent map.
+        Map<Polygon, Polygon> parentMap = findFaceHoles(polygons);
+        List<Polygon> facesWithEvenAncestors = new ArrayList<>();
+        for (Polygon face : polygons) {
+            face.normalize();
+            if (countParents(parentMap, face) % 2 == 0) {
+                facesWithEvenAncestors.add(face);
+            }
+        }
+        UnaryUnionOp unaryUnionOp = new UnaryUnionOp(facesWithEvenAncestors);
+        Geometry outputGeom = unaryUnionOp.union();
+        if (outputGeom != null) {
+            outputGeom.normalize();
+            outputGeom.setSRID(srid);
+        }
+        return outputGeom;
+    }
+
+ private static Map<Polygon, Polygon> findFaceHoles(List<Polygon> faces) {
+ Map<Polygon, Polygon> parentMap = new HashMap<>();
+ faces.sort(Comparator.comparing((Polygon p) ->
p.getEnvelope().getArea()).reversed());
+ for (int i = 0; i < faces.size(); i++) {
+ Polygon face = faces.get(i);
+ int nHoles = face.getNumInteriorRing();
+ for (int h = 0; h < nHoles; h++) {
+ Geometry hole = face.getInteriorRingN(h);
+ for (int j = i + 1; j < faces.size(); j++) {
+ Polygon face2 = faces.get(j);
+ if (parentMap.containsKey(face2)) {
+ continue;
+ }
+ Geometry face2ExteriorRing = face2.getExteriorRing();
+ if (face2ExteriorRing.equals(hole)) {
+ parentMap.put(face2, face);
+ }
+ }
+ }
+ }
+ return parentMap;
+ }
+
+    /**
+     * Counts how many ancestors a face has by following the child -> parent map
+     * until a face with no entry is reached.
+     *
+     * @param parentMap map from a face to its parent face
+     * @param face the face whose ancestor chain is walked
+     * @return the number of links followed
+     */
+    private static int countParents(Map<Polygon, Polygon> parentMap, Polygon face) {
+        int depth = 0;
+        for (Polygon current = face; parentMap.containsKey(current); current = parentMap.get(current)) {
+            depth++;
+        }
+        return depth;
+    }
+}
\ No newline at end of file
diff --git a/docs/api/flink/Function.md b/docs/api/flink/Function.md
index bac2942f..f4a0590f 100644
--- a/docs/api/flink/Function.md
+++ b/docs/api/flink/Function.md
@@ -317,3 +317,22 @@ FROM df
Input: `POLYGON ((-1 -11, 0 10, 1 11, 2 12, -1 -11))`
Output: `-1`
+
+## ST_BuildArea
+
+Introduction: Returns the areal geometry formed by the constituent linework of
the input geometry.
+
+Format: `ST_BuildArea (A:geometry)`
+
+Since: `v1.2.1`
+
+Example:
+
+```SQL
+SELECT ST_BuildArea(ST_Collect(smallDf, bigDf)) AS geom
+FROM smallDf, bigDf
+```
+
+Input: `MULTILINESTRING((0 0, 10 0, 10 10, 0 10, 0 0),(10 10, 20 10, 20 20, 10
20, 10 10))`
+
+Output: `MULTIPOLYGON(((0 0,0 10,10 10,10 0,0 0)),((10 10,10 20,20 20,20 10,10
10)))`
\ No newline at end of file
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index f4c35430..282b4d18 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -1240,4 +1240,30 @@ FROM df
Input: `POLYGON ((-1 -11, 0 10, 1 11, 2 12, -1 -11))`
-Output: `-1`
\ No newline at end of file
+Output: `-1`
+
+## ST_BuildArea
+
+Introduction: Returns the areal geometry formed by the constituent linework of
the input geometry.
+
+Format: `ST_BuildArea (A:geometry)`
+
+Since: `v1.2.1`
+
+Example:
+
+```SQL
+SELECT ST_BuildArea(
+ ST_GeomFromText('MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2,
18 18, 2 18, 2 2))')
+) AS geom
+```
+
+Result:
+
+```
++----------------------------------------------------------------------------+
+|geom |
++----------------------------------------------------------------------------+
+|POLYGON((0 0,0 20,20 20,20 0,0 0),(2 2,18 2,18 18,2 18,2 2)) |
++----------------------------------------------------------------------------+
+```
\ No newline at end of file
diff --git a/flink/src/main/java/org/apache/sedona/flink/Catalog.java
b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
index 1c326c57..f5628ee5 100644
--- a/flink/src/main/java/org/apache/sedona/flink/Catalog.java
+++ b/flink/src/main/java/org/apache/sedona/flink/Catalog.java
@@ -42,6 +42,7 @@ public class Catalog {
new Functions.ST_YMin(),
new Functions.ST_XMax(),
new Functions.ST_XMin(),
+ new Functions.ST_BuildArea()
};
}
diff --git
a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java
b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java
index 4d369e17..c0542afa 100644
--- a/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java
+++ b/flink/src/main/java/org/apache/sedona/flink/expressions/Functions.java
@@ -206,4 +206,12 @@ public class Functions {
return min;
}
}
+
+    /** Flink scalar function that delegates to {@code GeomUtils.buildArea}. */
+    public static class ST_BuildArea extends ScalarFunction {
+        /**
+         * @param o the input value, cast to a JTS {@code Geometry}
+         * @return whatever {@code GeomUtils.buildArea} returns for that geometry
+         */
+        @DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class)
+        public Geometry eval(@DataTypeHint(value = "RAW", bridgedTo = org.locationtech.jts.geom.Geometry.class) Object o) {
+            return GeomUtils.buildArea((Geometry) o);
+        }
+    }
}
diff --git a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
index 97176551..ffe8e10c 100644
--- a/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
+++ b/flink/src/test/java/org/apache/sedona/flink/FunctionTest.java
@@ -30,6 +30,7 @@ import java.util.Optional;
import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
public class FunctionTest extends TestBase{
@BeforeClass
@@ -175,5 +176,13 @@ public class FunctionTest extends TestBase{
double result = (double) first(MinTable).getField(0);
assertEquals(-0.5, result,0);
}
+
+ @Test
+ public void testBuildArea() { // Runs ST_BuildArea on a generated polygon table and checks the WKT of the result.
+ Table polygonTable = createPolygonTable(1); // NOTE(review): presumably a one-row table; confirm against TestBase
+ Table arealGeomTable =
polygonTable.select(call(Functions.ST_BuildArea.class.getSimpleName(),
$(polygonColNames[0]))); // calls the function by its class simple name on the first polygon column
+ Geometry result = (Geometry) first(arealGeomTable).getField(0); // field 0 of the row returned by first(...)
+ assertEquals("POLYGON ((-0.5 -0.5, -0.5 0.5, 0.5 0.5, 0.5 -0.5, -0.5
-0.5))", result.toString());
+ }
}
diff --git a/python/tests/sql/test_function.py
b/python/tests/sql/test_function.py
index bfa7477b..9b674999 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -16,20 +16,18 @@
# under the License.
import math
-from typing import List
-
from pyspark.sql import DataFrame
from pyspark.sql.functions import col
from pyspark.sql.functions import explode, expr
from pyspark.sql.types import StructType, StructField, IntegerType
+from sedona.sql.types import GeometryType
from shapely import wkt
from shapely.wkt import loads
-
-from sedona.sql.types import GeometryType
from tests import mixed_wkt_geometry_input_location
from tests.sql.resource.sample_data import create_sample_points,
create_simple_polygons_df, \
create_sample_points_df, create_sample_polygons_df, create_sample_lines_df
from tests.test_base import TestBase
+from typing import List
class TestPredicateJoin(TestBase):
@@ -989,3 +987,33 @@ class TestPredicateJoin(TestBase):
geom_2d = self.spark.sql(
"select
ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom))
assert geom_2d.take(1)[0][0] == expected_geom
+
+ def test_st_buildarea(self):  # Table-driven check: ST_BuildArea over each input WKT must yield the paired expected WKT.
+ tests = {  # key: quoted WKT literal spliced into the SQL; value: expected ST_AsText output
+ "'MULTILINESTRING((0 0, 10 0, 10 10, 0 10, 0 0),(10 10, 20 10, 20
20, 10 20, 10 10))'":  # two square rings meeting only at (10 10) -> two separate polygons
+ "MULTIPOLYGON (((0 0, 0 10, 10 10, 10 0, 0 0)), ((10 10, 10
20, 20 20, 20 10, 10 10)))",
+ "'MULTILINESTRING((0 0, 10 0, 10 10, 0 10, 0 0),(10 10, 20 10, 20
0, 10 0, 10 10))'":  # two square rings sharing the edge x = 10 -> one merged polygon
+ "POLYGON ((0 0, 0 10, 10 10, 20 10, 20 0, 10 0, 0 0))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18,
2 18, 2 2))'":  # ring nested in a ring -> polygon with one hole
+ "POLYGON ((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2
18, 2 2))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0), (2 2, 18 2, 18
18, 2 18, 2 2), (8 8, 8 12, 12 12, 12 8, 8 8))'":  # three nested rings -> holed polygon plus the innermost ring as an island
+ "MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18
18, 2 18, 2 2)), ((8 8, 8 12, 12 12, 12 8, 8 8)))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18,
2 18, 2 2), " \
+ "(8 8, 8 9, 8 10, 8 11, 8 12, 9 12, 10 12, 11 12, 12 12, 12 11, 12
10, 12 9, 12 8, 11 8, 10 8, 9 8, 8 8))'":  # same nesting, innermost ring given with extra vertices, which are kept in the output
+ "MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18
18, 2 18, 2 2)), " \
+ "((8 8, 8 9, 8 10, 8 11, 8 12, 9 12, 10 12, 11 12, 12 12, 12
11, 12 10, 12 9, 12 8, 11 8, 10 8, 9 8, 8 8)))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18,
2 18, 2 2),(8 8, 8 12, 12 12, 12 8, 8 8),(10 8, 10 12))'":  # extra segment (10 8, 10 12): expected output equals the three-ring case
+ "MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18
18, 2 18, 2 2)), ((8 8, 8 12, 12 12, 12 8, 8 8)))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18,
2 18, 2 2),(10 2, 10 18))'":  # extra segment (10 2, 10 18): expected output equals the two-ring case
+ "POLYGON ((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2
18, 2 2))",
+ "'MULTILINESTRING( (0 0, 70 0, 70 70, 0 70, 0 0), (10 10, 10 60,
40 60, 40 10, 10 10), " \
+ "(20 20, 20 30, 30 30, 30 20, 20 20), (20 30, 30 30, 30 50, 20 50,
20 30), (50 20, 60 20, 60 40, 50 40, 50 20), " \
+ "(50 40, 60 40, 60 60, 50 60, 50 40), (80 0, 110 0, 110 70, 80 70,
80 0), (90 60, 100 60, 100 50, 90 50, 90 60))'":  # eight rings, some sharing an edge -> three polygons with holes and one merged island
+ "MULTIPOLYGON (((0 0, 0 70, 70 70, 70 0, 0 0), (10 10, 40 10,
40 60, 10 60, 10 10), (50 20, 60 20, 60 40, 60 60, 50 60, 50 40, 50 20)), " \
+ "((20 20, 20 30, 20 50, 30 50, 30 30, 30 20, 20 20)), " \
+ "((80 0, 80 70, 110 70, 110 0, 80 0), (90 50, 100 50, 100 60,
90 60, 90 50)))"
+ }
+
+ for input_geom, expected_geom in tests.items():  # one SQL query per case
+ areal_geom = self.spark.sql("select
ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom))
+ assert areal_geom.take(1)[0][0] == expected_geom  # first column of the first returned row
\ No newline at end of file
diff --git a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index eba06450..5ebd580e 100644
--- a/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/sql/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -113,6 +113,7 @@ object Catalog {
ST_YMin,
ST_XMax,
ST_XMin,
+ ST_BuildArea,
// Expression for rasters
RS_NormalizedDifference,
RS_Mean,
diff --git
a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index 919e3385..96b9f31f 100644
---
a/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++
b/sql/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -1763,10 +1763,10 @@ case class ST_XMin(inputExpressions: Seq[Expression])
override protected def nullSafeEval(geometry: Geometry): Any = {
- var coord:Array[Coordinate] = geometry.getCoordinates()
+ var coord: Array[Coordinate] = geometry.getCoordinates()
var minval = Double.MaxValue
- for (point<-coord) {
- if(point.getX()<minval){
+ for (point <- coord) {
+ if (point.getX() < minval) {
minval = point.getX()
}
}
@@ -1782,3 +1782,32 @@ case class ST_XMin(inputExpressions: Seq[Expression])
copy(inputExpressions = newChildren)
}
}
+
+
+/**
+ * Returns the areal geometry formed by the constituent linework of the input geometry,
+ * assuming all inner geometries represent holes.
+ *
+ * Evaluates to null when the input does not evaluate to a geometry, or when
+ * GeomUtils.buildArea yields null because the linework forms no polygon.
+ *
+ * @param inputExpressions exactly one expression producing the input geometry
+ */
+case class ST_BuildArea(inputExpressions: Seq[Expression])
+  extends Expression with CodegenFallback {
+  assert(inputExpressions.length == 1)
+
+  override def nullable: Boolean = true
+
+  override def eval(input: InternalRow): Any = {
+    inputExpressions.head.toGeometry(input) match {
+      case geom: Geometry =>
+        // buildArea returns null when no polygon can be formed; surface that as SQL null
+        // instead of handing a null geometry to the serializer.
+        val area = GeomUtils.buildArea(geom)
+        if (area == null) null else new GenericArrayData(GeometrySerializer.serialize(area))
+      case _ => null
+    }
+  }
+
+  override def dataType: DataType = GeometryUDT
+
+  override def children: Seq[Expression] = inputExpressions
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = {
+    copy(inputExpressions = newChildren)
+  }
+}
diff --git a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 2831f77e..a75d70aa 100644
--- a/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/sql/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -1424,6 +1424,39 @@ class functionTestScala extends TestBaseScala with
Matchers with GeometrySample
}
+ it ("Should pass ST_BuildArea") { // Table-driven check: each quoted input WKT maps to the expected ST_AsText output.
+ val geomTestCases = Map(
+ "'MULTILINESTRING((0 0, 10 0, 10 10, 0 10, 0 0),(10 10, 20 10, 20 20, 10
20, 10 10))'" // two square rings meeting only at (10 10) -> two separate polygons
+ -> "MULTIPOLYGON (((0 0, 0 10, 10 10, 10 0, 0 0)), ((10 10, 10 20, 20
20, 20 10, 10 10)))",
+ "'MULTILINESTRING((0 0, 10 0, 10 10, 0 10, 0 0),(10 10, 20 10, 20 0, 10
0, 10 10))'" // two square rings sharing the edge x = 10 -> one merged polygon
+ -> "POLYGON ((0 0, 0 10, 10 10, 20 10, 20 0, 10 0, 0 0))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18, 2 18,
2 2))'" // ring nested in a ring -> polygon with one hole
+ -> "POLYGON ((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2 18, 2
2))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0), (2 2, 18 2, 18 18, 2
18, 2 2), (8 8, 8 12, 12 12, 12 8, 8 8))'" // three nested rings -> holed polygon plus the innermost ring as an island
+ -> "MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2
18, 2 2)), ((8 8, 8 12, 12 12, 12 8, 8 8)))",
+ """'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18, 2
18, 2 2),
+ |(8 8, 8 9, 8 10, 8 11, 8 12, 9 12, 10 12, 11 12, 12 12, 12 11, 12 10,
12 9, 12 8, 11 8, 10 8, 9 8, 8 8))'""".stripMargin.replaceAll("\n", " ")
+ -> """MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18,
2 18, 2 2)),
+ |((8 8, 8 9, 8 10, 8 11, 8 12, 9 12, 10 12, 11 12, 12 12, 12 11,
12 10, 12 9, 12 8, 11 8, 10 8, 9 8, 8 8)))""".stripMargin.replaceAll("\n", " "),
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18, 2 18,
2 2),(8 8, 8 12, 12 12, 12 8, 8 8),(10 8, 10 12))'" // extra segment (10 8, 10 12): expected output equals the three-ring case
+ -> "MULTIPOLYGON (((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2
18, 2 2)), ((8 8, 8 12, 12 12, 12 8, 8 8)))",
+ "'MULTILINESTRING((0 0, 20 0, 20 20, 0 20, 0 0),(2 2, 18 2, 18 18, 2 18,
2 2),(10 2, 10 18))'" // extra segment (10 2, 10 18): expected output equals the two-ring case
+ -> "POLYGON ((0 0, 0 20, 20 20, 20 0, 0 0), (2 2, 18 2, 18 18, 2 18, 2
2))",
+ """'MULTILINESTRING( (0 0, 70 0, 70 70, 0 70, 0 0), (10 10, 10 60, 40
60, 40 10, 10 10),
+ |(20 20, 20 30, 30 30, 30 20, 20 20), (20 30, 30 30, 30 50, 20 50, 20
30), (50 20, 60 20, 60 40, 50 40, 50 20),
+ |(50 40, 60 40, 60 60, 50 60, 50 40), (80 0, 110 0, 110 70, 80 70, 80
0), (90 60, 100 60, 100 50, 90 50, 90 60))'""".stripMargin.replaceAll("\n", " ")
+ -> """MULTIPOLYGON (((0 0, 0 70, 70 70, 70 0, 0 0), (10 10, 40 10, 40
60, 10 60, 10 10), (50 20, 60 20, 60 40, 60 60, 50 60, 50 40, 50 20)),
+ |((20 20, 20 30, 20 50, 30 50, 30 30, 30 20, 20 20)),
+ |((80 0, 80 70, 110 70, 110 0, 80 0), (90 50, 100 50, 100 60, 90 60,
90 50)))""".stripMargin.replaceAll("\n", " ")
+ )
+
+ for ((inputGeom, expectedGeom) <- geomTestCases) { // one SQL query per case
+ val df = sparkSession.sql(s"select
ST_AsText(ST_BuildArea(ST_GeomFromText($inputGeom)))")
+ val result = df.collect()
+ assert(result.head.get(0).asInstanceOf[String] == expectedGeom) // first column of the first collected row
+ }
+ }
+
it("handles nulls") {
var functionDf: DataFrame = null
functionDf = sparkSession.sql("select ST_Distance(null, null)")
@@ -1538,5 +1571,7 @@ class functionTestScala extends TestBaseScala with
Matchers with GeometrySample
assert(functionDf.first().get(0) == null)
functionDf = sparkSession.sql("select ST_Force_2D(null)")
assert(functionDf.first().get(0) == null)
+ functionDf = sparkSession.sql("select ST_BuildArea(null)")
+ assert(functionDf.first().get(0) == null)
}
-}
+}
\ No newline at end of file