This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 4d4123009 [SEDONA-606] Add ST_IsValidDetail (#1474)
4d4123009 is described below
commit 4d41230094249a650a027fd550aeefda25e6c6fe
Author: Furqaan Khan <[email protected]>
AuthorDate: Tue Jun 11 12:44:22 2024 -0400
[SEDONA-606] Add ST_IsValidDetail (#1474)
* feat: Add ST_IsValidDetail
* fix: some other snowflake test
* fix: some other snowflake test
* fix: some other snowflake test
* chore: remove toString method
* add: change examples in docs and simplify equals check
* docs: add correct snowflake example.
---
.../java/org/apache/sedona/common/Functions.java | 33 +++++++++++--
.../apache/sedona/common/utils/ValidDetail.java | 42 +++++++++++++++++
.../org/apache/sedona/common/FunctionsTest.java | 39 ++++++++++++++++
docs/api/snowflake/vector-data/Function.md | 35 ++++++++++++++
docs/api/sql/Function.md | 39 ++++++++++++++++
python/sedona/sql/st_functions.py | 17 +++++++
python/tests/sql/test_dataframe_api.py | 3 ++
python/tests/sql/test_function.py | 14 +++++-
.../sedona/snowflake/snowsql/TestConstructors.java | 1 +
.../snowflake/snowsql/TestTableFunctions.java | 19 ++++++++
.../snowflake/snowsql/ddl/UDTFDDLGenerator.java | 1 +
.../snowflake/snowsql/udtfs/ST_IsValidDetail.java | 54 ++++++++++++++++++++++
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 1 +
.../sql/sedona_sql/expressions/Functions.scala | 52 ++++++++++++++++++++-
.../sql/sedona_sql/expressions/st_functions.scala | 9 +++-
.../apache/sedona/sql/dataFrameAPITestScala.scala | 22 +++++++++
.../org/apache/sedona/sql/functionTestScala.scala | 27 +++++++++++
17 files changed, 400 insertions(+), 8 deletions(-)
diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java
b/common/src/main/java/org/apache/sedona/common/Functions.java
index 3625fabc4..e85e8d330 100644
--- a/common/src/main/java/org/apache/sedona/common/Functions.java
+++ b/common/src/main/java/org/apache/sedona/common/Functions.java
@@ -1828,11 +1828,7 @@ public class Functions {
return GeomUtils.getHausdorffDistance(g1, g2, -1);
}
- public static String isValidReason(Geometry geom) {
- return isValidReason(geom, OGC_SFS_VALIDITY);
- }
-
- public static String isValidReason(Geometry geom, int flag) {
+ private static IsValidOp getIsValidOpObject(Geometry geom, int flag) {
IsValidOp isValidOp = new IsValidOp(geom);
// Set the validity model based on flags
@@ -1841,6 +1837,33 @@ public class Functions {
} else {
isValidOp.setSelfTouchingRingFormingHoleValid(false);
}
+ return isValidOp;
+ }
+
+    /**
+     * Checks a geometry using the default validity model ({@code OGC_SFS_VALIDITY}).
+     *
+     * @param geom the geometry to check
+     * @return the validity flag together with the reason and location of the problem, if any
+     */
+    public static ValidDetail isValidDetail(Geometry geom) {
+        return isValidDetail(geom, OGC_SFS_VALIDITY);
+    }
+
+    /**
+     * Checks a geometry using the validity model selected by {@code flag}.
+     *
+     * @param geom the geometry to check
+     * @param flag validity model selector, forwarded to {@code getIsValidOpObject}
+     * @return {@code ValidDetail(true, null, null)} when the geometry is valid; otherwise a
+     *     detail holding {@code false}, the text of the validation error and a point built
+     *     from the error coordinate with the geometry's own factory
+     */
+    public static ValidDetail isValidDetail(Geometry geom, int flag) {
+        IsValidOp validator = getIsValidOpObject(geom, flag);
+
+        // Report the failure first; the valid case falls through to the end.
+        if (!validator.isValid()) {
+            TopologyValidationError validationError = validator.getValidationError();
+            String message = validationError.toString();
+            Geometry errorPoint = geom.getFactory().createPoint(validationError.getCoordinate());
+            return new ValidDetail(false, message, errorPoint);
+        }
+
+        return new ValidDetail(true, null, null);
+    }
+
+ public static String isValidReason(Geometry geom) {
+ return isValidReason(geom, OGC_SFS_VALIDITY);
+ }
+
+ public static String isValidReason(Geometry geom, int flag) {
+ IsValidOp isValidOp = getIsValidOpObject(geom, flag);
+
if (isValidOp.isValid()) {
return "Valid Geometry";
diff --git
a/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java
b/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java
new file mode 100644
index 000000000..5b5578b8d
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.utils;
+
+import org.apache.sedona.common.Functions;
+import org.locationtech.jts.geom.Geometry;
+
+import java.util.Objects;
+
+public class ValidDetail {
+ public final boolean valid;
+ public final String reason;
+ public final Geometry location;
+
+ public ValidDetail(boolean valid, String reason, Geometry location) {
+ this.valid = valid;
+ this.reason = reason;
+ this.location = location;
+ }
+
+ public boolean equals(ValidDetail other) {
+ return this.valid == other.valid &&
+ Objects.equals(this.reason, other.reason) &&
+ Objects.equals(this.location, other.location);
+ }
+}
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index cdf1dea02..c5c759e65 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -20,6 +20,7 @@ import org.apache.sedona.common.sphere.Haversine;
import org.apache.sedona.common.sphere.Spheroid;
import org.apache.sedona.common.utils.GeomUtils;
import org.apache.sedona.common.utils.S2Utils;
+import org.apache.sedona.common.utils.ValidDetail;
import org.geotools.referencing.CRS;
import org.geotools.referencing.operation.projection.ProjectionException;
import org.junit.Test;
@@ -2861,6 +2862,44 @@ public class FunctionsTest extends TestBase {
assertEquals("Polygon geometry type not supported, supported types
are: (Multi)Point and (Multi)LineString.", e.getMessage());
}
+ @Test
+ public void isValidDetail() throws ParseException {
+ // Valid geometry
+ Geometry validGeom = GEOMETRY_FACTORY.createPolygon(coordArray(30, 10,
40, 40, 20, 40, 10, 20, 30, 10));
+ ValidDetail actualValidDetail = Functions.isValidDetail(validGeom);
+ ValidDetail expectedValidDetail = new ValidDetail(true, null, null);
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+ Integer OGC_SFS_VALIDITY = 0;
+ Integer ESRI_VALIDITY = 1;
+
+ actualValidDetail = Functions.isValidDetail(validGeom,
OGC_SFS_VALIDITY);
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+ actualValidDetail = Functions.isValidDetail(validGeom, ESRI_VALIDITY);
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+ // Invalid geometry (self-intersection)
+ Geometry invalidGeom = GEOMETRY_FACTORY.createPolygon(coordArray(30,
10, 40, 40, 20, 40, 30, 10, 10, 20, 30, 10));
+ actualValidDetail = Functions.isValidDetail(invalidGeom);
+ expectedValidDetail = new ValidDetail(false,
+ "Ring Self-intersection at or near point (30.0, 10.0, NaN)",
+ Constructors.geomFromEWKT("POINT (30 10)"));
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+ actualValidDetail = Functions.isValidDetail(invalidGeom,
OGC_SFS_VALIDITY);
+ expectedValidDetail = new ValidDetail(false,
+ "Ring Self-intersection at or near point (30.0, 10.0, NaN)",
+ Constructors.geomFromEWKT("POINT (30 10)"));
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+ actualValidDetail = Functions.isValidDetail(invalidGeom,
ESRI_VALIDITY);
+ expectedValidDetail = new ValidDetail(false,
+ "Self-intersection at or near point (10.0, 20.0, NaN)",
+ Constructors.geomFromEWKT("POINT (10 20)"));
+ assertTrue(expectedValidDetail.equals(actualValidDetail));
+ }
+
@Test
public void isValidReason() {
// Valid geometry
diff --git a/docs/api/snowflake/vector-data/Function.md
b/docs/api/snowflake/vector-data/Function.md
index ba4574d64..5da53224d 100644
--- a/docs/api/snowflake/vector-data/Function.md
+++ b/docs/api/snowflake/vector-data/Function.md
@@ -1486,6 +1486,41 @@ Output:
false
```
+## ST_IsValidDetail
+
+Introduction: Returns a row, containing a boolean `valid` stating if a
geometry is valid, a string `reason` stating why it is invalid and a geometry
`location` pointing out where it is invalid.
+
+This function is a combination of [ST_IsValid](#st_isvalid) and
[ST_IsValidReason](#st_isvalidreason).
+
+The flags parameter is a bitfield with the following options:
+
+- 0: Use usual OGC SFS (Simple Features Specification) validity semantics.
+- 1: "ESRI flag", Accepts certain self-touching rings as valid, which are
considered invalid under OGC standards.
+
+Format:
+
+```sql
+SELECT valid, reason, Sedona.ST_AsText(location) AS location
+FROM table(Sedona.ST_IsValidDetail(geom: Geometry, flag: Integer))
+```
+
+SQL Example:
+
+```sql
+SELECT valid, reason, Sedona.ST_AsText(location) AS location
+ FROM table(Sedona.ST_IsValidDetail(Sedona.ST_GeomFromWKT('POLYGON ((30
10, 40 40, 20 40, 30 10, 10 20, 30 10))'), 0))
+```
+
+Output:
+
+```
++-----+---------------------------------------------------------+-------------+
+|valid|reason |location |
++-----+---------------------------------------------------------+-------------+
+|false|Ring Self-intersection at or near point (30.0, 10.0, NaN)|POINT (30 10)|
++-----+---------------------------------------------------------+-------------+
+```
+
## ST_IsValidReason
Introduction: Returns text stating if the geometry is valid. If not, it
provides a reason why it is invalid. The function can be invoked with just the
geometry or with an additional flag. The flag alters the validity checking
behavior. The flags parameter is a bitfield with the following options:
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index 858412a20..591312b8e 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -2111,6 +2111,45 @@ Output:
false
```
+## ST_IsValidDetail
+
+Introduction: Returns a row, containing a boolean `valid` stating if a
geometry is valid, a string `reason` stating why it is invalid and a geometry
`location` pointing out where it is invalid.
+
+This function is a combination of [ST_IsValid](#st_isvalid) and
[ST_IsValidReason](#st_isvalidreason).
+
+The flags parameter is a bitfield with the following options:
+
+- 0 (default): Use usual OGC SFS (Simple Features Specification) validity
semantics.
+- 1: "ESRI flag", Accepts certain self-touching rings as valid, which are
considered invalid under OGC standards.
+
+Formats:
+
+```sql
+ST_IsValidDetail(geom: Geometry)
+```
+
+```sql
+ST_IsValidDetail(geom: Geometry, flag: Integer)
+```
+
+Since: `v1.6.1`
+
+SQL Example:
+
+```sql
+SELECT ST_IsValidDetail(ST_GeomFromWKT('POLYGON ((30 10, 40 40, 20 40, 30 10,
10 20, 30 10))'))
+```
+
+Output:
+
+```
++-----+---------------------------------------------------------+-------------+
+|valid|reason |location |
++-----+---------------------------------------------------------+-------------+
+|false|Ring Self-intersection at or near point (30.0, 10.0, NaN)|POINT (30 10)|
++-----+---------------------------------------------------------+-------------+
+```
+
## ST_IsValidReason
Introduction: Returns text stating if the geometry is valid. If not, it
provides a reason why it is invalid. The function can be invoked with just the
geometry or with an additional flag. The flag alters the validity checking
behavior. The flags parameter is a bitfield with the following options:
diff --git a/python/sedona/sql/st_functions.py
b/python/sedona/sql/st_functions.py
index 9ab2747d8..305ab8681 100644
--- a/python/sedona/sql/st_functions.py
+++ b/python/sedona/sql/st_functions.py
@@ -779,6 +779,23 @@ def ST_IsValid(geometry: ColumnOrName, flag:
Optional[Union[ColumnOrName, int]]
args = (geometry,) if flag is None else (geometry, flag)
return _call_st_function("ST_IsValid", args)
+@validate_argument_types
+def ST_IsValidDetail(geometry: ColumnOrName, flag:
Optional[Union[ColumnOrName, int]] = None) -> Column:
+ """
+ Return a row with three fields describing the validity of a geometry:
+ valid, reason and location.
+
+ valid states whether the geometry is valid, reason explains why it is not
+ valid, and location is the point where it is not valid. If the geometry is
+ valid, reason and location are null.
+
+ :param geometry: Geometry column to validate.
+ :type geometry: ColumnOrName
+ :param flag: Optional flag to modify behavior of the validity check
+ (0: OGC SFS validity semantics, 1: ESRI validity semantics). It is only
+ passed on to the SQL function when it is not None.
+ :type flag: Optional[Union[ColumnOrName, int]]
+ :return: Row of valid, reason and location
+ :rtype: Column
+ """
+ args = (geometry,) if flag is None else (geometry, flag)
+ return _call_st_function("ST_IsValidDetail", args)
+
@validate_argument_types
def ST_IsValidReason(geometry: ColumnOrName, flag:
Optional[Union[ColumnOrName, int]] = None) -> Column:
"""
diff --git a/python/tests/sql/test_dataframe_api.py
b/python/tests/sql/test_dataframe_api.py
index 506b698f8..70d491595 100644
--- a/python/tests/sql/test_dataframe_api.py
+++ b/python/tests/sql/test_dataframe_api.py
@@ -152,6 +152,8 @@ test_configurations = [
(stf.ST_IsValid, ("geom",), "triangle_geom", "", True),
(stf.ST_IsValid, ("geom", 1), "triangle_geom", "", True),
(stf.ST_IsValid, ("geom", 0), "triangle_geom", "", True),
+ (stf.ST_IsValidDetail, ("geom",), "triangle_geom", "", Row(valid=True,
reason=None, location=None).asDict()),
+ (stf.ST_IsValidDetail, ("geom", 1), "triangle_geom", "", Row(valid=True,
reason=None, location=None).asDict()),
(stf.ST_Length, ("line",), "linestring_geom", "", 5.0),
(stf.ST_Length2D, ("line",), "linestring_geom", "", 5.0),
(stf.ST_LengthSpheroid, ("point",), "point_geom", "", 0.0),
@@ -354,6 +356,7 @@ wrong_type_configurations = [
(stf.ST_IsPolygonCCW, (None,)),
(stf.ST_IsRing, (None,)),
(stf.ST_IsSimple, (None,)),
+ (stf.ST_IsValidDetail, (None,)),
(stf.ST_IsValid, (None,)),
(stf.ST_IsValidReason, (None,)),
(stf.ST_Length, (None,)),
diff --git a/python/tests/sql/test_function.py
b/python/tests/sql/test_function.py
index 44de51214..fe32bb950 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -16,7 +16,7 @@
# under the License.
import math
-from pyspark.sql import DataFrame
+from pyspark.sql import DataFrame, Row
from pyspark.sql.functions import col
from pyspark.sql.functions import explode, expr
from pyspark.sql.types import StructType, StructField, IntegerType
@@ -310,6 +310,18 @@ class TestPredicateJoin(TestBase):
intersects = self.spark.sql("select ST_Intersection(a,b) from
testtable")
assert intersects.take(1)[0][0].wkt == "POLYGON EMPTY"
+ def test_st_is_valid_detail(self):
+ baseDf = self.spark.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 2
2, 0 2, 1 1, 0 0))') AS geom")
+ actual = baseDf.selectExpr("ST_IsValidDetail(geom)").first()[0]
+ expected = Row(valid=True, reason=None, location=None)
+ assert expected == actual
+
+ baseDf = self.spark.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 1
1, 2 2, 0 2, 1 1, 0 0))') AS geom")
+ actual = baseDf.selectExpr("ST_IsValidDetail(geom)").first()[0]
+ expected = Row(valid=False, reason="Ring Self-intersection at or near
point (1.0, 1.0, NaN)", location=
+ self.spark.sql("SELECT ST_GeomFromText('POINT (1 1)')").first()[0])
+ assert expected == actual
+
def test_st_is_valid(self):
test_table = self.spark.sql(
"SELECT ST_IsValid(ST_GeomFromWKT('POLYGON((0 0, 10 0, 10 10, 0
10, 0 0), (15 15, 15 20, 20 20, 20 15, 15 15))')) AS a, " +
diff --git
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
index 5a85df2a5..a9e2001b5 100644
---
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
+++
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
@@ -239,6 +239,7 @@ public class TestConstructors extends TestBase{
"GEOMETRYCOLLECTION (POINT (50 50), LINESTRING (20 30, 40 60,
80 90), POLYGON ((30 10, 40 20, 30 20, 30 10), (35 15, 45 15, 40 25, 35 15)))"
);
registerUDF("ST_GeomCollFromText", String.class, int.class);
+ registerUDF("ST_SRID", byte[].class);
verifySqlSingleRes(
"select
sedona.ST_SRID(sedona.ST_GeomCollFromText('GEOMETRYCOLLECTION (POINT (50 50),
LINESTRING (20 30, 40 60, 80 90), POLYGON ((30 10, 40 20, 30 20, 30 10), (35
15, 45 15, 40 25, 35 15)))',4269))",
4269
diff --git
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
index a2df1b02f..fc8de74f2 100644
---
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
+++
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
@@ -51,6 +51,25 @@ public class TestTableFunctions extends TestBase{
Constructors.geomFromWKT("POLYGON ((0.5 1, 1 1, 1 0.5, 0.5
0.5, 0.5 1))", 0)
);
}
+
+ @Test
+ public void test_ST_IsValidDetail() {
+ registerUDTF(ST_IsValidDetail.class);
+ verifySqlSingleRes(
+ "select reason from
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40,
20 40, 30 10, 10 20, 30 10))'), 0))",
+ "Ring Self-intersection at or near point (30.0, 10.0, NaN)"
+ );
+ verifySqlSingleRes(
+ "select valid from
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40,
20 40, 30 10, 10 20, 30 10))'), 0))",
+ false
+ );
+ verifySqlSingleRes(
+ "select sedona.ST_AsText(location) from
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40,
20 40, 30 10, 10 20, 30 10))'), 0))",
+ "POINT (30 10)"
+ );
+
+ }
+
@Test
public void test_ST_SubDivideExplode() {
registerUDTF(ST_SubDivideExplode.class);
diff --git
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
index 3424d3661..a599d9d69 100644
---
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
+++
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
@@ -28,6 +28,7 @@ public class UDTFDDLGenerator {
ST_MinimumBoundingRadius.class,
ST_Intersection_Aggr.class,
ST_SubDivideExplode.class,
+ ST_IsValidDetail.class,
ST_Envelope_Aggr.class,
ST_Union_Aggr.class,
ST_Collect.class,
diff --git
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
new file mode 100644
index 000000000..ca74aed9b
--- /dev/null
+++
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sedona.snowflake.snowsql.udtfs;
+
+import org.apache.sedona.common.Functions;
+import org.apache.sedona.common.utils.ValidDetail;
+import org.apache.sedona.snowflake.snowsql.GeometrySerde;
+import org.apache.sedona.snowflake.snowsql.annotations.UDTFAnnotations;
+import org.locationtech.jts.io.ParseException;
+
+import java.util.stream.Stream;
+
+/**
+ * Snowflake table function exposing {@code Functions.isValidDetail}. It emits exactly one
+ * row per input geometry with the columns {@code valid}, {@code reason} and {@code location}.
+ */
+@UDTFAnnotations.TabularFunc(name = "ST_IsValidDetail", argNames = {"geom", "flag"})
+public class ST_IsValidDetail {
+
+    /** Output row; {@code location} holds the geometry in its serialized form. */
+    public static class OutputRow {
+        public final boolean valid;
+        public final String reason;
+        public final byte[] location;
+
+        public OutputRow(ValidDetail validDetail) {
+            this.valid = validDetail.valid;
+            this.reason = validDetail.reason;
+            // A valid geometry has no error location. Keep the column null instead of
+            // handing null to the serializer.
+            // NOTE(review): whether GeometrySerde.serialize tolerates null is not visible here.
+            this.location = validDetail.location == null
+                    ? null
+                    : GeometrySerde.serialize(validDetail.location);
+        }
+    }
+
+    public static Class getOutputClass() {
+        return OutputRow.class;
+    }
+
+    public ST_IsValidDetail() {
+    }
+
+    /**
+     * Validates one geometry.
+     *
+     * @param geometry serialized geometry to check
+     * @param flag validity model selector; when {@code null} the one-argument overload of
+     *     {@code Functions.isValidDetail} is used, which avoids a NullPointerException from
+     *     unboxing
+     * @return a stream holding the single result row
+     * @throws ParseException if the geometry bytes cannot be deserialized
+     */
+    public Stream<OutputRow> process(byte[] geometry, Integer flag) throws ParseException {
+        ValidDetail validDetail = flag == null
+                ? Functions.isValidDetail(GeometrySerde.deserialize(geometry))
+                : Functions.isValidDetail(GeometrySerde.deserialize(geometry), flag);
+
+        return Stream.of(new OutputRow(validDetail));
+    }
+}
diff --git
a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index d0229a146..95c950a9d 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -89,6 +89,7 @@ object Catalog {
function[ST_SymDifference](),
function[ST_UnaryUnion](),
function[ST_Union](),
+ function[ST_IsValidDetail](),
function[ST_IsValid](),
function[ST_IsEmpty](),
function[ST_ReducePrecision](),
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index e20e034ca..1db690ace 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -20,9 +20,11 @@ package org.apache.spark.sql.sedona_sql.expressions
import org.apache.sedona.common.{Functions, FunctionsGeoTools}
import org.apache.sedona.common.sphere.{Haversine, Spheroid}
+import org.apache.sedona.common.utils.ValidDetail
+import org.apache.sedona.sql.utils.GeometrySerializer
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.catalyst.expressions.{Expression, Generator}
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes,
Expression, Generator}
import org.apache.spark.sql.catalyst.util.ArrayData
import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
import org.apache.spark.sql.sedona_sql.expressions.implicits._
@@ -30,6 +32,7 @@ import org.apache.spark.sql.types._
import org.locationtech.jts.algorithm.MinimumBoundingCircle
import org.locationtech.jts.geom._
import
org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
+import org.apache.spark.unsafe.types.UTF8String
/**
* Return the distance between two geometries.
@@ -290,6 +293,53 @@ case class ST_MakeValid(inputExpressions: Seq[Expression])
}
}
+/**
+ * Returns a struct (valid: Boolean, reason: String, location: Geometry) describing the
+ * validity of a geometry. Accepts either (geometry) or (geometry, flag); the flag is
+ * forwarded to Functions.isValidDetail.
+ *
+ * @param children the geometry expression, optionally followed by the integer flag expression
+ */
+case class ST_IsValidDetail(children: Seq[Expression])
+  extends Expression with ExpectsInputTypes with CodegenFallback {
+
+  private val nArgs = children.length
+
+  override def inputTypes: Seq[AbstractDataType] = {
+    if (nArgs == 2) {
+      Seq(GeometryUDT, IntegerType)
+    } else if (nArgs == 1) {
+      Seq(GeometryUDT)
+    } else {
+      throw new IllegalArgumentException(s"Invalid number of arguments: $nArgs")
+    }
+  }
+
+  override def eval(input: InternalRow): Any = {
+    val geometry = children.head.toGeometry(input)
+    // The expression is declared nullable, so a missing geometry yields a null struct
+    // instead of being passed on to the validity check.
+    // NOTE(review): assumes toGeometry yields null for a NULL input - confirm.
+    if (geometry == null) {
+      return null
+    }
+
+    val validDetail: ValidDetail = nArgs match {
+      case 1 => Functions.isValidDetail(geometry)
+      case 2 =>
+        val flag = children(1).eval(input).asInstanceOf[Int]
+        Functions.isValidDetail(geometry, flag)
+      case _ =>
+        throw new IllegalArgumentException(s"Invalid number of arguments: $nArgs")
+    }
+
+    // Convert reason and location independently so that a reason is never dropped just
+    // because no location is attached to it.
+    val reason =
+      if (validDetail.reason == null) null else UTF8String.fromString(validDetail.reason)
+    val serLocation =
+      if (validDetail.location == null) null else GeometrySerializer.serialize(validDetail.location)
+    InternalRow.fromSeq(Seq(validDetail.valid, reason, serLocation))
+  }
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = {
+    copy(children = newChildren)
+  }
+
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = new StructType()
+    .add("valid", BooleanType, nullable = false)
+    .add("reason", StringType, nullable = true)
+    .add("location", GeometryUDT, nullable = true)
+}
+
/**
* Test if Geometry is valid.
*
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
index 093ca58cd..9d9476a0b 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
@@ -19,7 +19,7 @@
package org.apache.spark.sql.sedona_sql.expressions
import org.apache.spark.sql.Column
-import org.apache.spark.sql.sedona_sql.expressions.collect.{ST_Collect}
+import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
import org.locationtech.jts.operation.buffer.BufferParameters
object st_functions extends DataFrameAPI {
@@ -204,6 +204,13 @@ object st_functions extends DataFrameAPI {
def ST_IsValidReason(geometry: Column, flag: Column): Column =
wrapExpression[ST_IsValidReason](geometry, flag)
def ST_IsValidReason(geometry: String, flag: Integer): Column =
wrapExpression[ST_IsValidReason](geometry, flag)
+ def ST_IsValidDetail(geometry: Column, flag: Column): Column =
wrapExpression[ST_IsValidDetail](geometry, flag)
+ def ST_IsValidDetail(geometry: String, flag: Integer): Column =
wrapExpression[ST_IsValidDetail](geometry, flag)
+ def ST_IsValidDetail(geometry: String, flag: String): Column =
wrapExpression[ST_IsValidDetail](geometry, flag)
+ def ST_IsValidDetail(geometry: Column): Column =
wrapExpression[ST_IsValidDetail](geometry)
+ def ST_IsValidDetail(geometry: String): Column =
wrapExpression[ST_IsValidDetail](geometry)
+
+
def ST_Length(geometry: Column): Column = wrapExpression[ST_Length](geometry)
def ST_Length(geometry: String): Column = wrapExpression[ST_Length](geometry)
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 74ef0b519..e7d0972a3 100644
---
a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++
b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -19,6 +19,7 @@
package org.apache.sedona.sql
import org.apache.commons.codec.binary.Hex
+import org.apache.spark.sql.Row
import org.apache.spark.sql.functions.{array, col, element_at, lit}
import org.apache.spark.sql.sedona_sql.expressions.st_aggregates._
import org.apache.spark.sql.sedona_sql.expressions.st_constructors._
@@ -1768,6 +1769,27 @@ class dataFrameAPITestScala extends TestBaseScala {
assertTrue(actual)
}
+ it("Passed ST_IsValidDetail") {
+ // Valid Geometry
+ var baseDf = sparkSession.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2
0, 2 2, 0 2, 1 1, 0 0))') AS geom")
+ var actual =
baseDf.select(ST_IsValidDetail($"geom")).first().getAs[Row](0)
+ var expected = Row(true, null, null)
+ assert(expected.equals(actual))
+
+ // Geometry that is invalid under both OGC and ESRI standards, but with
different reasons
+ baseDf = sparkSession.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 1
1, 2 2, 0 2, 1 1, 0 0))') AS geom")
+
+ // Test with OGC flag (OGC_SFS_VALIDITY = 0)
+ actual = baseDf.select(ST_IsValidDetail("geom", 0)).first().getAs[Row](0)
+ expected = Row(false, "Ring Self-intersection at or near point (1.0,
1.0, NaN)", sparkSession.sql("SELECT ST_GeomFromText('POINT (1
1)')").first().get(0).asInstanceOf[Geometry])
+ assert(expected.equals(actual))
+
+ // Test with ESRI flag (ESRI_VALIDITY = 1)
+ actual = baseDf.select(ST_IsValidDetail($"geom",
lit(1))).first().getAs[Row](0)
+ expected = Row(false, "Interior is disconnected at or near point (1.0,
1.0, NaN)", sparkSession.sql("SELECT ST_GeomFromText('POINT (1
1)')").first().get(0).asInstanceOf[Geometry])
+ assert(expected.equals(actual))
+ }
+
it("Passed ST_IsValidReason") {
// Valid Geometry
val validPolygonWKT = "POLYGON ((0 0, 2 0, 2 2, 0 2, 1 1, 0 0))"
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 6a04a46ad..4576fa716 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -2670,6 +2670,33 @@ class functionTestScala extends TestBaseScala with
Matchers with GeometrySample
}
+ it("Should pass ST_IsValidDetail") {
+ val testData = Seq(
+ (5330, "POLYGON ((0 0, 3 3, 0 3, 3 0, 0 0))"),
+ (5340, "POLYGON ((100 100, 300 300, 100 300, 300 100, 100 100))"),
+ (5350, "POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0), (20 20, 20 30, 30 30, 30
20, 20 20))"),
+ (5360, "LINESTRING (220227 150406, 2220227 150407, 222020 150410)")
+ )
+
+ var df = sparkSession.createDataFrame(testData).toDF("gid", "wkt")
+ .select($"gid", expr("ST_GeomFromWKT(wkt) as geom"))
+
+ val expectedResults = Map(
+ 5330 -> Row(false, "Self-intersection at or near point (1.5, 1.5, NaN)",
sparkSession.sql("SELECT ST_GeomFromWKT('POINT (1.5
1.5)')").first().get(0).asInstanceOf[Geometry]),
+ 5340 -> Row(false, "Self-intersection at or near point (200.0, 200.0,
NaN)", sparkSession.sql("SELECT ST_GeomFromWKT('POINT (200
200)')").first().get(0).asInstanceOf[Geometry]),
+ 5350 -> Row(false, "Hole lies outside shell at or near point (20.0,
20.0)", sparkSession.sql("SELECT ST_GeomFromWKT('POINT (20
20)')").first().get(0).asInstanceOf[Geometry]),
+ 5360 -> Row(true, null, null)
+ )
+
+ df = df.selectExpr("gid", "ST_IsValidDetail(geom) as validDetail")
+
+ df.collect().foreach{ row =>
+ val gid = row.getAs[Int]("gid")
+ val validDetailRow = row.getAs[Row]("validDetail")
+ assert(expectedResults(gid).equals(validDetailRow))
+ }
+ }
+
it ("ST_IsValidReason should provide reasons for invalid geometries") {
val testData = Seq(
(5330, "POLYGON ((0 0, 3 3, 0 3, 3 0, 0 0))"),