This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d4123009 [SEDONA-606] Add ST_IsValidDetail (#1474)
4d4123009 is described below

commit 4d41230094249a650a027fd550aeefda25e6c6fe
Author: Furqaan Khan <[email protected]>
AuthorDate: Tue Jun 11 12:44:22 2024 -0400

    [SEDONA-606] Add ST_IsValidDetail (#1474)
    
    * feat: Add ST_IsValidDetail
    
    * fix: some other snowflake test
    
    * fix: some other snowflake test
    
    * fix: some other snowflake test
    
    * chore: remove toString method
    
    * add: change examples in docs and simplify equals check
    
    * docs: add correct snowflake example.
---
 .../java/org/apache/sedona/common/Functions.java   | 33 +++++++++++--
 .../apache/sedona/common/utils/ValidDetail.java    | 42 +++++++++++++++++
 .../org/apache/sedona/common/FunctionsTest.java    | 39 ++++++++++++++++
 docs/api/snowflake/vector-data/Function.md         | 35 ++++++++++++++
 docs/api/sql/Function.md                           | 39 ++++++++++++++++
 python/sedona/sql/st_functions.py                  | 17 +++++++
 python/tests/sql/test_dataframe_api.py             |  3 ++
 python/tests/sql/test_function.py                  | 14 +++++-
 .../sedona/snowflake/snowsql/TestConstructors.java |  1 +
 .../snowflake/snowsql/TestTableFunctions.java      | 19 ++++++++
 .../snowflake/snowsql/ddl/UDTFDDLGenerator.java    |  1 +
 .../snowflake/snowsql/udtfs/ST_IsValidDetail.java  | 54 ++++++++++++++++++++++
 .../scala/org/apache/sedona/sql/UDF/Catalog.scala  |  1 +
 .../sql/sedona_sql/expressions/Functions.scala     | 52 ++++++++++++++++++++-
 .../sql/sedona_sql/expressions/st_functions.scala  |  9 +++-
 .../apache/sedona/sql/dataFrameAPITestScala.scala  | 22 +++++++++
 .../org/apache/sedona/sql/functionTestScala.scala  | 27 +++++++++++
 17 files changed, 400 insertions(+), 8 deletions(-)

diff --git a/common/src/main/java/org/apache/sedona/common/Functions.java 
b/common/src/main/java/org/apache/sedona/common/Functions.java
index 3625fabc4..e85e8d330 100644
--- a/common/src/main/java/org/apache/sedona/common/Functions.java
+++ b/common/src/main/java/org/apache/sedona/common/Functions.java
@@ -1828,11 +1828,7 @@ public class Functions {
         return GeomUtils.getHausdorffDistance(g1, g2, -1);
     }
 
-    public static String isValidReason(Geometry geom) {
-        return isValidReason(geom, OGC_SFS_VALIDITY);
-    }
-
-    public static String isValidReason(Geometry geom, int flag) {
+    private static IsValidOp getIsValidOpObject(Geometry geom, int flag) {
         IsValidOp isValidOp = new IsValidOp(geom);
 
         // Set the validity model based on flags
@@ -1841,6 +1837,33 @@ public class Functions {
         } else {
             isValidOp.setSelfTouchingRingFormingHoleValid(false);
         }
+        return isValidOp;
+    }
+
+    /**
+     * Checks the validity of a geometry using the default {@code OGC_SFS_VALIDITY} flag.
+     *
+     * @param geom the geometry to check
+     * @return the validity detail produced by {@link #isValidDetail(Geometry, int)}
+     */
+    public static ValidDetail isValidDetail(Geometry geom) {
+        final int defaultFlag = OGC_SFS_VALIDITY;
+        return isValidDetail(geom, defaultFlag);
+    }
+
+    /**
+     * Checks the validity of a geometry and reports why and where it is invalid.
+     *
+     * @param geom the geometry to check
+     * @param flag selects the validity model; it is passed unchanged to the shared
+     *             {@code IsValidOp} factory helper
+     * @return {@code (true, null, null)} for a valid geometry; otherwise {@code false}, the
+     *         validation error rendered with {@code toString()}, and a point built by the
+     *         geometry's own factory at the error coordinate
+     */
+    public static ValidDetail isValidDetail(Geometry geom, int flag) {
+        IsValidOp validator = getIsValidOpObject(geom, flag);
+
+        // Report the failure first; the valid case falls through to the bottom.
+        if (!validator.isValid()) {
+            TopologyValidationError failure = validator.getValidationError();
+            return new ValidDetail(
+                    false,
+                    failure.toString(),
+                    geom.getFactory().createPoint(failure.getCoordinate()));
+        }
+
+        return new ValidDetail(true, null, null);
+    }
+
+    public static String isValidReason(Geometry geom) {
+        return isValidReason(geom, OGC_SFS_VALIDITY);
+    }
+
+    public static String isValidReason(Geometry geom, int flag) {
+        IsValidOp isValidOp = getIsValidOpObject(geom, flag);
+
 
         if (isValidOp.isValid()) {
             return "Valid Geometry";
diff --git 
a/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java 
b/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java
new file mode 100644
index 000000000..5b5578b8d
--- /dev/null
+++ b/common/src/main/java/org/apache/sedona/common/utils/ValidDetail.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sedona.common.utils;
+
+import org.apache.sedona.common.Functions;
+import org.locationtech.jts.geom.Geometry;
+
+import java.util.Objects;
+
+public class ValidDetail {
+    public final boolean valid;
+    public final String reason;
+    public final Geometry location;
+
+    public ValidDetail(boolean valid, String reason, Geometry location) {
+        this.valid = valid;
+        this.reason = reason;
+        this.location = location;
+    }
+
+    public boolean equals(ValidDetail other) {
+        return this.valid == other.valid &&
+                Objects.equals(this.reason, other.reason) &&
+                Objects.equals(this.location, other.location);
+    }
+}
diff --git a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java 
b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
index cdf1dea02..c5c759e65 100644
--- a/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
+++ b/common/src/test/java/org/apache/sedona/common/FunctionsTest.java
@@ -20,6 +20,7 @@ import org.apache.sedona.common.sphere.Haversine;
 import org.apache.sedona.common.sphere.Spheroid;
 import org.apache.sedona.common.utils.GeomUtils;
 import org.apache.sedona.common.utils.S2Utils;
+import org.apache.sedona.common.utils.ValidDetail;
 import org.geotools.referencing.CRS;
 import org.geotools.referencing.operation.projection.ProjectionException;
 import org.junit.Test;
@@ -2861,6 +2862,44 @@ public class FunctionsTest extends TestBase {
         assertEquals("Polygon geometry type not supported, supported types 
are: (Multi)Point and (Multi)LineString.", e.getMessage());
     }
 
+    @Test
+    public void isValidDetail() throws ParseException {
+        // Valid geometry
+        Geometry validGeom = GEOMETRY_FACTORY.createPolygon(coordArray(30, 10, 
40, 40, 20, 40, 10, 20, 30, 10));
+        ValidDetail actualValidDetail = Functions.isValidDetail(validGeom);
+        ValidDetail expectedValidDetail = new ValidDetail(true, null, null);
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+        Integer OGC_SFS_VALIDITY = 0;
+        Integer ESRI_VALIDITY = 1;
+
+        actualValidDetail = Functions.isValidDetail(validGeom, 
OGC_SFS_VALIDITY);
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+        actualValidDetail = Functions.isValidDetail(validGeom, ESRI_VALIDITY);
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+        // Invalid geometry (self-intersection)
+        Geometry invalidGeom = GEOMETRY_FACTORY.createPolygon(coordArray(30, 
10, 40, 40, 20, 40, 30, 10, 10, 20, 30, 10));
+        actualValidDetail = Functions.isValidDetail(invalidGeom);
+        expectedValidDetail = new ValidDetail(false,
+                "Ring Self-intersection at or near point (30.0, 10.0, NaN)",
+                Constructors.geomFromEWKT("POINT (30 10)"));
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+        actualValidDetail = Functions.isValidDetail(invalidGeom, 
OGC_SFS_VALIDITY);
+        expectedValidDetail = new ValidDetail(false,
+                "Ring Self-intersection at or near point (30.0, 10.0, NaN)",
+                Constructors.geomFromEWKT("POINT (30 10)"));
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+
+        actualValidDetail = Functions.isValidDetail(invalidGeom, 
ESRI_VALIDITY);
+        expectedValidDetail = new ValidDetail(false,
+                "Self-intersection at or near point (10.0, 20.0, NaN)",
+                Constructors.geomFromEWKT("POINT (10 20)"));
+        assertTrue(expectedValidDetail.equals(actualValidDetail));
+    }
+
     @Test
     public void isValidReason() {
         // Valid geometry
diff --git a/docs/api/snowflake/vector-data/Function.md 
b/docs/api/snowflake/vector-data/Function.md
index ba4574d64..5da53224d 100644
--- a/docs/api/snowflake/vector-data/Function.md
+++ b/docs/api/snowflake/vector-data/Function.md
@@ -1486,6 +1486,41 @@ Output:
 false
 ```
 
+## ST_IsValidDetail
+
+Introduction: Returns a row, containing a boolean `valid` stating if a 
geometry is valid, a string `reason` stating why it is invalid and a geometry 
`location` pointing out where it is invalid.
+
+This function is a combination of [ST_IsValid](#st_isvalid) and 
[ST_IsValidReason](#st_isvalidreason).
+
+The flags parameter is a bitfield with the following options:
+
+- 0: Use usual OGC SFS (Simple Features Specification) validity semantics.
+- 1: "ESRI flag", Accepts certain self-touching rings as valid, which are 
considered invalid under OGC standards.
+
+Format:
+
+```sql
+SELECT valid, reason, Sedona.ST_AsText(location) AS location
+FROM table(Sedona.ST_IsValidDetail(geom: Geometry, flag: Integer))
+```
+
+SQL Example:
+
+```sql
+SELECT valid, reason, Sedona.ST_AsText(location) AS location
+     FROM table(Sedona.ST_IsValidDetail(Sedona.ST_GeomFromWKT('POLYGON ((30 
10, 40 40, 20 40, 30 10, 10 20, 30 10))'), 0))
+```
+
+Output:
+
+```
++-----+---------------------------------------------------------+-------------+
+|valid|reason                                                   |location     |
++-----+---------------------------------------------------------+-------------+
+|false|Ring Self-intersection at or near point (30.0, 10.0, NaN)|POINT (30 10)|
++-----+---------------------------------------------------------+-------------+
+```
+
 ## ST_IsValidReason
 
 Introduction: Returns text stating if the geometry is valid. If not, it 
provides a reason why it is invalid. The function can be invoked with just the 
geometry or with an additional flag. The flag alters the validity checking 
behavior. The flags parameter is a bitfield with the following options:
diff --git a/docs/api/sql/Function.md b/docs/api/sql/Function.md
index 858412a20..591312b8e 100644
--- a/docs/api/sql/Function.md
+++ b/docs/api/sql/Function.md
@@ -2111,6 +2111,45 @@ Output:
 false
 ```
 
+## ST_IsValidDetail
+
+Introduction: Returns a row, containing a boolean `valid` stating if a 
geometry is valid, a string `reason` stating why it is invalid and a geometry 
`location` pointing out where it is invalid.
+
+This function is a combination of [ST_IsValid](#st_isvalid) and 
[ST_IsValidReason](#st_isvalidreason).
+
+The flags parameter is a bitfield with the following options:
+
+- 0 (default): Use usual OGC SFS (Simple Features Specification) validity 
semantics.
+- 1: "ESRI flag", Accepts certain self-touching rings as valid, which are 
considered invalid under OGC standards.
+
+Formats:
+
+```sql
+ST_IsValidDetail(geom: Geometry)
+```
+
+```sql
+ST_IsValidDetail(geom: Geometry, flag: Integer)
+```
+
+Since: `v1.6.1`
+
+SQL Example:
+
+```sql
+SELECT ST_IsValidDetail(ST_GeomFromWKT('POLYGON ((30 10, 40 40, 20 40, 30 10, 
10 20, 30 10))'))
+```
+
+Output:
+
+```
++-----+---------------------------------------------------------+-------------+
+|valid|reason                                                   |location     |
++-----+---------------------------------------------------------+-------------+
+|false|Ring Self-intersection at or near point (30.0, 10.0, NaN)|POINT (30 10)|
++-----+---------------------------------------------------------+-------------+
+```
+
 ## ST_IsValidReason
 
 Introduction: Returns text stating if the geometry is valid. If not, it 
provides a reason why it is invalid. The function can be invoked with just the 
geometry or with an additional flag. The flag alters the validity checking 
behavior. The flags parameter is a bitfield with the following options:
diff --git a/python/sedona/sql/st_functions.py 
b/python/sedona/sql/st_functions.py
index 9ab2747d8..305ab8681 100644
--- a/python/sedona/sql/st_functions.py
+++ b/python/sedona/sql/st_functions.py
@@ -779,6 +779,23 @@ def ST_IsValid(geometry: ColumnOrName, flag: 
Optional[Union[ColumnOrName, int]]
     args = (geometry,) if flag is None else (geometry, flag)
     return _call_st_function("ST_IsValid", args)
 
+@validate_argument_types
+def ST_IsValidDetail(geometry: ColumnOrName, flag: 
Optional[Union[ColumnOrName, int]] = None) -> Column:
+    """
+    Return a row of valid, reason and location. valid defines the validity of 
geometry, reason defines the
+    reason why it is not valid and location defines the location where it is 
not valid
+    If the geometry is valid then it will return null for reason and location
+
+    :param geometry: Geometry column to validate.
+    :type geometry: ColumnOrName
+    :param flag: Optional flag to modify behavior of the validity check.
+    :type flag: Optional[Union[ColumnOrName, int]]
+    :return: Row of valid, reason and location
+    :rtype: Column
+    """
+    args = (geometry,) if flag is None else (geometry, flag)
+    return _call_st_function("ST_IsValidDetail", args)
+
 @validate_argument_types
 def ST_IsValidReason(geometry: ColumnOrName, flag: 
Optional[Union[ColumnOrName, int]] = None) -> Column:
     """
diff --git a/python/tests/sql/test_dataframe_api.py 
b/python/tests/sql/test_dataframe_api.py
index 506b698f8..70d491595 100644
--- a/python/tests/sql/test_dataframe_api.py
+++ b/python/tests/sql/test_dataframe_api.py
@@ -152,6 +152,8 @@ test_configurations = [
     (stf.ST_IsValid, ("geom",), "triangle_geom", "", True),
     (stf.ST_IsValid, ("geom", 1), "triangle_geom", "", True),
     (stf.ST_IsValid, ("geom", 0), "triangle_geom", "", True),
+    (stf.ST_IsValidDetail, ("geom",), "triangle_geom", "", Row(valid=True, 
reason=None, location=None).asDict()),
+    (stf.ST_IsValidDetail, ("geom", 1), "triangle_geom", "", Row(valid=True, 
reason=None, location=None).asDict()),
     (stf.ST_Length, ("line",), "linestring_geom", "", 5.0),
     (stf.ST_Length2D, ("line",), "linestring_geom", "", 5.0),
     (stf.ST_LengthSpheroid, ("point",), "point_geom", "", 0.0),
@@ -354,6 +356,7 @@ wrong_type_configurations = [
     (stf.ST_IsPolygonCCW, (None,)),
     (stf.ST_IsRing, (None,)),
     (stf.ST_IsSimple, (None,)),
+    (stf.ST_IsValidDetail, (None,)),
     (stf.ST_IsValid, (None,)),
     (stf.ST_IsValidReason, (None,)),
     (stf.ST_Length, (None,)),
diff --git a/python/tests/sql/test_function.py 
b/python/tests/sql/test_function.py
index 44de51214..fe32bb950 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -16,7 +16,7 @@
 #  under the License.
 
 import math
-from pyspark.sql import DataFrame
+from pyspark.sql import DataFrame, Row
 from pyspark.sql.functions import col
 from pyspark.sql.functions import explode, expr
 from pyspark.sql.types import StructType, StructField, IntegerType
@@ -310,6 +310,18 @@ class TestPredicateJoin(TestBase):
         intersects = self.spark.sql("select ST_Intersection(a,b) from 
testtable")
         assert intersects.take(1)[0][0].wkt == "POLYGON EMPTY"
 
+    def test_st_is_valid_detail(self):
+        baseDf = self.spark.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 2 
2, 0 2, 1 1, 0 0))') AS geom")
+        actual = baseDf.selectExpr("ST_IsValidDetail(geom)").first()[0]
+        expected = Row(valid=True, reason=None, location=None)
+        assert expected == actual
+
+        baseDf = self.spark.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 1 
1, 2 2, 0 2, 1 1, 0 0))') AS geom")
+        actual = baseDf.selectExpr("ST_IsValidDetail(geom)").first()[0]
+        expected = Row(valid=False, reason="Ring Self-intersection at or near 
point (1.0, 1.0, NaN)", location=
+        self.spark.sql("SELECT ST_GeomFromText('POINT (1 1)')").first()[0])
+        assert expected == actual
+
     def test_st_is_valid(self):
         test_table = self.spark.sql(
             "SELECT ST_IsValid(ST_GeomFromWKT('POLYGON((0 0, 10 0, 10 10, 0 
10, 0 0), (15 15, 15 20, 20 20, 20 15, 15 15))')) AS a, " +
diff --git 
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
 
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
index 5a85df2a5..a9e2001b5 100644
--- 
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
+++ 
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestConstructors.java
@@ -239,6 +239,7 @@ public class TestConstructors extends TestBase{
                 "GEOMETRYCOLLECTION (POINT (50 50), LINESTRING (20 30, 40 60, 
80 90), POLYGON ((30 10, 40 20, 30 20, 30 10), (35 15, 45 15, 40 25, 35 15)))"
         );
         registerUDF("ST_GeomCollFromText", String.class, int.class);
+        registerUDF("ST_SRID", byte[].class);
         verifySqlSingleRes(
                 "select 
sedona.ST_SRID(sedona.ST_GeomCollFromText('GEOMETRYCOLLECTION (POINT (50 50), 
LINESTRING (20 30, 40 60, 80 90), POLYGON ((30 10, 40 20, 30 20, 30 10), (35 
15, 45 15, 40 25, 35 15)))',4269))",
                 4269
diff --git 
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
 
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
index a2df1b02f..fc8de74f2 100644
--- 
a/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
+++ 
b/snowflake-tester/src/test/java/org/apache/sedona/snowflake/snowsql/TestTableFunctions.java
@@ -51,6 +51,25 @@ public class TestTableFunctions extends TestBase{
                 Constructors.geomFromWKT("POLYGON ((0.5 1, 1 1, 1 0.5, 0.5 
0.5, 0.5 1))", 0)
         );
     }
+
+    @Test
+    public void test_ST_IsValidDetail() {
+        registerUDTF(ST_IsValidDetail.class);
+        verifySqlSingleRes(
+                "select reason from 
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40, 
20 40, 30 10, 10 20, 30 10))'), 0))",
+                "Ring Self-intersection at or near point (30.0, 10.0, NaN)"
+        );
+        verifySqlSingleRes(
+                "select valid from 
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40, 
20 40, 30 10, 10 20, 30 10))'), 0))",
+                false
+        );
+        verifySqlSingleRes(
+                "select sedona.ST_AsText(location) from 
table(sedona.ST_IsValidDetail(sedona.ST_GeomFromText('POLYGON ((30 10, 40 40, 
20 40, 30 10, 10 20, 30 10))'), 0))",
+                "POINT (30 10)"
+        );
+
+    }
+
     @Test
     public void test_ST_SubDivideExplode() {
         registerUDTF(ST_SubDivideExplode.class);
diff --git 
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
 
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
index 3424d3661..a599d9d69 100644
--- 
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
+++ 
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/ddl/UDTFDDLGenerator.java
@@ -28,6 +28,7 @@ public class UDTFDDLGenerator {
             ST_MinimumBoundingRadius.class,
             ST_Intersection_Aggr.class,
             ST_SubDivideExplode.class,
+            ST_IsValidDetail.class,
             ST_Envelope_Aggr.class,
             ST_Union_Aggr.class,
             ST_Collect.class,
diff --git 
a/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
 
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
new file mode 100644
index 000000000..ca74aed9b
--- /dev/null
+++ 
b/snowflake/src/main/java/org/apache/sedona/snowflake/snowsql/udtfs/ST_IsValidDetail.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.sedona.snowflake.snowsql.udtfs;
+
+import org.apache.sedona.common.Functions;
+import org.apache.sedona.common.utils.ValidDetail;
+import org.apache.sedona.snowflake.snowsql.GeometrySerde;
+import org.apache.sedona.snowflake.snowsql.annotations.UDTFAnnotations;
+import org.locationtech.jts.io.ParseException;
+
+import java.util.stream.Stream;
+
+@UDTFAnnotations.TabularFunc(name = "ST_IsValidDetail", argNames = {"geom", "flag"})
+/**
+ * Snowflake table function wrapper around {@code Functions.isValidDetail}.
+ *
+ * <p>Emits a single {@link OutputRow} per input geometry.
+ */
+public class ST_IsValidDetail {
+
+    /** Output schema of the table function: one field per column. */
+    public static class OutputRow {
+        public final boolean valid;
+        public final String reason;
+        public final byte[] location;
+
+        /**
+         * Copies a {@link ValidDetail} into the row, serializing the location geometry.
+         *
+         * @param validDetail the validity result to expose
+         */
+        public OutputRow(ValidDetail validDetail) {
+            this.valid = validDetail.valid;
+            this.reason = validDetail.reason;
+            // A valid geometry carries no location; keep the column null rather than
+            // handing null to the serializer.
+            // NOTE(review): assumes GeometrySerde.serialize is not null-safe — confirm.
+            this.location = validDetail.location == null
+                    ? null
+                    : GeometrySerde.serialize(validDetail.location);
+        }
+    }
+
+    /** @return the class describing the output row */
+    public static Class<?> getOutputClass() {
+        return OutputRow.class;
+    }
+
+    public ST_IsValidDetail() {
+    }
+
+    /**
+     * Validates one serialized geometry.
+     *
+     * @param geometry the serialized geometry, decoded with {@code GeometrySerde.deserialize}
+     * @param flag validity flag; when {@code null} the single-argument
+     *             {@code Functions.isValidDetail} overload is used instead of unboxing null
+     * @return a one-element stream holding the validity detail row
+     * @throws ParseException declared for the geometry deserialization step
+     */
+    public Stream<OutputRow> process(byte[] geometry, Integer flag) throws ParseException {
+        // Integer -> int unboxing of a null flag would throw NullPointerException.
+        ValidDetail validDetail = flag == null
+                ? Functions.isValidDetail(GeometrySerde.deserialize(geometry))
+                : Functions.isValidDetail(GeometrySerde.deserialize(geometry), flag);
+
+        return Stream.of(new OutputRow(validDetail));
+    }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala 
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index d0229a146..95c950a9d 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -89,6 +89,7 @@ object Catalog {
     function[ST_SymDifference](),
     function[ST_UnaryUnion](),
     function[ST_Union](),
+    function[ST_IsValidDetail](),
     function[ST_IsValid](),
     function[ST_IsEmpty](),
     function[ST_ReducePrecision](),
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
index e20e034ca..1db690ace 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/Functions.scala
@@ -20,9 +20,11 @@ package org.apache.spark.sql.sedona_sql.expressions
 
 import org.apache.sedona.common.{Functions, FunctionsGeoTools}
 import org.apache.sedona.common.sphere.{Haversine, Spheroid}
+import org.apache.sedona.common.utils.ValidDetail
+import org.apache.sedona.sql.utils.GeometrySerializer
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
-import org.apache.spark.sql.catalyst.expressions.{Expression, Generator}
+import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, 
Expression, Generator}
 import org.apache.spark.sql.catalyst.util.ArrayData
 import org.apache.spark.sql.sedona_sql.UDT.GeometryUDT
 import org.apache.spark.sql.sedona_sql.expressions.implicits._
@@ -30,6 +32,7 @@ import org.apache.spark.sql.types._
 import org.locationtech.jts.algorithm.MinimumBoundingCircle
 import org.locationtech.jts.geom._
 import 
org.apache.spark.sql.sedona_sql.expressions.InferrableFunctionConverter._
+import org.apache.spark.unsafe.types.UTF8String
 
 /**
   * Return the distance between two geometries.
@@ -290,6 +293,53 @@ case class ST_MakeValid(inputExpressions: Seq[Expression])
   }
 }
 
+/**
+  * Return a struct (valid, reason, location) describing the validity of a geometry.
+  *
+  * Accepts one argument (geometry) or two (geometry, integer flag). The result is null
+  * when the geometry evaluates to null.
+  *
+  * @param children the geometry expression, optionally followed by the flag expression
+  */
+case class ST_IsValidDetail(children: Seq[Expression])
+  extends Expression with ExpectsInputTypes with CodegenFallback {
+
+  private val nArgs = children.length
+
+  override def inputTypes: Seq[AbstractDataType] = {
+    if (nArgs == 2) {
+      Seq(GeometryUDT, IntegerType)
+    } else if (nArgs == 1) {
+      Seq(GeometryUDT)
+    } else {
+      throw new IllegalArgumentException(s"Invalid number of arguments: $nArgs")
+    }
+  }
+
+  override def eval(input: InternalRow): Any = {
+    val geometry = children.head.toGeometry(input)
+    // The expression is declared nullable: a null geometry yields a null struct instead of
+    // being passed on to the validity check.
+    if (geometry == null) {
+      return null
+    }
+
+    val validDetail: ValidDetail = nArgs match {
+      case 1 => Functions.isValidDetail(geometry)
+      case 2 =>
+        val flag = children(1).eval(input).asInstanceOf[Int]
+        Functions.isValidDetail(geometry, flag)
+      case _ => throw new IllegalArgumentException(s"Invalid number of arguments: $nArgs")
+    }
+
+    // Convert each field independently so a missing location never discards the reason.
+    // UTF8String.fromString maps a null String to null.
+    val serLocation =
+      if (validDetail.location == null) null
+      else GeometrySerializer.serialize(validDetail.location)
+    InternalRow.fromSeq(Seq(validDetail.valid, UTF8String.fromString(validDetail.reason), serLocation))
+  }
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = {
+    copy(children = newChildren)
+  }
+
+  override def nullable: Boolean = true
+
+  override def dataType: DataType = new StructType()
+    .add("valid", BooleanType, nullable = false)
+    .add("reason", StringType, nullable = true)
+    .add("location", GeometryUDT, nullable = true)
+}
+
 /**
   * Test if Geometry is valid.
   *
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
index 093ca58cd..9d9476a0b 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_functions.scala
@@ -19,7 +19,7 @@
 package org.apache.spark.sql.sedona_sql.expressions
 
 import org.apache.spark.sql.Column
-import org.apache.spark.sql.sedona_sql.expressions.collect.{ST_Collect}
+import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
 import org.locationtech.jts.operation.buffer.BufferParameters
 
 object st_functions extends DataFrameAPI {
@@ -204,6 +204,13 @@ object st_functions extends DataFrameAPI {
   def ST_IsValidReason(geometry: Column, flag: Column): Column = 
wrapExpression[ST_IsValidReason](geometry, flag)
   def ST_IsValidReason(geometry: String, flag: Integer): Column = 
wrapExpression[ST_IsValidReason](geometry, flag)
 
+  def ST_IsValidDetail(geometry: Column, flag: Column): Column = 
wrapExpression[ST_IsValidDetail](geometry, flag)
+  def ST_IsValidDetail(geometry: String, flag: Integer): Column = 
wrapExpression[ST_IsValidDetail](geometry, flag)
+  def ST_IsValidDetail(geometry: String, flag: String): Column = 
wrapExpression[ST_IsValidDetail](geometry, flag)
+  def ST_IsValidDetail(geometry: Column): Column = 
wrapExpression[ST_IsValidDetail](geometry)
+  def ST_IsValidDetail(geometry: String): Column = 
wrapExpression[ST_IsValidDetail](geometry)
+
+
   def ST_Length(geometry: Column): Column = wrapExpression[ST_Length](geometry)
   def ST_Length(geometry: String): Column = wrapExpression[ST_Length](geometry)
 
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala 
b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
index 74ef0b519..e7d0972a3 100644
--- 
a/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
+++ 
b/spark/common/src/test/scala/org/apache/sedona/sql/dataFrameAPITestScala.scala
@@ -19,6 +19,7 @@
 package org.apache.sedona.sql
 
 import org.apache.commons.codec.binary.Hex
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.functions.{array, col, element_at, lit}
 import org.apache.spark.sql.sedona_sql.expressions.st_aggregates._
 import org.apache.spark.sql.sedona_sql.expressions.st_constructors._
@@ -1768,6 +1769,27 @@ class dataFrameAPITestScala extends TestBaseScala {
       assertTrue(actual)
     }
 
+    it("Passed ST_IsValidDetail") {
+      // Valid Geometry
+      var baseDf = sparkSession.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 
0, 2 2, 0 2, 1 1, 0 0))') AS geom")
+      var actual = 
baseDf.select(ST_IsValidDetail($"geom")).first().getAs[Row](0)
+      var expected = Row(true, null, null)
+      assert(expected.equals(actual))
+
+      // Geometry that is invalid under both OGC and ESRI standards, but with 
different reasons
+      baseDf = sparkSession.sql("SELECT ST_GeomFromText('POLYGON ((0 0, 2 0, 1 
1, 2 2, 0 2, 1 1, 0 0))') AS geom")
+
+      // Test with OGC flag (OGC_SFS_VALIDITY = 0)
+      actual = baseDf.select(ST_IsValidDetail("geom", 0)).first().getAs[Row](0)
+      expected = Row(false, "Ring Self-intersection at or near point (1.0, 
1.0, NaN)", sparkSession.sql("SELECT ST_GeomFromText('POINT (1 
1)')").first().get(0).asInstanceOf[Geometry])
+      assert(expected.equals(actual))
+
+      // Test with ESRI flag (ESRI_VALIDITY = 1)
+      actual = baseDf.select(ST_IsValidDetail($"geom", 
lit(1))).first().getAs[Row](0)
+      expected = Row(false, "Interior is disconnected at or near point (1.0, 
1.0, NaN)", sparkSession.sql("SELECT ST_GeomFromText('POINT (1 
1)')").first().get(0).asInstanceOf[Geometry])
+      assert(expected.equals(actual))
+    }
+
     it("Passed ST_IsValidReason") {
       // Valid Geometry
       val validPolygonWKT = "POLYGON ((0 0, 2 0, 2 2, 0 2, 1 1, 0 0))"
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala 
b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
index 6a04a46ad..4576fa716 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/functionTestScala.scala
@@ -2670,6 +2670,33 @@ class functionTestScala extends TestBaseScala with 
Matchers with GeometrySample
 
   }
 
+  it("Should pass ST_IsValidDetail") {
+    val testData = Seq(
+      (5330, "POLYGON ((0 0, 3 3, 0 3, 3 0, 0 0))"),
+      (5340, "POLYGON ((100 100, 300 300, 100 300, 300 100, 100 100))"),
+      (5350, "POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0), (20 20, 20 30, 30 30, 30 
20, 20 20))"),
+      (5360, "LINESTRING (220227 150406, 2220227 150407, 222020 150410)")
+    )
+
+    var df = sparkSession.createDataFrame(testData).toDF("gid", "wkt")
+      .select($"gid", expr("ST_GeomFromWKT(wkt) as geom"))
+
+    val expectedResults = Map(
+      5330 -> Row(false, "Self-intersection at or near point (1.5, 1.5, NaN)", 
sparkSession.sql("SELECT ST_GeomFromWKT('POINT (1.5 
1.5)')").first().get(0).asInstanceOf[Geometry]),
+      5340 -> Row(false, "Self-intersection at or near point (200.0, 200.0, 
NaN)", sparkSession.sql("SELECT ST_GeomFromWKT('POINT (200 
200)')").first().get(0).asInstanceOf[Geometry]),
+      5350 -> Row(false, "Hole lies outside shell at or near point (20.0, 
20.0)", sparkSession.sql("SELECT ST_GeomFromWKT('POINT (20 
20)')").first().get(0).asInstanceOf[Geometry]),
+      5360 -> Row(true, null, null)
+    )
+
+    df = df.selectExpr("gid", "ST_IsValidDetail(geom) as validDetail")
+
+    df.collect().foreach{ row =>
+      val gid = row.getAs[Int]("gid")
+      val validDetailRow = row.getAs[Row]("validDetail")
+      assert(expectedResults(gid).equals(validDetailRow))
+    }
+  }
+
   it ("ST_IsValidReason should provide reasons for invalid geometries") {
     val testData = Seq(
       (5330, "POLYGON ((0 0, 3 3, 0 3, 3 0, 0 0))"),

Reply via email to