This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 84a3fa07ed5f [SPARK-54162][GEO][SQL] Allow casting from GeographyType to GeometryType
84a3fa07ed5f is described below

commit 84a3fa07ed5f7281282287f830ab6deefae5ccb9
Author: Uros Bojanic <[email protected]>
AuthorDate: Tue Nov 4 20:04:02 2025 -0800

    [SPARK-54162][GEO][SQL] Allow casting from GeographyType to GeometryType
    
    ### What changes were proposed in this pull request?
    This PR allows casting `GEOGRAPHY` to `GEOMETRY` if they have the same SRID.
    
    ### Why are the changes needed?
    Enable explicit casting between geospatial types.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, casting `GEOGRAPHY` to `GEOMETRY` is now allowed.
    
    ### How was this patch tested?
    Added new unit tests:
    - `StUtilsSuite`
    - `CastSuiteBase`
    
    Added new e2e SQL tests:
    - `st-functions`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52839 from uros-db/geo-cast-geog_geom.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../apache/spark/sql/catalyst/util/STUtils.java    |  9 ++++++
 .../spark/sql/catalyst/expressions/Cast.scala      | 26 ++++++++++++++++-
 .../sql/catalyst/expressions/CastSuiteBase.scala   | 34 ++++++++++++++++++++++
 .../spark/sql/catalyst/util/StUtilsSuite.java      | 10 +++++++
 .../analyzer-results/nonansi/st-functions.sql.out  | 29 ++++++++++++++++++
 .../analyzer-results/st-functions.sql.out          | 29 ++++++++++++++++++
 .../resources/sql-tests/inputs/st-functions.sql    |  5 ++++
 .../sql-tests/results/nonansi/st-functions.sql.out | 32 ++++++++++++++++++++
 .../sql-tests/results/st-functions.sql.out         | 32 ++++++++++++++++++++
 9 files changed, 205 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
index 9edeee26eb98..9aed051e0639 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/util/STUtils.java
@@ -48,6 +48,15 @@ public final class STUtils {
     return g.getValue();
   }
 
+  /** Geospatial type casting utility methods. */
+
+  // Cast geography to geometry.
+  public static GeometryVal geographyToGeometry(GeographyVal geographyVal) {
+    // Geographic SRID is always a valid SRID for geometry, so we don't need to check it.
+    // Also, all geographic coordinates are valid for geometry, so no need to check bounds.
+    return toPhysVal(Geometry.fromBytes(geographyVal.getBytes()));
+  }
+
   /** Geospatial type encoder/decoder utilities. */
 
   public static GeometryVal 
serializeGeomFromWKB(org.apache.spark.sql.types.Geometry geometry,
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 5b76c7d225e1..974cdfe1b012 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -38,7 +38,7 @@ import 
org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
 import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.{UTF8String, VariantVal}
+import org.apache.spark.unsafe.types.{GeographyVal, UTF8String, VariantVal}
 import org.apache.spark.unsafe.types.UTF8String.{IntWrapper, LongWrapper}
 import org.apache.spark.util.ArrayImplicits._
 
@@ -164,6 +164,10 @@ object Cast extends QueryErrorsBase {
 
     case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if 
udt2.acceptsType(udt1) => true
 
+    // Casting from GEOGRAPHY to GEOMETRY with the same SRID is allowed.
+    case (geog: GeographyType, geom: GeometryType) if geog.srid == geom.srid =>
+      true
+
     case _ => false
   }
 
@@ -290,6 +294,10 @@ object Cast extends QueryErrorsBase {
 
     case (udt1: UserDefinedType[_], udt2: UserDefinedType[_]) if 
udt2.acceptsType(udt1) => true
 
+    // Casting from GEOGRAPHY to GEOMETRY with the same SRID is allowed.
+    case (geog: GeographyType, geom: GeometryType) if geog.srid == geom.srid =>
+      true
+
     case _ => false
   }
 
@@ -1139,6 +1147,12 @@ case class Cast(
       b => numeric.toFloat(b)
   }
 
+  // GeometryConverter
+  private[this] def castToGeometry(from: DataType): Any => Any = from match {
+    case _: GeographyType =>
+      buildCast[GeographyVal](_, STUtils.geographyToGeometry)
+  }
+
   private[this] def castArray(fromType: DataType, toType: DataType): Any => 
Any = {
     val elementCast = cast(fromType, toType)
     // TODO: Could be faster?
@@ -1218,6 +1232,7 @@ case class Cast(
         case FloatType => castToFloat(from)
         case LongType => castToLong(from)
         case DoubleType => castToDouble(from)
+        case _: GeometryType => castToGeometry(from)
         case array: ArrayType =>
           castArray(from.asInstanceOf[ArrayType].elementType, 
array.elementType)
         case map: MapType => castMap(from.asInstanceOf[MapType], map)
@@ -1326,6 +1341,7 @@ case class Cast(
     case FloatType => castToFloatCode(from, ctx)
     case LongType => castToLongCode(from, ctx)
     case DoubleType => castToDoubleCode(from, ctx)
+    case _: GeometryType => castToGeometryCode(from)
 
     case array: ArrayType =>
       castArrayCode(from.asInstanceOf[ArrayType].elementType, 
array.elementType, ctx)
@@ -2172,6 +2188,14 @@ case class Cast(
     }
   }
 
+  private[this] def castToGeometryCode(from: DataType): CastFunction = {
+    from match {
+      case _: GeographyType =>
+        (c, evPrim, _) =>
+          code"$evPrim = 
org.apache.spark.sql.catalyst.util.STUtils.geographyToGeometry($c);"
+    }
+  }
+
   private[this] def castArrayCode(
       fromType: DataType, toType: DataType, ctx: CodegenContext): CastFunction 
= {
     val elementCast = nullSafeCastFunction(fromType, toType, ctx)
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index 7a87c86b63c0..bf28e2e7eeb7 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1489,6 +1489,40 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
     }
   }
 
+  // The following tests are confirming the behavior of casting between geospatial types.
+
+  test("Casting GeographyType to GeometryType") {
+    // Casting from GEOGRAPHY to GEOMETRY is only allowed if the SRIDs are the same.
+
+    // Valid cast test cases.
+    val canAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+      (GeographyType(4326), GeometryType(4326)),
+      (GeographyType("ANY"), GeometryType("ANY"))
+    )
+    // Iterate over the test cases and verify casting.
+    canAnsiCastTestCases.foreach { case (fromType, toType) =>
+      // Cast can be performed from `fromType` to `toType`.
+      assert(Cast.canCast(fromType, toType))
+      assert(Cast.canAnsiCast(fromType, toType))
+    }
+
+    // Invalid cast test cases.
+    val cannotAnsiCastTestCases: Seq[(DataType, DataType)] = Seq(
+      (GeographyType(4326), GeometryType(0)),
+      (GeographyType(4326), GeometryType(3857)),
+      (GeographyType(4326), GeometryType("ANY")),
+      (GeographyType("ANY"), GeometryType(0)),
+      (GeographyType("ANY"), GeometryType(3857)),
+      (GeographyType("ANY"), GeometryType(4326))
+    )
+    // Iterate over the test cases and verify casting.
+    cannotAnsiCastTestCases.foreach { case (fromType, toType) =>
+      // Cast cannot be performed from `fromType` to `toType`.
+      assert(!Cast.canCast(fromType, toType))
+      assert(!Cast.canAnsiCast(fromType, toType))
+    }
+  }
+
   test("cast string to time") {
     checkEvaluation(cast(Literal.create("0:0:0"), TimeType()), 0L)
     checkEvaluation(cast(Literal.create(" 01:2:3.01   "), TimeType(2)), 
localTime(1, 2, 3, 10000))
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
index 8ad4d4c36e45..0e18f8bdf919 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StUtilsSuite.java
@@ -62,6 +62,16 @@ class STUtilsSuite {
     System.arraycopy(testWkb, 0, testGeometryBytes, sridLen, wkbLen);
   }
 
+  /** Geospatial type casting utility methods. */
+
+  @Test
+  void testGeographyToGeometry() {
+    GeographyVal geographyVal = GeographyVal.fromBytes(testGeographyBytes);
+    GeometryVal geometryVal = STUtils.geographyToGeometry(geographyVal);
+    assertNotNull(geometryVal);
+    assertArrayEquals(geographyVal.getBytes(), geometryVal.getBytes());
+  }
+
   /** Tests for ST expression utility methods. */
 
   // ST_AsBinary
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
index fe2dda3f1967..dfc0b6b89a84 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -66,6 +66,35 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
 AS GEOMETRY(4326)))) AS result
+-- !query analysis
+Project 
[hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040)
 as geometry(4326)))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY)) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
GEOMETRY(ANY))\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"GEOMETRY(ANY)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 91,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index fe2dda3f1967..dfc0b6b89a84 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -66,6 +66,35 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
 AS GEOMETRY(4326)))) AS result
+-- !query analysis
+Project 
[hex(st_asbinary(cast(st_geogfromwkb(0x0101000000000000000000F03F0000000000000040)
 as geometry(4326)))) AS result#x]
++- OneRowRelation
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY)) AS result
+-- !query analysis
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
GEOMETRY(ANY))\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"GEOMETRY(ANY)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 91,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index dc688e4a8994..6785a5b5254b 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -13,6 +13,11 @@ INSERT INTO geodata VALUES
 SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
STRING) AS result;
 SELECT CAST(X'0101000000000000000000f03f0000000000000040' AS GEOMETRY(4326)) 
AS result;
 
+-- Casting GEOGRAPHY to GEOMETRY is allowed only if SRIDs match.
+SELECT 
hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
 AS GEOMETRY(4326)))) AS result;
+-- Error handling: mismatched SRIDs.
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY)) AS result;
+
 ---- ST reader/writer expressions
 
 -- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
diff --git 
a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
index e75d4ba419e2..f1e876600a09 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -73,6 +73,38 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
 AS GEOMETRY(4326)))) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
GEOMETRY(ANY))\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"GEOMETRY(ANY)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 91,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index e75d4ba419e2..f1e876600a09 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -73,6 +73,38 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT 
hex(ST_AsBinary(CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')
 AS GEOMETRY(4326)))) AS result
+-- !query schema
+struct<result:string>
+-- !query output
+0101000000000000000000F03F0000000000000040
+
+
+-- !query
+SELECT CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY)) AS result
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "sqlExpr" : 
"\"CAST(st_geogfromwkb(X'0101000000000000000000F03F0000000000000040') AS 
GEOMETRY(ANY))\"",
+    "srcType" : "\"GEOGRAPHY(4326)\"",
+    "targetType" : "\"GEOMETRY(ANY)\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 91,
+    "fragment" : 
"CAST(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040') AS 
GEOMETRY(ANY))"
+  } ]
+}
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to