This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new fcab26b98f9e [SPARK-55541][GEO][SQL] Support Geometry and Geography in catalyst type converters
fcab26b98f9e is described below

commit fcab26b98f9ecdc5150a9519c67d6623a3d44d31
Author: Uros Bojanic <[email protected]>
AuthorDate: Tue Feb 17 01:19:21 2026 +0800

    [SPARK-55541][GEO][SQL] Support Geometry and Geography in catalyst type converters
    
    ### What changes were proposed in this pull request?
    Add `Geometry` and `Geography` handling in `CatalystTypeConverters`, which fixes the code paths that route through `convertToCatalyst`: `Literal.create`, `ExpressionEvalHelper.create_row`, and recursive conversion of geospatial values nested inside Seq, Array, Map, or Row.
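    
    As a rough, illustrative sketch (not part of this patch, and assuming a build that ships the geospatial types), converting a client-side `Geometry` now yields the internal Catalyst representation instead of the raw object:
    ```scala
    import org.apache.spark.sql.catalyst.CatalystTypeConverters
    import org.apache.spark.sql.catalyst.util.STUtils
    import org.apache.spark.sql.types.Geometry
    import org.apache.spark.unsafe.types.GeometryVal
    
    // WKB bytes for POINT (17 7), the same value used in the new tests.
    val pointWkb: Array[Byte] = "010100000000000000000031400000000000001C40"
      .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
    
    // convertToCatalyst now maps a client-side Geometry to the internal GeometryVal.
    val geom = Geometry.fromWKB(pointWkb, 4326)
    val converted = CatalystTypeConverters.convertToCatalyst(geom)
    assert(converted.isInstanceOf[GeometryVal])
    assert(STUtils.stSrid(converted.asInstanceOf[GeometryVal]) == 4326)
    ```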
    
    ### Why are the changes needed?
    Previously, `Geometry` and `Geography` fell through to the default conversion path and were returned as raw client objects instead of their internal Catalyst representations (`GeometryVal` and `GeographyVal`). Without this change, the affected code paths would silently pass unconverted client-side objects along, leading to type mismatches or exceptions downstream.
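    
    For illustration only (same assumptions as the sketch above; the expected round-trip equality follows the assertions in the new test suites), the type-specific converters now handle geospatial values in both directions:
    ```scala
    import org.apache.spark.sql.catalyst.CatalystTypeConverters
    import org.apache.spark.sql.types.{Geography, GeographyType}
    
    // Same POINT (17 7) WKB bytes as in the sketch above.
    val pointWkb: Array[Byte] = "010100000000000000000031400000000000001C40"
      .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
    
    val gt = GeographyType(4326)
    val geog = Geography.fromWKB(pointWkb, 4326)
    
    // Scala -> Catalyst: the GeographyType converter produces a GeographyVal.
    val catalystVal = CatalystTypeConverters.createToCatalystConverter(gt)(geog)
    // Catalyst -> Scala: converting back recovers an equal client-side Geography.
    val roundTripped = CatalystTypeConverters.createToScalaConverter(gt)(catalystVal)
    assert(roundTripped == geog)
    ```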
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Added unit tests for geospatial type conversion in `CatalystTypeConvertersSuite` and `LiteralExpressionSuite`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    Yes, Claude Opus 4.6.
    
    Closes #54333 from uros-db/geo-catalyst-type-converters.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/CatalystTypeConverters.scala      |   2 +
 .../sql/catalyst/CatalystTypeConvertersSuite.scala | 211 ++++++++++++++++++++-
 .../expressions/LiteralExpressionSuite.scala       |  32 +++-
 3 files changed, 242 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index b800b84e35ce..f8612fa3cbfc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -646,6 +646,8 @@ object CatalystTypeConverters {
     case seq: Seq[Any] => new GenericArrayData(seq.map(convertToCatalyst).toArray)
     case r: Row => InternalRow(r.toSeq.map(convertToCatalyst): _*)
     case arr: Array[Byte] => arr
+    case g: org.apache.spark.sql.types.Geometry => STUtils.stGeomFromWKB(g.getBytes, g.getSrid)
+    case g: org.apache.spark.sql.types.Geography => STUtils.stGeogFromWKB(g.getBytes)
     case arr: Array[Char] => StringConverter.toCatalyst(arr)
     case arr: Array[_] => new GenericArrayData(arr.map(convertToCatalyst))
     case map: Map[_, _] =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
index 44796e9fa381..1b46825b3414 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
@@ -24,12 +24,12 @@ import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException}
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
 import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils, GenericArrayData, IntervalUtils}
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeConstants, DateTimeUtils, GenericArrayData, IntervalUtils, STUtils}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String}
 
 class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
 
@@ -454,4 +454,211 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
       assert(CatalystTypeConverters.createToScalaConverter(TimeType())(nanos) === localTime)
     }
   }
+
+  // WKB bytes for POINT (17 7), reused across geospatial tests.
+  private val pointWkb: Array[Byte] = "010100000000000000000031400000000000001C40"
+    .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+
+  test("converting Geometry to GeometryType via convertToCatalyst") {
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = CatalystTypeConverters.convertToCatalyst(geom)
+    assert(result.isInstanceOf[GeometryVal])
+    val resultVal = result.asInstanceOf[GeometryVal]
+    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+    assert(STUtils.stSrid(resultVal) === 0)
+  }
+
+  test("converting Geometry with non-default SRID via convertToCatalyst") {
+    val geom = Geometry.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(geom)
+    assert(result.isInstanceOf[GeometryVal])
+    val resultVal = result.asInstanceOf[GeometryVal]
+    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+    assert(STUtils.stSrid(resultVal) === 4326)
+  }
+
+  test("converting Geography to GeographyType via convertToCatalyst") {
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(geog)
+    assert(result.isInstanceOf[GeographyVal])
+    val resultVal = result.asInstanceOf[GeographyVal]
+    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+    assert(STUtils.stSrid(resultVal) === 4326)
+  }
+
+  test("convertToCatalyst null handling for geospatial types") {
+    assert(CatalystTypeConverters.convertToCatalyst(null: Geometry) === null)
+    assert(CatalystTypeConverters.convertToCatalyst(null: Geography) === null)
+  }
+
+  test("convertToCatalyst with Geometry with invalid SRID") {
+    val geom = Geometry.fromWKB(pointWkb, 1)
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        CatalystTypeConverters.convertToCatalyst(geom)
+      },
+      condition = "ST_INVALID_SRID_VALUE",
+      parameters = Map("srid" -> "1"))
+  }
+
+  test("createToCatalystConverter for GeometryType") {
+    val gt = GeometryType(0)
+    val converter = CatalystTypeConverters.createToCatalystConverter(gt)
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = converter(geom)
+    assert(result.isInstanceOf[GeometryVal])
+    val resultVal = result.asInstanceOf[GeometryVal]
+    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+    assert(STUtils.stSrid(resultVal) === 0)
+  }
+
+  test("createToCatalystConverter for GeographyType") {
+    val gt = GeographyType(4326)
+    val converter = CatalystTypeConverters.createToCatalystConverter(gt)
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = converter(geog)
+    assert(result.isInstanceOf[GeographyVal])
+    val resultVal = result.asInstanceOf[GeographyVal]
+    assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+    assert(STUtils.stSrid(resultVal) === 4326)
+  }
+
+  test("createToScalaConverter for GeometryType") {
+    val gt = GeometryType(0)
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val catalystVal = STUtils.serializeGeomFromWKB(geom, gt)
+    val result = CatalystTypeConverters.createToScalaConverter(gt)(catalystVal)
+    assert(result.isInstanceOf[Geometry])
+    assert(result === geom)
+  }
+
+  test("createToScalaConverter for GeographyType") {
+    val gt = GeographyType(4326)
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val catalystVal = STUtils.serializeGeogFromWKB(geog, gt)
+    val result = CatalystTypeConverters.createToScalaConverter(gt)(catalystVal)
+    assert(result.isInstanceOf[Geography])
+    assert(result === geog)
+  }
+
+  test("null handling for geospatial individual values") {
+    assert(CatalystTypeConverters.createToScalaConverter(GeometryType(0))(null) === null)
+    assert(CatalystTypeConverters.createToScalaConverter(GeographyType(4326))(null) === null)
+  }
+
+  test("converting a wrong value to GeometryType") {
+    val gt = GeometryType(0)
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        CatalystTypeConverters.createToCatalystConverter(gt)("test")
+      },
+      condition = "_LEGACY_ERROR_TEMP_3219",
+      parameters = Map(
+        "other" -> "test",
+        "otherClass" -> "java.lang.String",
+        "dataType" -> "STRING"))
+  }
+
+  test("converting a wrong value to GeographyType") {
+    val gt = GeographyType(4326)
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        CatalystTypeConverters.createToCatalystConverter(gt)("test")
+      },
+      condition = "_LEGACY_ERROR_TEMP_3219",
+      parameters = Map(
+        "other" -> "test",
+        "otherClass" -> "java.lang.String",
+        "dataType" -> "STRING"))
+  }
+
+  test("convertToCatalyst with Geometry nested in Seq") {
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = CatalystTypeConverters.convertToCatalyst(Seq(geom))
+    assert(result.isInstanceOf[GenericArrayData])
+    val array = result.asInstanceOf[GenericArrayData]
+    assert(array.numElements() === 1)
+    val element = array.get(0, GeometryType("ANY"))
+    assert(element.isInstanceOf[GeometryVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geometry nested in Array") {
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = CatalystTypeConverters.convertToCatalyst(Array(geom))
+    assert(result.isInstanceOf[GenericArrayData])
+    val array = result.asInstanceOf[GenericArrayData]
+    assert(array.numElements() === 1)
+    val element = array.get(0, GeometryType("ANY"))
+    assert(element.isInstanceOf[GeometryVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geometry nested in Map") {
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = CatalystTypeConverters.convertToCatalyst(Map("key" -> geom))
+    assert(result.isInstanceOf[ArrayBasedMapData])
+    val mapData = result.asInstanceOf[ArrayBasedMapData]
+    val value = mapData.valueArray.get(0, GeometryType("ANY"))
+    assert(value.isInstanceOf[GeometryVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(value.asInstanceOf[GeometryVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geometry nested in Row") {
+    val geom = Geometry.fromWKB(pointWkb, 0)
+    val result = CatalystTypeConverters.convertToCatalyst(Row(geom))
+    assert(result.isInstanceOf[InternalRow])
+    val element = result.asInstanceOf[InternalRow].get(0, GeometryType("ANY"))
+    assert(element.isInstanceOf[GeometryVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geography nested in Seq") {
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(Seq(geog))
+    assert(result.isInstanceOf[GenericArrayData])
+    val array = result.asInstanceOf[GenericArrayData]
+    assert(array.numElements() === 1)
+    val element = array.get(0, GeographyType("ANY"))
+    assert(element.isInstanceOf[GeographyVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geography nested in Array") {
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(Array(geog))
+    assert(result.isInstanceOf[GenericArrayData])
+    val array = result.asInstanceOf[GenericArrayData]
+    assert(array.numElements() === 1)
+    val element = array.get(0, GeographyType("ANY"))
+    assert(element.isInstanceOf[GeographyVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geography nested in Map") {
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(Map("key" -> geog))
+    assert(result.isInstanceOf[ArrayBasedMapData])
+    val mapData = result.asInstanceOf[ArrayBasedMapData]
+    val value = mapData.valueArray.get(0, GeographyType("ANY"))
+    assert(value.isInstanceOf[GeographyVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(value.asInstanceOf[GeographyVal]), pointWkb))
+  }
+
+  test("convertToCatalyst with Geography nested in Row") {
+    val geog = Geography.fromWKB(pointWkb, 4326)
+    val result = CatalystTypeConverters.convertToCatalyst(Row(geog))
+    assert(result.isInstanceOf[InternalRow])
+    val element = result.asInstanceOf[InternalRow].get(0, GeographyType("ANY"))
+    assert(element.isInstanceOf[GeographyVal])
+    assert(java.util.Arrays.equals(
+      STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
index 15c21ad089cb..6b642e874636 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 import org.apache.spark.sql.types.DayTimeIntervalType._
 import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{CalendarInterval, GeographyVal, GeometryVal, UTF8String}
 
 class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
 
@@ -655,4 +655,34 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
         s"Expression $expr should not be context independent foldable")
     }
   }
+
+  test("Literal.create with null Geography value") {
+    val lit = Literal.create(null, GeographyType(4326))
+    assert(lit.dataType === GeographyType(4326))
+    assert(lit.value === null)
+  }
+
+  test("Literal.create with Geography value") {
+    val pointBytes = "010100000000000000000031400000000000001C40"
+      .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+    val geog = Geography.fromWKB(pointBytes, 4326)
+    val lit = Literal.create(geog, GeographyType(4326))
+    assert(lit.dataType === GeographyType(4326))
+    assert(lit.value.isInstanceOf[GeographyVal])
+  }
+
+  test("Literal.create with null Geometry value") {
+    val lit = Literal.create(null, GeometryType(0))
+    assert(lit.dataType === GeometryType(0))
+    assert(lit.value === null)
+  }
+
+  test("Literal.create with Geometry value") {
+    val pointBytes = "010100000000000000000031400000000000001C40"
+      .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+    val geom = Geometry.fromWKB(pointBytes, 0)
+    val lit = Literal.create(geom, GeometryType(0))
+    assert(lit.dataType === GeometryType(0))
+    assert(lit.value.isInstanceOf[GeometryVal])
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
