This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fcab26b98f9e [SPARK-55541][GEO][SQL] Support Geometry and Geography in
catalyst type converters
fcab26b98f9e is described below
commit fcab26b98f9ecdc5150a9519c67d6623a3d44d31
Author: Uros Bojanic <[email protected]>
AuthorDate: Tue Feb 17 01:19:21 2026 +0800
[SPARK-55541][GEO][SQL] Support Geometry and Geography in catalyst type
converters
### What changes were proposed in this pull request?
Add `Geometry` and `Geography` handling in `CatalystTypeConverters`, which
fixes code paths that route through `convertToCatalyst`: `Literal.create`,
`ExpressionEvalHelper.create_row`, and recursive conversion of geospatial
values nested inside Seq, Array, Map, or Row.
### Why are the changes needed?
Previously, `Geometry` and `Geography` fell through to the default conversion
and were returned as raw client objects instead of the proper internal
Catalyst representations (`GeometryVal` and `GeographyVal`). Without these
changes, certain code paths would silently pass through unconverted client-side
objects, leading to type mismatches/exceptions downstream.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added unit tests for geospatial type conversion in
`CatalystTypeConvertersSuite` and `LiteralExpressionSuite`.
### Was this patch authored or co-authored using generative AI tooling?
Yes, Claude Opus 4.6.
Closes #54333 from uros-db/geo-catalyst-type-converters.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/CatalystTypeConverters.scala | 2 +
.../sql/catalyst/CatalystTypeConvertersSuite.scala | 211 ++++++++++++++++++++-
.../expressions/LiteralExpressionSuite.scala | 32 +++-
3 files changed, 242 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index b800b84e35ce..f8612fa3cbfc 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -646,6 +646,8 @@ object CatalystTypeConverters {
case seq: Seq[Any] => new
GenericArrayData(seq.map(convertToCatalyst).toArray)
case r: Row => InternalRow(r.toSeq.map(convertToCatalyst): _*)
case arr: Array[Byte] => arr
+ case g: org.apache.spark.sql.types.Geometry =>
STUtils.stGeomFromWKB(g.getBytes, g.getSrid)
+ case g: org.apache.spark.sql.types.Geography =>
STUtils.stGeogFromWKB(g.getBytes)
case arr: Array[Char] => StringConverter.toCatalyst(arr)
case arr: Array[_] => new GenericArrayData(arr.map(convertToCatalyst))
case map: Map[_, _] =>
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
index 44796e9fa381..1b46825b3414 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
@@ -24,12 +24,12 @@ import org.apache.spark.{SparkFunSuite,
SparkIllegalArgumentException}
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
import org.apache.spark.sql.catalyst.plans.SQLHelper
-import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils,
GenericArrayData, IntervalUtils}
+import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData,
DateTimeConstants, DateTimeUtils, GenericArrayData, IntervalUtils, STUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DayTimeIntervalType._
import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.UTF8String
+import org.apache.spark.unsafe.types.{GeographyVal, GeometryVal, UTF8String}
class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
@@ -454,4 +454,211 @@ class CatalystTypeConvertersSuite extends SparkFunSuite
with SQLHelper {
assert(CatalystTypeConverters.createToScalaConverter(TimeType())(nanos)
=== localTime)
}
}
+
+ // WKB bytes for POINT (17 7), reused across geospatial tests.
+ private val pointWkb: Array[Byte] =
"010100000000000000000031400000000000001C40"
+ .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+
+ test("converting Geometry to GeometryType via convertToCatalyst") {
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = CatalystTypeConverters.convertToCatalyst(geom)
+ assert(result.isInstanceOf[GeometryVal])
+ val resultVal = result.asInstanceOf[GeometryVal]
+ assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+ assert(STUtils.stSrid(resultVal) === 0)
+ }
+
+ test("converting Geometry with non-default SRID via convertToCatalyst") {
+ val geom = Geometry.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(geom)
+ assert(result.isInstanceOf[GeometryVal])
+ val resultVal = result.asInstanceOf[GeometryVal]
+ assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+ assert(STUtils.stSrid(resultVal) === 4326)
+ }
+
+ test("converting Geography to GeographyType via convertToCatalyst") {
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(geog)
+ assert(result.isInstanceOf[GeographyVal])
+ val resultVal = result.asInstanceOf[GeographyVal]
+ assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+ assert(STUtils.stSrid(resultVal) === 4326)
+ }
+
+ test("convertToCatalyst null handling for geospatial types") {
+ assert(CatalystTypeConverters.convertToCatalyst(null: Geometry) === null)
+ assert(CatalystTypeConverters.convertToCatalyst(null: Geography) === null)
+ }
+
+ test("convertToCatalyst with Geometry with invalid SRID") {
+ val geom = Geometry.fromWKB(pointWkb, 1)
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ CatalystTypeConverters.convertToCatalyst(geom)
+ },
+ condition = "ST_INVALID_SRID_VALUE",
+ parameters = Map("srid" -> "1"))
+ }
+
+ test("createToCatalystConverter for GeometryType") {
+ val gt = GeometryType(0)
+ val converter = CatalystTypeConverters.createToCatalystConverter(gt)
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = converter(geom)
+ assert(result.isInstanceOf[GeometryVal])
+ val resultVal = result.asInstanceOf[GeometryVal]
+ assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+ assert(STUtils.stSrid(resultVal) === 0)
+ }
+
+ test("createToCatalystConverter for GeographyType") {
+ val gt = GeographyType(4326)
+ val converter = CatalystTypeConverters.createToCatalystConverter(gt)
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = converter(geog)
+ assert(result.isInstanceOf[GeographyVal])
+ val resultVal = result.asInstanceOf[GeographyVal]
+ assert(java.util.Arrays.equals(STUtils.stAsBinary(resultVal), pointWkb))
+ assert(STUtils.stSrid(resultVal) === 4326)
+ }
+
+ test("createToScalaConverter for GeometryType") {
+ val gt = GeometryType(0)
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val catalystVal = STUtils.serializeGeomFromWKB(geom, gt)
+ val result = CatalystTypeConverters.createToScalaConverter(gt)(catalystVal)
+ assert(result.isInstanceOf[Geometry])
+ assert(result === geom)
+ }
+
+ test("createToScalaConverter for GeographyType") {
+ val gt = GeographyType(4326)
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val catalystVal = STUtils.serializeGeogFromWKB(geog, gt)
+ val result = CatalystTypeConverters.createToScalaConverter(gt)(catalystVal)
+ assert(result.isInstanceOf[Geography])
+ assert(result === geog)
+ }
+
+ test("null handling for geospatial individual values") {
+
assert(CatalystTypeConverters.createToScalaConverter(GeometryType(0))(null) ===
null)
+
assert(CatalystTypeConverters.createToScalaConverter(GeographyType(4326))(null)
=== null)
+ }
+
+ test("converting a wrong value to GeometryType") {
+ val gt = GeometryType(0)
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ CatalystTypeConverters.createToCatalystConverter(gt)("test")
+ },
+ condition = "_LEGACY_ERROR_TEMP_3219",
+ parameters = Map(
+ "other" -> "test",
+ "otherClass" -> "java.lang.String",
+ "dataType" -> "STRING"))
+ }
+
+ test("converting a wrong value to GeographyType") {
+ val gt = GeographyType(4326)
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ CatalystTypeConverters.createToCatalystConverter(gt)("test")
+ },
+ condition = "_LEGACY_ERROR_TEMP_3219",
+ parameters = Map(
+ "other" -> "test",
+ "otherClass" -> "java.lang.String",
+ "dataType" -> "STRING"))
+ }
+
+ test("convertToCatalyst with Geometry nested in Seq") {
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = CatalystTypeConverters.convertToCatalyst(Seq(geom))
+ assert(result.isInstanceOf[GenericArrayData])
+ val array = result.asInstanceOf[GenericArrayData]
+ assert(array.numElements() === 1)
+ val element = array.get(0, GeometryType("ANY"))
+ assert(element.isInstanceOf[GeometryVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geometry nested in Array") {
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = CatalystTypeConverters.convertToCatalyst(Array(geom))
+ assert(result.isInstanceOf[GenericArrayData])
+ val array = result.asInstanceOf[GenericArrayData]
+ assert(array.numElements() === 1)
+ val element = array.get(0, GeometryType("ANY"))
+ assert(element.isInstanceOf[GeometryVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geometry nested in Map") {
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = CatalystTypeConverters.convertToCatalyst(Map("key" -> geom))
+ assert(result.isInstanceOf[ArrayBasedMapData])
+ val mapData = result.asInstanceOf[ArrayBasedMapData]
+ val value = mapData.valueArray.get(0, GeometryType("ANY"))
+ assert(value.isInstanceOf[GeometryVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(value.asInstanceOf[GeometryVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geometry nested in Row") {
+ val geom = Geometry.fromWKB(pointWkb, 0)
+ val result = CatalystTypeConverters.convertToCatalyst(Row(geom))
+ assert(result.isInstanceOf[InternalRow])
+ val element = result.asInstanceOf[InternalRow].get(0, GeometryType("ANY"))
+ assert(element.isInstanceOf[GeometryVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeometryVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geography nested in Seq") {
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(Seq(geog))
+ assert(result.isInstanceOf[GenericArrayData])
+ val array = result.asInstanceOf[GenericArrayData]
+ assert(array.numElements() === 1)
+ val element = array.get(0, GeographyType("ANY"))
+ assert(element.isInstanceOf[GeographyVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geography nested in Array") {
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(Array(geog))
+ assert(result.isInstanceOf[GenericArrayData])
+ val array = result.asInstanceOf[GenericArrayData]
+ assert(array.numElements() === 1)
+ val element = array.get(0, GeographyType("ANY"))
+ assert(element.isInstanceOf[GeographyVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geography nested in Map") {
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(Map("key" -> geog))
+ assert(result.isInstanceOf[ArrayBasedMapData])
+ val mapData = result.asInstanceOf[ArrayBasedMapData]
+ val value = mapData.valueArray.get(0, GeographyType("ANY"))
+ assert(value.isInstanceOf[GeographyVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(value.asInstanceOf[GeographyVal]), pointWkb))
+ }
+
+ test("convertToCatalyst with Geography nested in Row") {
+ val geog = Geography.fromWKB(pointWkb, 4326)
+ val result = CatalystTypeConverters.convertToCatalyst(Row(geog))
+ assert(result.isInstanceOf[InternalRow])
+ val element = result.asInstanceOf[InternalRow].get(0, GeographyType("ANY"))
+ assert(element.isInstanceOf[GeographyVal])
+ assert(java.util.Arrays.equals(
+ STUtils.stAsBinary(element.asInstanceOf[GeographyVal]), pointWkb))
+ }
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
index 15c21ad089cb..6b642e874636 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DayTimeIntervalType._
import org.apache.spark.sql.types.YearMonthIntervalType._
-import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}
+import org.apache.spark.unsafe.types.{CalendarInterval, GeographyVal,
GeometryVal, UTF8String}
class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {
@@ -655,4 +655,34 @@ class LiteralExpressionSuite extends SparkFunSuite with
ExpressionEvalHelper {
s"Expression $expr should not be context independent foldable")
}
}
+
+ test("Literal.create with null Geography value") {
+ val lit = Literal.create(null, GeographyType(4326))
+ assert(lit.dataType === GeographyType(4326))
+ assert(lit.value === null)
+ }
+
+ test("Literal.create with Geography value") {
+ val pointBytes = "010100000000000000000031400000000000001C40"
+ .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+ val geog = Geography.fromWKB(pointBytes, 4326)
+ val lit = Literal.create(geog, GeographyType(4326))
+ assert(lit.dataType === GeographyType(4326))
+ assert(lit.value.isInstanceOf[GeographyVal])
+ }
+
+ test("Literal.create with null Geometry value") {
+ val lit = Literal.create(null, GeometryType(0))
+ assert(lit.dataType === GeometryType(0))
+ assert(lit.value === null)
+ }
+
+ test("Literal.create with Geometry value") {
+ val pointBytes = "010100000000000000000031400000000000001C40"
+ .grouped(2).map(Integer.parseInt(_, 16).toByte).toArray
+ val geom = Geometry.fromWKB(pointBytes, 0)
+ val lit = Literal.create(geom, GeometryType(0))
+ assert(lit.dataType === GeometryType(0))
+ assert(lit.value.isInstanceOf[GeometryVal])
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]