This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new f2d594de6195 [SPARK-54243][SQL] Introduce type coercion support for 
GEOGRAPHY data types
f2d594de6195 is described below

commit f2d594de6195b31ccad9de233ad5321be1107a1a
Author: Uros Bojanic <[email protected]>
AuthorDate: Fri Nov 7 19:42:48 2025 -0800

    [SPARK-54243][SQL] Introduce type coercion support for GEOGRAPHY data types
    
    ### What changes were proposed in this pull request?
    Implement least common type (LCT) logic for `GEOGRAPHY` types, as follows:
    
    - LCT for GeographyType(`srid_1`) and GeographyType(`srid_1`) is: 
GeographyType(`srid_1`)
    - LCT for GeographyType(`srid_1`) and GeographyType(`srid_2`) is: 
GeographyType(`ANY`)
    - LCT for GeographyType(`srid_1`) and GeographyType(`ANY`) is: 
GeographyType(`ANY`)
    - LCT for GeographyType(`ANY`) and GeographyType(`ANY`) is: 
GeographyType(`ANY`)
    
    In other words, the mixed SRID `GEOGRAPHY` type is the *common type* for 
all GEOGRAPHY types.
    
    ### Why are the changes needed?
    Introducing LCT and type coercion logic in the geospatial data type system.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, type coercion is now supported for `GeographyType`.
    
    ### How was this patch tested?
    Added tests for geography type coercion:
    - `AnsiTypeCoercionSuite`
    - `TypeCoercionSuite`
    
    Added appropriate Scala suite unit tests:
    - `STExpressionsSuite`
    
    Added appropriate end-to-end SQL tests:
    - `st-functions`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #52944 from uros-db/geo-coercion-geog.
    
    Authored-by: Uros Bojanic <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../sql/catalyst/analysis/AnsiTypeCoercion.scala   |   4 +
 .../spark/sql/catalyst/analysis/TypeCoercion.scala |   4 +
 .../catalyst/analysis/AnsiTypeCoercionSuite.scala  |   7 +
 .../sql/catalyst/analysis/TypeCoercionSuite.scala  |   7 +
 .../analyzer-results/nonansi/st-functions.sql.out  |  72 ++++++++++
 .../analyzer-results/st-functions.sql.out          |  72 ++++++++++
 .../resources/sql-tests/inputs/st-functions.sql    |  22 +++
 .../sql-tests/results/nonansi/st-functions.sql.out |  81 +++++++++++
 .../sql-tests/results/st-functions.sql.out         |  81 +++++++++++
 .../org/apache/spark/sql/STExpressionsSuite.scala  | 148 +++++++++++++++++++++
 10 files changed, 498 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 13b554eb53d4..ea4d04ff6e77 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -130,6 +130,10 @@ object AnsiTypeCoercion extends TypeCoercionBase {
     case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
       Some(YearMonthIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
 
+    // We allow coercion from GEOGRAPHY(<srid>) types (i.e. fixed SRID types) 
to the
+    // GEOGRAPHY(ANY) type (i.e. mixed SRID type). This coercion is always 
safe to do.
+    case (t1: GeographyType, t2: GeographyType) if t1 != t2 => 
Some(GeographyType("ANY"))
+
     case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 3e5f14810935..9030de9473de 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -98,6 +98,10 @@ object TypeCoercion extends TypeCoercionBase {
       case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
         Some(YearMonthIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
 
+      // We allow coercion from GEOGRAPHY(<srid>) types (i.e. fixed SRID 
types) to the
+      // GEOGRAPHY(ANY) type (i.e. mixed SRID type). This coercion is always 
safe to do.
+      case (t1: GeographyType, t2: GeographyType) if t1 != t2 => 
Some(GeographyType("ANY"))
+
       case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
   }
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
index 42acc38eee2d..0e5ebcfa313c 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
@@ -174,6 +174,13 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase {
     widenTest(FloatType, FloatType, Some(FloatType))
     widenTest(DoubleType, DoubleType, Some(DoubleType))
 
+    // Geography with same fixed SRIDs.
+    widenTest(GeographyType(4326), GeographyType(4326), 
Some(GeographyType(4326)))
+    // Geography with mixed SRIDs.
+    widenTest(GeographyType("ANY"), GeographyType("ANY"), 
Some(GeographyType("ANY")))
+    widenTest(GeographyType("ANY"), GeographyType(4326), 
Some(GeographyType("ANY")))
+    widenTest(GeographyType(4326), GeographyType("ANY"), 
Some(GeographyType("ANY")))
+
     // Integral mixed with floating point.
     widenTest(IntegerType, FloatType, Some(DoubleType))
     widenTest(IntegerType, DoubleType, Some(DoubleType))
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 250f20fd0957..0169034c3475 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -597,6 +597,13 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase {
     widenTest(FloatType, FloatType, Some(FloatType))
     widenTest(DoubleType, DoubleType, Some(DoubleType))
 
+    // Geography with same fixed SRIDs.
+    widenTest(GeographyType(4326), GeographyType(4326), 
Some(GeographyType(4326)))
+    // Geography with mixed SRIDs.
+    widenTest(GeographyType("ANY"), GeographyType("ANY"), 
Some(GeographyType("ANY")))
+    widenTest(GeographyType("ANY"), GeographyType(4326), 
Some(GeographyType("ANY")))
+    widenTest(GeographyType(4326), GeographyType("ANY"), 
Some(GeographyType("ANY")))
+
     // Integral mixed with floating point.
     widenTest(IntegerType, FloatType, Some(FloatType))
     widenTest(IntegerType, DoubleType, Some(DoubleType))
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
index a564c6a32932..71d5a808e686 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -153,6 +153,78 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query analysis
+Project [typeof(array(cast(st_geogfromwkb(wkb#x) as geography(any)), 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS 
typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(map(a, cast(st_geogfromwkb(wkb#x) as geography(any)), b, 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS typeof(map(a, 
st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query analysis
+Project [typeof(array(cast(named_struct(g1, st_geogfromwkb(wkb#x), g2, 
cast(st_geogfromwkb(wkb#x) as geography(any))) as 
struct<g1:geography(any),g2:geography(any)>), cast(named_struct(g1, 
cast(st_geogfromwkb(wkb#x) as geography(any)), g2, st_geogfromwkb(wkb#x)) as 
struct<g1:geography(any),g2:geography(any)>))) AS typeof(array(named_struct(g1, 
st_geogfromwkb(wkb), g2, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), 
named_struct(g1, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfr 
[...]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query analysis
+Project [typeof(named_struct(a, array(cast(st_geogfromwkb(wkb#x) as 
geography(any)), cast(st_geogfromwkb(wkb#x) as geography(any))), b, map(g, 
cast(st_geogfromwkb(wkb#x) as geography(any)), h, cast(st_geogfromwkb(wkb#x) as 
geography(any))))) AS typeof(named_struct(a, array(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query analysis
+Project [typeof(nvl(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as 
geography(any)))) AS typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) 
AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(nvl2(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as 
geography(any)), st_geogfromwkb(wkb#x))) AS typeof(nvl2(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query analysis
+Project [typeof(CASE WHEN isnotnull(wkb#x) THEN cast(st_geogfromwkb(wkb#x) as 
geography(any)) ELSE cast(st_geogfromwkb(wkb#x) as geography(any)) END) AS 
typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END)#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(coalesce(cast(st_geogfromwkb(wkb#x) as geography(any)), 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS 
typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(if (isnotnull(wkb#x)) cast(st_geogfromwkb(wkb#x) as 
geography(any)) else cast(st_geogfromwkb(wkb#x) as geography(any))) AS 
typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), 
st_geogfromwkb(wkb))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index a564c6a32932..71d5a808e686 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -153,6 +153,78 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query analysis
+Project [typeof(array(cast(st_geogfromwkb(wkb#x) as geography(any)), 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS 
typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(map(a, cast(st_geogfromwkb(wkb#x) as geography(any)), b, 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS typeof(map(a, 
st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query analysis
+Project [typeof(array(cast(named_struct(g1, st_geogfromwkb(wkb#x), g2, 
cast(st_geogfromwkb(wkb#x) as geography(any))) as 
struct<g1:geography(any),g2:geography(any)>), cast(named_struct(g1, 
cast(st_geogfromwkb(wkb#x) as geography(any)), g2, st_geogfromwkb(wkb#x)) as 
struct<g1:geography(any),g2:geography(any)>))) AS typeof(array(named_struct(g1, 
st_geogfromwkb(wkb), g2, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), 
named_struct(g1, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfr 
[...]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query analysis
+Project [typeof(named_struct(a, array(cast(st_geogfromwkb(wkb#x) as 
geography(any)), cast(st_geogfromwkb(wkb#x) as geography(any))), b, map(g, 
cast(st_geogfromwkb(wkb#x) as geography(any)), h, cast(st_geogfromwkb(wkb#x) as 
geography(any))))) AS typeof(named_struct(a, array(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query analysis
+Project [typeof(nvl(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as 
geography(any)))) AS typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) 
AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(nvl2(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as 
geography(any)), st_geogfromwkb(wkb#x))) AS typeof(nvl2(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query analysis
+Project [typeof(CASE WHEN isnotnull(wkb#x) THEN cast(st_geogfromwkb(wkb#x) as 
geography(any)) ELSE cast(st_geogfromwkb(wkb#x) as geography(any)) END) AS 
typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END)#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(coalesce(cast(st_geogfromwkb(wkb#x) as geography(any)), 
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS 
typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(if (isnotnull(wkb#x)) cast(st_geogfromwkb(wkb#x) as 
geography(any)) else cast(st_geogfromwkb(wkb#x) as geography(any))) AS 
typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), 
st_geogfromwkb(wkb))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+   +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index ceda71398305..e1c11bb089b5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -28,6 +28,28 @@ SELECT 
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f000000000
 -- Casting GEOMETRY(ANY) to GEOMETRY(<srid>) is not allowed.
 SELECT 
CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOMETRY(ANY)
 AS GEOMETRY(4326)) AS result;
 
+---- Geospatial type coercion
+
+-- Array
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata;
+-- Map
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata;
+-- Struct
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata;
+-- Nested
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata;
+
+-- NVL
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata;
+-- NVL2
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata;
+-- CASE WHEN
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata;
+-- COALESCE
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata;
+-- IF
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata;
+
 ---- ST reader/writer expressions
 
 -- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
diff --git 
a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
index c01534c64e7c..71a7d6eaa5dc 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -169,6 +169,87 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query schema
+struct<typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+array<geography(any)>
+array<geography(any)>
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(map(a, st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+map<string,geography(any)>
+map<string,geography(any)>
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query schema
+struct<typeof(array(named_struct(g1, st_geogfromwkb(wkb), g2, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), named_struct(g1, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfromwkb(wkb)))):string>
+-- !query output
+array<struct<g1:geography(any),g2:geography(any)>>
+array<struct<g1:geography(any),g2:geography(any)>>
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query schema
+struct<typeof(named_struct(a, array(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))):string>
+-- !query output
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query schema
+struct<typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof(nvl2(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)), st_geogfromwkb(wkb))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query schema
+struct<typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index c01534c64e7c..71a7d6eaa5dc 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -169,6 +169,87 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
 }
 
 
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query schema
+struct<typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+array<geography(any)>
+array<geography(any)>
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(map(a, st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+map<string,geography(any)>
+map<string,geography(any)>
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query schema
+struct<typeof(array(named_struct(g1, st_geogfromwkb(wkb), g2, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), named_struct(g1, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfromwkb(wkb)))):string>
+-- !query output
+array<struct<g1:geography(any),g2:geography(any)>>
+array<struct<g1:geography(any),g2:geography(any)>>
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h', 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query schema
+struct<typeof(named_struct(a, array(st_geogfromwkb(wkb), 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h, 
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))):string>
+-- !query output
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) 
FROM geodata
+-- !query schema
+struct<typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof(nvl2(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)), st_geogfromwkb(wkb))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query schema
+struct<typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb), 
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS 
GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
 -- !query
 SELECT 
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040'))) 
AS result
 -- !query schema
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
index 79e7ecbdf4c3..6dd8f4347105 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -123,6 +123,154 @@ class STExpressionsSuite
     }
   }
 
+  /** Geospatial type coercion. */
+
+  test("CreateArray with GEOGRAPHY literals") {
+    // Test data: WKB representation of POINT(1 2).
+    val wkbString = "0101000000000000000000F03F0000000000000040"
+    // Test with literals, using geographies with different SRID values.
+    val geog1 = s"ST_GeogFromWKB(X'$wkbString')" // Literal with fixed SRID 
(4326).
+    val geographyType1 = GeographyType(4326)
+    val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Literal with mixed SRID (ANY).
+    val geographyType2 = GeographyType("ANY")
+    val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+    val row = Row(wkbString, 4326)
+
+    val testCases = Seq(
+      (s"array($geog1)", geographyType1, Seq(row)),
+      (s"array($geog2)", geographyType2, Seq(row)),
+      (s"array($geog1, $geog1)", geographyType1, Seq(row, row)),
+      (s"array($geog2, $geog2)", geographyType2, Seq(row, row)),
+      (s"array($geog1, $geog2)", mixedSridGeographyType, Seq(row, row)),
+      (s"array($geog2, $geog1)", mixedSridGeographyType, Seq(row, row))
+    )
+
+    for ((expr, expectedType, expectedRows) <- testCases) {
+      assertType(
+        s"SELECT $expr",
+        ArrayType(expectedType)
+      )
+      checkAnswer(
+        sql(s"WITH t AS (SELECT explode($expr) AS g) SELECT $geo FROM t"),
+        expectedRows
+      )
+    }
+  }
+
+  test("CreateArray with GEOGRAPHY columns") {
+    // Test data: WKB representation of POINT(1 2).
+    val wkbString = "0101000000000000000000F03F0000000000000040"
+    // Test with columns, using geographies with different SRID values.
+    val geog1 = "ST_GeogFromWKB(wkb)" // Column with fixed SRID (4326).
+    val geographyType1 = GeographyType(4326)
+    val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Column with mixed SRID (ANY).
+    val geographyType2 = GeographyType("ANY")
+    val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+    val row = Row(wkbString, 4326)
+
+    val testCases = Seq(
+      (s"array($geog1)", geographyType1, Seq(row)),
+      (s"array($geog2)", geographyType2, Seq(row)),
+      (s"array($geog1, $geog1)", geographyType1, Seq(row, row)),
+      (s"array($geog2, $geog2)", geographyType2, Seq(row, row)),
+      (s"array($geog1, $geog2)", mixedSridGeographyType, Seq(row, row)),
+      (s"array($geog2, $geog1)", mixedSridGeographyType, Seq(row, row))
+    )
+
+    // Test with literal and column, using geographies with different SRID 
values.
+    withTable("tbl") {
+      // Construct and populate the test table.
+      sql("CREATE TABLE tbl (wkb BINARY)")
+      sql(s"INSERT INTO tbl VALUES (X'$wkbString')")
+
+      for ((query, expectedType, expectedRows) <- testCases) {
+        assertType(
+          s"SELECT $query FROM tbl",
+          ArrayType(expectedType)
+        )
+        checkAnswer(
+          sql(s"WITH t AS (SELECT explode($query) AS g FROM tbl) SELECT $geo 
FROM t"),
+          expectedRows
+        )
+      }
+    }
+  }
+
+  test("NVL with GEOGRAPHY literals") {
+    // Test data: WKB representation of POINT(1 2).
+    val wkbString = "0101000000000000000000F03F0000000000000040"
+    // Test with literals, using geographies with different SRID values.
+    val geog1 = s"ST_GeogFromWKB(X'$wkbString')" // Literal with fixed SRID 
(4326).
+    val geographyType1 = GeographyType(4326)
+    val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Literal with mixed SRID (ANY).
+    val geographyType2 = GeographyType("ANY")
+    val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+    val row = Row(wkbString, 4326)
+
+    val testCases = Seq(
+      (s"nvl(null, $geog1)", geographyType1, Seq(row)),
+      (s"nvl($geog1, null)", geographyType1, Seq(row)),
+      (s"nvl(null, $geog2)", geographyType2, Seq(row)),
+      (s"nvl($geog2, null)", geographyType2, Seq(row)),
+      (s"nvl($geog1, $geog1)", geographyType1, Seq(row)),
+      (s"nvl($geog2, $geog2)", geographyType2, Seq(row)),
+      (s"nvl($geog1, $geog2)", mixedSridGeographyType, Seq(row)),
+      (s"nvl($geog2, $geog1)", mixedSridGeographyType, Seq(row))
+    )
+
+    for ((expr, expectedType, expectedRows) <- testCases) {
+      assertType(
+        s"SELECT $expr",
+        expectedType
+      )
+      checkAnswer(
+        sql(s"WITH t AS (SELECT $expr AS g) SELECT $geo FROM t"),
+        expectedRows
+      )
+    }
+  }
+
+  test("NVL with GEOGRAPHY columns") {
+    // Test data: WKB representation of POINT(1 2).
+    val wkbString = "0101000000000000000000F03F0000000000000040"
+    // Test with columns, using geographies with different SRID values.
+    val geog1 = "ST_GeogFromWKB(wkb)" // Column with fixed SRID (4326).
+    val geographyType1 = GeographyType(4326)
+    val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Column with mixed SRID (ANY).
+    val geographyType2 = GeographyType("ANY")
+    val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+    val row = Row(wkbString, 4326)
+
+    val testCases = Seq(
+      (s"nvl(null, $geog1)", geographyType1, Seq(row)),
+      (s"nvl($geog1, null)", geographyType1, Seq(row)),
+      (s"nvl(null, $geog2)", geographyType2, Seq(row)),
+      (s"nvl($geog2, null)", geographyType2, Seq(row)),
+      (s"nvl($geog1, $geog1)", geographyType1, Seq(row)),
+      (s"nvl($geog2, $geog2)", geographyType2, Seq(row)),
+      (s"nvl($geog1, $geog2)", mixedSridGeographyType, Seq(row)),
+      (s"nvl($geog2, $geog1)", mixedSridGeographyType, Seq(row))
+    )
+
+    // Test with literal and column, using geographies with different SRID 
values.
+    withTable("tbl") {
+      // Construct and populate the test table.
+      sql("CREATE TABLE tbl (wkb BINARY)")
+      sql(s"INSERT INTO tbl VALUES (X'$wkbString')")
+
+      for ((query, expectedType, expectedRows) <- testCases) {
+        assertType(
+          s"SELECT $query FROM tbl",
+          expectedType
+        )
+        checkAnswer(
+          sql(s"WITH t AS (SELECT $query AS g FROM tbl) SELECT $geo FROM t"),
+          expectedRows
+        )
+      }
+    }
+  }
+
   /** ST reader/writer expressions. */
 
   test("ST_AsBinary") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to