This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 136246c29515 [SPARK-54243][SQL] Introduce type coercion support for
GEOGRAPHY data types
136246c29515 is described below
commit 136246c295150d589b70766edb0010321902159f
Author: Uros Bojanic <[email protected]>
AuthorDate: Fri Nov 7 19:42:48 2025 -0800
[SPARK-54243][SQL] Introduce type coercion support for GEOGRAPHY data types
### What changes were proposed in this pull request?
Implement least common type (LCT) logic for `GEOGRAPHY` types, as follows:
- LCT for GeographyType(`srid_1`) and GeographyType(`srid_1`) is:
GeographyType(`srid_1`)
- LCT for GeographyType(`srid_1`) and GeographyType(`srid_2`) is:
GeographyType(`ANY`)
- LCT for GeographyType(`srid_1`) and GeographyType(`ANY`) is:
GeographyType(`ANY`)
- LCT for GeographyType(`ANY`) and GeographyType(`ANY`) is:
GeographyType(`ANY`)
In other words, the mixed SRID `GEOGRAPHY` type is the *common type* for
all GEOGRAPHY types.
### Why are the changes needed?
Introducing LCT and type coercion logic in the geospatial data type system.
### Does this PR introduce _any_ user-facing change?
Yes, type coercion is now supported for `GeographyType`.
### How was this patch tested?
Added tests for geography type coercion:
- `AnsiTypeCoercionSuite`
- `TypeCoercionSuite`
Added appropriate Scala suite unit tests:
- `STExpressionsSuite`
Added appropriate end-to-end SQL tests:
- `st-functions`
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52944 from uros-db/geo-coercion-geog.
Authored-by: Uros Bojanic <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit f2d594de6195b31ccad9de233ad5321be1107a1a)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/catalyst/analysis/AnsiTypeCoercion.scala | 4 +
.../spark/sql/catalyst/analysis/TypeCoercion.scala | 4 +
.../catalyst/analysis/AnsiTypeCoercionSuite.scala | 7 +
.../sql/catalyst/analysis/TypeCoercionSuite.scala | 7 +
.../analyzer-results/nonansi/st-functions.sql.out | 72 ++++++++++
.../analyzer-results/st-functions.sql.out | 72 ++++++++++
.../resources/sql-tests/inputs/st-functions.sql | 22 +++
.../sql-tests/results/nonansi/st-functions.sql.out | 81 +++++++++++
.../sql-tests/results/st-functions.sql.out | 81 +++++++++++
.../org/apache/spark/sql/STExpressionsSuite.scala | 148 +++++++++++++++++++++
10 files changed, 498 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 13b554eb53d4..ea4d04ff6e77 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -130,6 +130,10 @@ object AnsiTypeCoercion extends TypeCoercionBase {
case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
Some(YearMonthIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+ // We allow coercion from GEOGRAPHY(<srid>) types (i.e. fixed SRID types)
to the
+ // GEOGRAPHY(ANY) type (i.e. mixed SRID type). This coercion is always
safe to do.
+ case (t1: GeographyType, t2: GeographyType) if t1 != t2 =>
Some(GeographyType("ANY"))
+
case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 3e5f14810935..9030de9473de 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -98,6 +98,10 @@ object TypeCoercion extends TypeCoercionBase {
case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
Some(YearMonthIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+ // We allow coercion from GEOGRAPHY(<srid>) types (i.e. fixed SRID
types) to the
+ // GEOGRAPHY(ANY) type (i.e. mixed SRID type). This coercion is always
safe to do.
+ case (t1: GeographyType, t2: GeographyType) if t1 != t2 =>
Some(GeographyType("ANY"))
+
case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
index 42acc38eee2d..0e5ebcfa313c 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercionSuite.scala
@@ -174,6 +174,13 @@ class AnsiTypeCoercionSuite extends TypeCoercionSuiteBase {
widenTest(FloatType, FloatType, Some(FloatType))
widenTest(DoubleType, DoubleType, Some(DoubleType))
+ // Geography with same fixed SRIDs.
+ widenTest(GeographyType(4326), GeographyType(4326),
Some(GeographyType(4326)))
+ // Geography with mixed SRIDs.
+ widenTest(GeographyType("ANY"), GeographyType("ANY"),
Some(GeographyType("ANY")))
+ widenTest(GeographyType("ANY"), GeographyType(4326),
Some(GeographyType("ANY")))
+ widenTest(GeographyType(4326), GeographyType("ANY"),
Some(GeographyType("ANY")))
+
// Integral mixed with floating point.
widenTest(IntegerType, FloatType, Some(DoubleType))
widenTest(IntegerType, DoubleType, Some(DoubleType))
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 250f20fd0957..0169034c3475 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -597,6 +597,13 @@ class TypeCoercionSuite extends TypeCoercionSuiteBase {
widenTest(FloatType, FloatType, Some(FloatType))
widenTest(DoubleType, DoubleType, Some(DoubleType))
+ // Geography with same fixed SRIDs.
+ widenTest(GeographyType(4326), GeographyType(4326),
Some(GeographyType(4326)))
+ // Geography with mixed SRIDs.
+ widenTest(GeographyType("ANY"), GeographyType("ANY"),
Some(GeographyType("ANY")))
+ widenTest(GeographyType("ANY"), GeographyType(4326),
Some(GeographyType("ANY")))
+ widenTest(GeographyType(4326), GeographyType("ANY"),
Some(GeographyType("ANY")))
+
// Integral mixed with floating point.
widenTest(IntegerType, FloatType, Some(FloatType))
widenTest(IntegerType, DoubleType, Some(DoubleType))
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
index a564c6a32932..71d5a808e686 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/st-functions.sql.out
@@ -153,6 +153,78 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query analysis
+Project [typeof(array(cast(st_geogfromwkb(wkb#x) as geography(any)),
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS
typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(map(a, cast(st_geogfromwkb(wkb#x) as geography(any)), b,
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS typeof(map(a,
st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query analysis
+Project [typeof(array(cast(named_struct(g1, st_geogfromwkb(wkb#x), g2,
cast(st_geogfromwkb(wkb#x) as geography(any))) as
struct<g1:geography(any),g2:geography(any)>), cast(named_struct(g1,
cast(st_geogfromwkb(wkb#x) as geography(any)), g2, st_geogfromwkb(wkb#x)) as
struct<g1:geography(any),g2:geography(any)>))) AS typeof(array(named_struct(g1,
st_geogfromwkb(wkb), g2, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))),
named_struct(g1, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfr
[...]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query analysis
+Project [typeof(named_struct(a, array(cast(st_geogfromwkb(wkb#x) as
geography(any)), cast(st_geogfromwkb(wkb#x) as geography(any))), b, map(g,
cast(st_geogfromwkb(wkb#x) as geography(any)), h, cast(st_geogfromwkb(wkb#x) as
geography(any))))) AS typeof(named_struct(a, array(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query analysis
+Project [typeof(nvl(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as
geography(any)))) AS typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb)
AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(nvl2(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as
geography(any)), st_geogfromwkb(wkb#x))) AS typeof(nvl2(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query analysis
+Project [typeof(CASE WHEN isnotnull(wkb#x) THEN cast(st_geogfromwkb(wkb#x) as
geography(any)) ELSE cast(st_geogfromwkb(wkb#x) as geography(any)) END) AS
typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(coalesce(cast(st_geogfromwkb(wkb#x) as geography(any)),
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS
typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(if (isnotnull(wkb#x)) cast(st_geogfromwkb(wkb#x) as
geography(any)) else cast(st_geogfromwkb(wkb#x) as geography(any))) AS
typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)),
st_geogfromwkb(wkb))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
-- !query
SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
-- !query analysis
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
index a564c6a32932..71d5a808e686 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/st-functions.sql.out
@@ -153,6 +153,78 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query analysis
+Project [typeof(array(cast(st_geogfromwkb(wkb#x) as geography(any)),
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS
typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(map(a, cast(st_geogfromwkb(wkb#x) as geography(any)), b,
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS typeof(map(a,
st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query analysis
+Project [typeof(array(cast(named_struct(g1, st_geogfromwkb(wkb#x), g2,
cast(st_geogfromwkb(wkb#x) as geography(any))) as
struct<g1:geography(any),g2:geography(any)>), cast(named_struct(g1,
cast(st_geogfromwkb(wkb#x) as geography(any)), g2, st_geogfromwkb(wkb#x)) as
struct<g1:geography(any),g2:geography(any)>))) AS typeof(array(named_struct(g1,
st_geogfromwkb(wkb), g2, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))),
named_struct(g1, CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfr
[...]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query analysis
+Project [typeof(named_struct(a, array(cast(st_geogfromwkb(wkb#x) as
geography(any)), cast(st_geogfromwkb(wkb#x) as geography(any))), b, map(g,
cast(st_geogfromwkb(wkb#x) as geography(any)), h, cast(st_geogfromwkb(wkb#x) as
geography(any))))) AS typeof(named_struct(a, array(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query analysis
+Project [typeof(nvl(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as
geography(any)))) AS typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb)
AS GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(nvl2(st_geogfromwkb(wkb#x), cast(st_geogfromwkb(wkb#x) as
geography(any)), st_geogfromwkb(wkb#x))) AS typeof(nvl2(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query analysis
+Project [typeof(CASE WHEN isnotnull(wkb#x) THEN cast(st_geogfromwkb(wkb#x) as
geography(any)) ELSE cast(st_geogfromwkb(wkb#x) as geography(any)) END) AS
typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END)#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query analysis
+Project [typeof(coalesce(cast(st_geogfromwkb(wkb#x) as geography(any)),
cast(st_geogfromwkb(wkb#x) as geography(any)))) AS
typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query analysis
+Project [typeof(if (isnotnull(wkb#x)) cast(st_geogfromwkb(wkb#x) as
geography(any)) else cast(st_geogfromwkb(wkb#x) as geography(any))) AS
typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)),
st_geogfromwkb(wkb))))#x]
++- SubqueryAlias spark_catalog.default.geodata
+ +- Relation spark_catalog.default.geodata[wkb#x] parquet
+
+
-- !query
SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
-- !query analysis
diff --git a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
index ceda71398305..e1c11bb089b5 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/st-functions.sql
@@ -28,6 +28,28 @@ SELECT
hex(ST_AsBinary(CAST(ST_GeomFromWKB(X'0101000000000000000000f03f000000000
-- Casting GEOMETRY(ANY) to GEOMETRY(<srid>) is not allowed.
SELECT
CAST(ST_GeomFromWKB(X'0101000000000000000000f03f0000000000000040')::GEOMETRY(ANY)
AS GEOMETRY(4326)) AS result;
+---- Geospatial type coercion
+
+-- Array
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata;
+-- Map
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata;
+-- Struct
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata;
+-- Nested
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata;
+
+-- NVL
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata;
+-- NVL2
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata;
+-- CASE WHEN
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata;
+-- COALESCE
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata;
+-- IF
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata;
+
---- ST reader/writer expressions
-- WKB (Well-Known Binary) round-trip tests for GEOGRAPHY and GEOMETRY types.
diff --git
a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
index c01534c64e7c..71a7d6eaa5dc 100644
--- a/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/nonansi/st-functions.sql.out
@@ -169,6 +169,87 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query schema
+struct<typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+array<geography(any)>
+array<geography(any)>
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(map(a, st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+map<string,geography(any)>
+map<string,geography(any)>
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query schema
+struct<typeof(array(named_struct(g1, st_geogfromwkb(wkb), g2,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), named_struct(g1,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfromwkb(wkb)))):string>
+-- !query output
+array<struct<g1:geography(any),g2:geography(any)>>
+array<struct<g1:geography(any),g2:geography(any)>>
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query schema
+struct<typeof(named_struct(a, array(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))):string>
+-- !query output
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query schema
+struct<typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof(nvl2(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)), st_geogfromwkb(wkb))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query schema
+struct<typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
-- !query
SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
-- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
index c01534c64e7c..71a7d6eaa5dc 100644
--- a/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/st-functions.sql.out
@@ -169,6 +169,87 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+SELECT typeof(array(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query schema
+struct<typeof(array(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+array<geography(any)>
+array<geography(any)>
+
+
+-- !query
+SELECT typeof(map('a', ST_GeogFromWKB(wkb), 'b',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(map(a, st_geogfromwkb(wkb), b, CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+map<string,geography(any)>
+map<string,geography(any)>
+
+
+-- !query
+SELECT typeof(array(named_struct('g1', ST_GeogFromWKB(wkb), 'g2',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), named_struct('g1',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY), 'g2', ST_GeogFromWKB(wkb)))) FROM geodata
+-- !query schema
+struct<typeof(array(named_struct(g1, st_geogfromwkb(wkb), g2,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), named_struct(g1,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY)), g2, st_geogfromwkb(wkb)))):string>
+-- !query output
+array<struct<g1:geography(any),g2:geography(any)>>
+array<struct<g1:geography(any),g2:geography(any)>>
+
+
+-- !query
+SELECT typeof(named_struct('a', array(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)), 'b', map('g', ST_GeogFromWKB(wkb), 'h',
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))) FROM geodata
+-- !query schema
+struct<typeof(named_struct(a, array(st_geogfromwkb(wkb),
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))), b, map(g, st_geogfromwkb(wkb), h,
CAST(st_geogfromwkb(wkb) AS GEOGRAPHY(ANY))))):string>
+-- !query output
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+struct<a:array<geography(any)>,b:map<string,geography(any)>>
+
+
+-- !query
+SELECT typeof(nvl(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY)))
FROM geodata
+-- !query schema
+struct<typeof(nvl(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(nvl2(ST_GeogFromWKB(wkb), ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof(nvl2(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)), st_geogfromwkb(wkb))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(CASE WHEN wkb IS NOT NULL THEN
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY) ELSE ST_GeogFromWKB(wkb) END) FROM geodata
+-- !query schema
+struct<typeof(CASE WHEN (wkb IS NOT NULL) THEN CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)) ELSE st_geogfromwkb(wkb) END):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(coalesce(ST_GeogFromWKB(wkb),
ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY))) FROM geodata
+-- !query schema
+struct<typeof(coalesce(st_geogfromwkb(wkb), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
+-- !query
+SELECT typeof(IF(wkb IS NOT NULL, ST_GeogFromWKB(wkb)::GEOGRAPHY(ANY),
ST_GeogFromWKB(wkb))) FROM geodata
+-- !query schema
+struct<typeof((IF((wkb IS NOT NULL), CAST(st_geogfromwkb(wkb) AS
GEOGRAPHY(ANY)), st_geogfromwkb(wkb)))):string>
+-- !query output
+geography(any)
+geography(any)
+
+
-- !query
SELECT
hex(ST_AsBinary(ST_GeogFromWKB(X'0101000000000000000000f03f0000000000000040')))
AS result
-- !query schema
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
index 79e7ecbdf4c3..6dd8f4347105 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/STExpressionsSuite.scala
@@ -123,6 +123,154 @@ class STExpressionsSuite
}
}
+ /** Geospatial type coercion. */
+
+ test("CreateArray with GEOGRAPHY literals") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkbString = "0101000000000000000000F03F0000000000000040"
+ // Test with literals, using geographies with different SRID values.
+ val geog1 = s"ST_GeogFromWKB(X'$wkbString')" // Literal with fixed SRID
(4326).
+ val geographyType1 = GeographyType(4326)
+ val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Literal with mixed SRID (ANY).
+ val geographyType2 = GeographyType("ANY")
+ val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+ val row = Row(wkbString, 4326)
+
+ val testCases = Seq(
+ (s"array($geog1)", geographyType1, Seq(row)),
+ (s"array($geog2)", geographyType2, Seq(row)),
+ (s"array($geog1, $geog1)", geographyType1, Seq(row, row)),
+ (s"array($geog2, $geog2)", geographyType2, Seq(row, row)),
+ (s"array($geog1, $geog2)", mixedSridGeographyType, Seq(row, row)),
+ (s"array($geog2, $geog1)", mixedSridGeographyType, Seq(row, row))
+ )
+
+ for ((expr, expectedType, expectedRows) <- testCases) {
+ assertType(
+ s"SELECT $expr",
+ ArrayType(expectedType)
+ )
+ checkAnswer(
+ sql(s"WITH t AS (SELECT explode($expr) AS g) SELECT $geo FROM t"),
+ expectedRows
+ )
+ }
+ }
+
+ test("CreateArray with GEOGRAPHY columns") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkbString = "0101000000000000000000F03F0000000000000040"
+ // Test with columns, using geographies with different SRID values.
+ val geog1 = "ST_GeogFromWKB(wkb)" // Column with fixed SRID (4326).
+ val geographyType1 = GeographyType(4326)
+ val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Column with mixed SRID (ANY).
+ val geographyType2 = GeographyType("ANY")
+ val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+ val row = Row(wkbString, 4326)
+
+ val testCases = Seq(
+ (s"array($geog1)", geographyType1, Seq(row)),
+ (s"array($geog2)", geographyType2, Seq(row)),
+ (s"array($geog1, $geog1)", geographyType1, Seq(row, row)),
+ (s"array($geog2, $geog2)", geographyType2, Seq(row, row)),
+ (s"array($geog1, $geog2)", mixedSridGeographyType, Seq(row, row)),
+ (s"array($geog2, $geog1)", mixedSridGeographyType, Seq(row, row))
+ )
+
+ // Test with literal and column, using geographies with different SRID
values.
+ withTable("tbl") {
+ // Construct and populate the test table.
+ sql("CREATE TABLE tbl (wkb BINARY)")
+ sql(s"INSERT INTO tbl VALUES (X'$wkbString')")
+
+ for ((query, expectedType, expectedRows) <- testCases) {
+ assertType(
+ s"SELECT $query FROM tbl",
+ ArrayType(expectedType)
+ )
+ checkAnswer(
+ sql(s"WITH t AS (SELECT explode($query) AS g FROM tbl) SELECT $geo
FROM t"),
+ expectedRows
+ )
+ }
+ }
+ }
+
+ test("NVL with GEOGRAPHY literals") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkbString = "0101000000000000000000F03F0000000000000040"
+ // Test with literals, using geographies with different SRID values.
+ val geog1 = s"ST_GeogFromWKB(X'$wkbString')" // Literal with fixed SRID
(4326).
+ val geographyType1 = GeographyType(4326)
+ val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Literal with mixed SRID (ANY).
+ val geographyType2 = GeographyType("ANY")
+ val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+ val row = Row(wkbString, 4326)
+
+ val testCases = Seq(
+ (s"nvl(null, $geog1)", geographyType1, Seq(row)),
+ (s"nvl($geog1, null)", geographyType1, Seq(row)),
+ (s"nvl(null, $geog2)", geographyType2, Seq(row)),
+ (s"nvl($geog2, null)", geographyType2, Seq(row)),
+ (s"nvl($geog1, $geog1)", geographyType1, Seq(row)),
+ (s"nvl($geog2, $geog2)", geographyType2, Seq(row)),
+ (s"nvl($geog1, $geog2)", mixedSridGeographyType, Seq(row)),
+ (s"nvl($geog2, $geog1)", mixedSridGeographyType, Seq(row))
+ )
+
+ for ((expr, expectedType, expectedRows) <- testCases) {
+ assertType(
+ s"SELECT $expr",
+ expectedType
+ )
+ checkAnswer(
+ sql(s"WITH t AS (SELECT $expr AS g) SELECT $geo FROM t"),
+ expectedRows
+ )
+ }
+ }
+
+ test("NVL with GEOGRAPHY columns") {
+ // Test data: WKB representation of POINT(1 2).
+ val wkbString = "0101000000000000000000F03F0000000000000040"
+ // Test with columns, using geographies with different SRID values.
+ val geog1 = "ST_GeogFromWKB(wkb)" // Column with fixed SRID (4326).
+ val geographyType1 = GeographyType(4326)
+ val geog2 = s"$geog1::GEOGRAPHY(ANY)" // Column with mixed SRID (ANY).
+ val geographyType2 = GeographyType("ANY")
+ val geo = "hex(ST_AsBinary(g)), ST_Srid(g)"
+ val row = Row(wkbString, 4326)
+
+ val testCases = Seq(
+ (s"nvl(null, $geog1)", geographyType1, Seq(row)),
+ (s"nvl($geog1, null)", geographyType1, Seq(row)),
+ (s"nvl(null, $geog2)", geographyType2, Seq(row)),
+ (s"nvl($geog2, null)", geographyType2, Seq(row)),
+ (s"nvl($geog1, $geog1)", geographyType1, Seq(row)),
+ (s"nvl($geog2, $geog2)", geographyType2, Seq(row)),
+ (s"nvl($geog1, $geog2)", mixedSridGeographyType, Seq(row)),
+ (s"nvl($geog2, $geog1)", mixedSridGeographyType, Seq(row))
+ )
+
+ // Test with literal and column, using geographies with different SRID
values.
+ withTable("tbl") {
+ // Construct and populate the test table.
+ sql("CREATE TABLE tbl (wkb BINARY)")
+ sql(s"INSERT INTO tbl VALUES (X'$wkbString')")
+
+ for ((query, expectedType, expectedRows) <- testCases) {
+ assertType(
+ s"SELECT $query FROM tbl",
+ expectedType
+ )
+ checkAnswer(
+ sql(s"WITH t AS (SELECT $query AS g FROM tbl) SELECT $geo FROM t"),
+ expectedRows
+ )
+ }
+ }
+ }
+
/** ST reader/writer expressions. */
test("ST_AsBinary") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]