This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new bed030cd23cc [SPARK-54669][SQL] Remove redundant casting in rCTEs
bed030cd23cc is described below
commit bed030cd23ccac019d40459bfb9e979bb3616e35
Author: pavle-martinovic_data <[email protected]>
AuthorDate: Thu Dec 11 20:31:23 2025 +0800
[SPARK-54669][SQL] Remove redundant casting in rCTEs
### What changes were proposed in this pull request?
Change TypeCoercionBase so that only the outputs whose data type differs from
the target type are cast; outputs that already have the target type are passed
through unchanged.
### Why are the changes needed?
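To remove redundant CASTs (casts of a column to the type it already has) from
the analyzed plans of recursive CTEs (rCTEs).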
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing golden file test cte-recursion, extended with a new query that covers
type coercion with a time-zone-sensitive type.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53428 from Pajaraja/pavle-martinovic_data/noRedundantCast.
Authored-by: pavle-martinovic_data <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/TypeCoercionBase.scala | 6 ++++-
.../analyzer-results/cte-recursion.sql.out | 28 +++++++++++++++++++++-
.../resources/sql-tests/inputs/cte-recursion.sql | 8 +++++++
.../sql-tests/results/cte-recursion.sql.out | 17 +++++++++++++
4 files changed, 57 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
index 271e151e709c..54c4ed5fb843 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
@@ -262,7 +262,11 @@ abstract class TypeCoercionBase extends TypeCoercionHelper
{
case (attr, dt) =>
val widerType = findWiderTypeForTwo(attr.dataType, dt)
if (widerType.isDefined && widerType.get == dt) {
- Alias(Cast(attr, dt), attr.name)()
+ if (attr.dataType != dt) {
+ Alias(Cast(attr, dt), attr.name)()
+ } else {
+ attr
+ }
} else {
throw cannotMergeIncompatibleDataTypesError(dt, attr.dataType)
}
diff --git
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
index ae0de444ed5e..419dc9e8dcea 100644
---
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
+++
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
@@ -1907,7 +1907,7 @@ WithCTE
: +- UnionLoop xxxx
: :- Project [1 AS 1#x, cast(1 as bigint) AS CAST(1 AS BIGINT)#xL]
: : +- OneRowRelation
-: +- Project [cast((n + 1)#x as int) AS (n + 1)#x, cast((n + 1)#x as
bigint) AS (n + 1)#xL]
+: +- Project [(n + 1)#x, cast((n + 1)#x as bigint) AS (n + 1)#xL]
: +- Project [(n#x + 1) AS (n + 1)#x, (n#x + 1) AS (n + 1)#x]
: +- Filter (n#x < 5)
: +- SubqueryAlias t1
@@ -1936,6 +1936,32 @@ org.apache.spark.SparkException
}
+-- !query
+WITH RECURSIVE t1(n, str, ts) AS (
+ SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+ UNION ALL
+ SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+: +- SubqueryAlias t1
+: +- Project [1#x AS n#x, 2024-01-15 00:00:00#x AS str#x, CAST(2024-01-15
00:00:00 AS TIMESTAMP)#x AS ts#x]
+: +- UnionLoop xxxx
+: :- Project [1 AS 1#x, 2024-01-15 00:00:00 AS 2024-01-15
00:00:00#x, cast(2024-01-15 00:00:00 as timestamp) AS CAST(2024-01-15 00:00:00
AS TIMESTAMP)#x]
+: : +- OneRowRelation
+: +- Project [(n + 1)#x, str#x, cast(str#x as timestamp) AS str#x]
+: +- Project [(n#x + 1) AS (n + 1)#x, str#x, str#x]
+: +- Filter (n#x < 5)
+: +- SubqueryAlias t1
+: +- Project [1#x AS n#x, 2024-01-15 00:00:00#x AS
str#x, CAST(2024-01-15 00:00:00 AS TIMESTAMP)#x AS ts#x]
+: +- UnionLoopRef xxxx, [1#x, 2024-01-15 00:00:00#x,
CAST(2024-01-15 00:00:00 AS TIMESTAMP)#x], false
++- Project [n#x, str#x, ts#x]
+ +- SubqueryAlias t1
+ +- CTERelationRef xxxx, true, [n#x, str#x, ts#x], false, false
+
+
-- !query
WITH RECURSIVE t1(n) AS (
SELECT 1
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
index 4aa7be79a058..44d5ae49104a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
@@ -728,6 +728,14 @@ WITH RECURSIVE t1(n, m) AS (
SELECT n+1, CAST(n+1 AS BIGINT) FROM t1 WHERE n < 5)
SELECT * FROM t1;
+-- Type coercion with timeZone sensitive type
+WITH RECURSIVE t1(n, str, ts) AS (
+ SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+ UNION ALL
+ SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1;
+
-- Recursive CTE with nullable recursion and non-recursive anchor
WITH RECURSIVE t1(n) AS (
SELECT 1
diff --git
a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
index 536fc6c4ea63..0a2cfb5c706e 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
@@ -1997,6 +1997,23 @@ org.apache.spark.SparkException
}
+-- !query
+WITH RECURSIVE t1(n, str, ts) AS (
+ SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+ UNION ALL
+ SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1
+-- !query schema
+struct<n:int,str:string,ts:timestamp>
+-- !query output
+1 2024-01-15 00:00:00 2024-01-15 00:00:00
+2 2024-01-15 00:00:00 2024-01-15 00:00:00
+3 2024-01-15 00:00:00 2024-01-15 00:00:00
+4 2024-01-15 00:00:00 2024-01-15 00:00:00
+5 2024-01-15 00:00:00 2024-01-15 00:00:00
+
+
-- !query
WITH RECURSIVE t1(n) AS (
SELECT 1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]