This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bed030cd23cc [SPARK-54669][SQL] Remove redundant casting in rCTEs
bed030cd23cc is described below

commit bed030cd23ccac019d40459bfb9e979bb3616e35
Author: pavle-martinovic_data <[email protected]>
AuthorDate: Thu Dec 11 20:31:23 2025 +0800

    [SPARK-54669][SQL] Remove redundant casting in rCTEs
    
    ### What changes were proposed in this pull request?
    
    Change TypeCoercionBase so that only the outputs that need a cast are cast.
    
    ### Why are the changes needed?
    
    To remove redundant CASTs: outputs whose type already matched were still being wrapped in a CAST.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing golden file test cte-recursion, extended with a new query.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #53428 from Pajaraja/pavle-martinovic_data/noRedundantCast.
    
    Authored-by: pavle-martinovic_data <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/analysis/TypeCoercionBase.scala   |  6 ++++-
 .../analyzer-results/cte-recursion.sql.out         | 28 +++++++++++++++++++++-
 .../resources/sql-tests/inputs/cte-recursion.sql   |  8 +++++++
 .../sql-tests/results/cte-recursion.sql.out        | 17 +++++++++++++
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
index 271e151e709c..54c4ed5fb843 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionBase.scala
@@ -262,7 +262,11 @@ abstract class TypeCoercionBase extends TypeCoercionHelper 
{
             case (attr, dt) =>
               val widerType = findWiderTypeForTwo(attr.dataType, dt)
               if (widerType.isDefined && widerType.get == dt) {
-                Alias(Cast(attr, dt), attr.name)()
+                if (attr.dataType != dt) {
+                  Alias(Cast(attr, dt), attr.name)()
+                } else {
+                  attr
+                }
               } else {
                 throw cannotMergeIncompatibleDataTypesError(dt, attr.dataType)
               }
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
index ae0de444ed5e..419dc9e8dcea 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-recursion.sql.out
@@ -1907,7 +1907,7 @@ WithCTE
 :        +- UnionLoop xxxx
 :           :- Project [1 AS 1#x, cast(1 as bigint) AS CAST(1 AS BIGINT)#xL]
 :           :  +- OneRowRelation
-:           +- Project [cast((n + 1)#x as int) AS (n + 1)#x, cast((n + 1)#x as 
bigint) AS (n + 1)#xL]
+:           +- Project [(n + 1)#x, cast((n + 1)#x as bigint) AS (n + 1)#xL]
 :              +- Project [(n#x + 1) AS (n + 1)#x, (n#x + 1) AS (n + 1)#x]
 :                 +- Filter (n#x < 5)
 :                    +- SubqueryAlias t1
@@ -1936,6 +1936,32 @@ org.apache.spark.SparkException
 }
 
 
+-- !query
+WITH RECURSIVE t1(n, str, ts) AS (
+    SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+    UNION ALL
+    SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1
+-- !query analysis
+WithCTE
+:- CTERelationDef xxxx, false
+:  +- SubqueryAlias t1
+:     +- Project [1#x AS n#x, 2024-01-15 00:00:00#x AS str#x, CAST(2024-01-15 
00:00:00 AS TIMESTAMP)#x AS ts#x]
+:        +- UnionLoop xxxx
+:           :- Project [1 AS 1#x, 2024-01-15 00:00:00 AS 2024-01-15 
00:00:00#x, cast(2024-01-15 00:00:00 as timestamp) AS CAST(2024-01-15 00:00:00 
AS TIMESTAMP)#x]
+:           :  +- OneRowRelation
+:           +- Project [(n + 1)#x, str#x, cast(str#x as timestamp) AS str#x]
+:              +- Project [(n#x + 1) AS (n + 1)#x, str#x, str#x]
+:                 +- Filter (n#x < 5)
+:                    +- SubqueryAlias t1
+:                       +- Project [1#x AS n#x, 2024-01-15 00:00:00#x AS 
str#x, CAST(2024-01-15 00:00:00 AS TIMESTAMP)#x AS ts#x]
+:                          +- UnionLoopRef xxxx, [1#x, 2024-01-15 00:00:00#x, 
CAST(2024-01-15 00:00:00 AS TIMESTAMP)#x], false
++- Project [n#x, str#x, ts#x]
+   +- SubqueryAlias t1
+      +- CTERelationRef xxxx, true, [n#x, str#x, ts#x], false, false
+
+
 -- !query
 WITH RECURSIVE t1(n) AS (
     SELECT 1
diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql 
b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
index 4aa7be79a058..44d5ae49104a 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/cte-recursion.sql
@@ -728,6 +728,14 @@ WITH RECURSIVE t1(n, m) AS (
     SELECT n+1, CAST(n+1 AS BIGINT) FROM t1 WHERE n < 5)
 SELECT * FROM t1;
 
+-- Type coercion with timeZone sensitive type
+WITH RECURSIVE t1(n, str, ts) AS (
+    SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+    UNION ALL
+    SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1;
+
 -- Recursive CTE with nullable recursion and non-recursive anchor
 WITH RECURSIVE t1(n) AS (
     SELECT 1
diff --git 
a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out 
b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
index 536fc6c4ea63..0a2cfb5c706e 100644
--- a/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/cte-recursion.sql.out
@@ -1997,6 +1997,23 @@ org.apache.spark.SparkException
 }
 
 
+-- !query
+WITH RECURSIVE t1(n, str, ts) AS (
+    SELECT 1, '2024-01-15 00:00:00' ,CAST('2024-01-15 00:00:00' AS TIMESTAMP)
+    UNION ALL
+    SELECT n + 1, str, str FROM t1 WHERE n < 5
+)
+SELECT * FROM t1
+-- !query schema
+struct<n:int,str:string,ts:timestamp>
+-- !query output
+1      2024-01-15 00:00:00     2024-01-15 00:00:00
+2      2024-01-15 00:00:00     2024-01-15 00:00:00
+3      2024-01-15 00:00:00     2024-01-15 00:00:00
+4      2024-01-15 00:00:00     2024-01-15 00:00:00
+5      2024-01-15 00:00:00     2024-01-15 00:00:00
+
+
 -- !query
 WITH RECURSIVE t1(n) AS (
     SELECT 1


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to