This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 6a13ca52404d [SPARK-53524][CONNECT][SQL][4.0] Fix temporal value 
conversion in LiteralValueProtoConverter
6a13ca52404d is described below

commit 6a13ca52404d1e4e893d8f2da66919678a77f846
Author: Yihong He <heyihong...@gmail.com>
AuthorDate: Mon Sep 15 09:49:23 2025 +0800

    [SPARK-53524][CONNECT][SQL][4.0] Fix temporal value conversion in 
LiteralValueProtoConverter
    
    ### What changes were proposed in this pull request?
    
    This PR fixes temporal value conversion issues in the 
`LiteralValueProtoConverter` for Spark Connect. The main changes include:
    
    1. **Fixed temporal value conversion in the `getConverter` method (renamed to 
`getScalaConverter`)**: Updated the conversion logic for temporal data types 
(DATE, TIMESTAMP, TIMESTAMP_NTZ, DAY_TIME_INTERVAL, YEAR_MONTH_INTERVAL, TIME) 
to use the proper utility methods from `SparkDateTimeUtils` and 
`SparkIntervalUtils` instead of directly returning the raw protobuf values.
    
    2. **Added comprehensive test coverage**: Extended the 
`PlanGenerationTestSuite` with a new test case that includes a tuple containing 
all temporal types to ensure proper conversion and serialization.
    
    3. **Updated test expectations**: Modified the expected explain output and 
query test files to reflect the corrected temporal value handling.
    
    ### Why are the changes needed?
    
    The struct type in `typedlit` does not work correctly with temporal values 
because of bugs in the type conversions. For example, the code below fails:
    
    ```scala
    import org.apache.spark.sql.functions.typedlit
    
    spark.sql("select 1").select(typedlit((1, java.time.LocalDate.of(2020, 10, 
10)))).collect()
    """
    org.apache.spark.SparkIllegalArgumentException: The value (18545) of the 
type (java.lang.Integer) cannot be converted to the DATE type.
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$DateConverter$.toCatalystImpl(CatalystTypeConverters.scala:356)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$DateConverter$.toCatalystImpl(CatalystTypeConverters.scala:347)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:110)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:271)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:251)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:110)
      
org.apache.spark.sql.catalyst.CatalystTypeConverters$.$anonfun$createToCatalystConverter$2(CatalystTypeConverters.scala:532)
      
org.apache.spark.sql.connect.planner.LiteralExpressionProtoConverter$.toCatalystExpression(LiteralExpressionProtoConverter.scala:116)
    """
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    
    **Yes.** This PR fixes temporal value conversion in 
`LiteralValueProtoConverter`, allowing the struct type in `typedlit` to work 
with temporal values.
    
    ### How was this patch tested?
    
    `build/sbt "connect-client-jvm/testOnly 
org.apache.spark.sql.PlanGenerationTestSuite"`
    `build/sbt "connect/testOnly 
org.apache.spark.sql.connect.ProtoToParsedPlanTestSuite"`
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Cursor 1.5.11
    
    Closes #52324 from heyihong/SPARK-53524-4.0.
    
    Lead-authored-by: Yihong He <heyihong...@gmail.com>
    Co-authored-by: Wenchen Fan <cloud0...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/PlanGenerationTestSuite.scala |  10 ++
 .../common/LiteralValueProtoConverter.scala        |  18 ++--
 .../explain-results/function_typedLit.explain      |   2 +-
 .../query-tests/queries/function_typedLit.json     | 108 +++++++++++++++++++++
 .../queries/function_typedLit.proto.bin            | Bin 9242 -> 9617 bytes
 5 files changed, 128 insertions(+), 10 deletions(-)

diff --git 
a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
 
b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index a548ec7007db..4bdaecffde4b 100644
--- 
a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++ 
b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -3390,6 +3390,16 @@ class PlanGenerationTestSuite
       fn.typedLit(java.time.Duration.ofSeconds(200L)),
       fn.typedLit(java.time.Period.ofDays(100)),
       fn.typedLit(new CalendarInterval(2, 20, 100L)),
+      fn.typedLit(
+        (
+          java.time.LocalDate.of(2020, 10, 10),
+          java.time.Instant.ofEpochMilli(1677155519808L),
+          new java.sql.Timestamp(12345L),
+          java.time.LocalDateTime.of(2023, 2, 23, 20, 36),
+          java.sql.Date.valueOf("2023-02-23"),
+          java.time.Duration.ofSeconds(200L),
+          java.time.Period.ofDays(100),
+          new CalendarInterval(2, 20, 100L))),
 
       // Handle parameterized scala types e.g.: List, Seq and Map.
       fn.typedLit(Some(1)),
diff --git 
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala
 
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala
index 1f3496fa8984..e8522d7118c2 100644
--- 
a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala
+++ 
b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala
@@ -296,7 +296,7 @@ object LiteralValueProtoConverter {
     }
   }
 
-  private def getConverter(dataType: proto.DataType): proto.Expression.Literal 
=> Any = {
+  private def getScalaConverter(dataType: proto.DataType): 
proto.Expression.Literal => Any = {
     if (dataType.hasShort) { v =>
       v.getShort.toShort
     } else if (dataType.hasInteger) { v =>
@@ -316,15 +316,15 @@ object LiteralValueProtoConverter {
     } else if (dataType.hasBinary) { v =>
       v.getBinary.toByteArray
     } else if (dataType.hasDate) { v =>
-      v.getDate
+      SparkDateTimeUtils.toJavaDate(v.getDate)
     } else if (dataType.hasTimestamp) { v =>
-      v.getTimestamp
+      SparkDateTimeUtils.toJavaTimestamp(v.getTimestamp)
     } else if (dataType.hasTimestampNtz) { v =>
-      v.getTimestampNtz
+      SparkDateTimeUtils.microsToLocalDateTime(v.getTimestampNtz)
     } else if (dataType.hasDayTimeInterval) { v =>
-      v.getDayTimeInterval
+      SparkIntervalUtils.microsToDuration(v.getDayTimeInterval)
     } else if (dataType.hasYearMonthInterval) { v =>
-      v.getYearMonthInterval
+      SparkIntervalUtils.monthsToPeriod(v.getYearMonthInterval)
     } else if (dataType.hasDecimal) { v =>
       Decimal(v.getDecimal.getValue)
     } else if (dataType.hasCalendarInterval) { v =>
@@ -354,7 +354,7 @@ object LiteralValueProtoConverter {
       builder.result()
     }
 
-    makeArrayData(getConverter(array.getElementType))
+    makeArrayData(getScalaConverter(array.getElementType))
   }
 
   def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = {
@@ -373,7 +373,7 @@ object LiteralValueProtoConverter {
       builder
     }
 
-    makeMapData(getConverter(map.getKeyType), getConverter(map.getValueType))
+    makeMapData(getScalaConverter(map.getKeyType), 
getScalaConverter(map.getValueType))
   }
 
   def toCatalystStruct(struct: proto.Expression.Literal.Struct): Any = {
@@ -392,7 +392,7 @@ object LiteralValueProtoConverter {
     val structData = elements
       .zip(dataTypes)
       .map { case (element, dataType) =>
-        getConverter(dataType)(element)
+        getScalaConverter(dataType)(element)
       }
       .asInstanceOf[scala.collection.Seq[Object]]
       .toSeq
diff --git 
a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain
 
b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain
index 6d854da250fc..508128bec26d 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain
+++ 
b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain
@@ -1,2 +1,2 @@
-Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 
9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 
7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 
89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS 
connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 
0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS 
X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] 
AS ARRAY(8, 6)#0, null A [...]
+Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 
9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 
7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 
89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS 
connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 
0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS 
X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] 
AS ARRAY(8, 6)#0, null A [...]
 +- LocalRelation <empty>, [id#0L, a#0, b#0]
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
index e56b6e1f3ee0..80b95e4664c1 100644
--- 
a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
+++ 
b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json
@@ -652,6 +652,114 @@
           }
         }
       }
+    }, {
+      "literal": {
+        "struct": {
+          "structType": {
+            "struct": {
+              "fields": [{
+                "name": "_1",
+                "dataType": {
+                  "date": {
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_2",
+                "dataType": {
+                  "timestamp": {
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_3",
+                "dataType": {
+                  "timestamp": {
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_4",
+                "dataType": {
+                  "timestampNtz": {
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_5",
+                "dataType": {
+                  "date": {
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_6",
+                "dataType": {
+                  "dayTimeInterval": {
+                    "startField": 0,
+                    "endField": 3
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_7",
+                "dataType": {
+                  "yearMonthInterval": {
+                    "startField": 0,
+                    "endField": 1
+                  }
+                },
+                "nullable": true
+              }, {
+                "name": "_8",
+                "dataType": {
+                  "calendarInterval": {
+                  }
+                },
+                "nullable": true
+              }]
+            }
+          },
+          "elements": [{
+            "date": 18545
+          }, {
+            "timestamp": "1677155519808000"
+          }, {
+            "timestamp": "12345000"
+          }, {
+            "timestampNtz": "1677184560000000"
+          }, {
+            "date": 19411
+          }, {
+            "dayTimeInterval": "200000000"
+          }, {
+            "yearMonthInterval": 0
+          }, {
+            "calendarInterval": {
+              "months": 2,
+              "days": 20,
+              "microseconds": "100"
+            }
+          }]
+        }
+      },
+      "common": {
+        "origin": {
+          "jvmOrigin": {
+            "stackTrace": [{
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.functions$",
+              "methodName": "typedLit",
+              "fileName": "functions.scala"
+            }, {
+              "classLoaderName": "app",
+              "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite",
+              "methodName": "~~trimmed~anonfun~~",
+              "fileName": "PlanGenerationTestSuite.scala"
+            }]
+          }
+        }
+      }
     }, {
       "literal": {
         "integer": 1
diff --git 
a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin
 
b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin
index 38a6ce630056..6aa367df3227 100644
Binary files 
a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin
 and 
b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin
 differ


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to