This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new 6a13ca52404d [SPARK-53524][CONNECT][SQL][4.0] Fix temporal value conversion in LiteralValueProtoConverter 6a13ca52404d is described below commit 6a13ca52404d1e4e893d8f2da66919678a77f846 Author: Yihong He <heyihong...@gmail.com> AuthorDate: Mon Sep 15 09:49:23 2025 +0800 [SPARK-53524][CONNECT][SQL][4.0] Fix temporal value conversion in LiteralValueProtoConverter ### What changes were proposed in this pull request? This PR fixes temporal value conversion issues in the `LiteralValueProtoConverter` for Spark Connect. The main changes include: 1. **Fixed temporal value conversion in `getConverter` method**: Updated the conversion logic for temporal data types (DATE, TIMESTAMP, TIMESTAMP_NTZ, DAY_TIME_INTERVAL, YEAR_MONTH_INTERVAL, TIME) to use proper utility methods from `SparkDateTimeUtils` and `SparkIntervalUtils` instead of directly returning raw protobuf values. 2. **Added comprehensive test coverage**: Extended the `PlanGenerationTestSuite` with a new test case that includes a tuple containing all temporal types to ensure proper conversion and serialization. 3. **Updated test expectations**: Modified the expected explain output and query test files to reflect the corrected temporal value handling. ### Why are the changes needed? The struct type in typedlit doesn't work well with temporal values due to bugs in type conversions. For example, the code below fails: ```scala import org.apache.spark.sql.functions.typedlit spark.sql("select 1").select(typedlit((1, java.time.LocalDate.of(2020, 10, 10)))).collect() """ org.apache.spark.SparkIllegalArgumentException: The value (18545) of the type (java.lang.Integer) cannot be converted to the DATE type. org.apache.spark.sql.catalyst.CatalystTypeConverters$DateConverter$.toCatalystImpl(CatalystTypeConverters.scala:356) org.apache.spark.sql.catalyst.CatalystTypeConverters$DateConverter$.toCatalystImpl(CatalystTypeConverters.scala:347) org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:110) org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:271) org.apache.spark.sql.catalyst.CatalystTypeConverters$StructConverter.toCatalystImpl(CatalystTypeConverters.scala:251) org.apache.spark.sql.catalyst.CatalystTypeConverters$CatalystTypeConverter.toCatalyst(CatalystTypeConverters.scala:110) org.apache.spark.sql.catalyst.CatalystTypeConverters$.$anonfun$createToCatalystConverter$2(CatalystTypeConverters.scala:532) org.apache.spark.sql.connect.planner.LiteralExpressionProtoConverter$.toCatalystExpression(LiteralExpressionProtoConverter.scala:116) """ ``` ### Does this PR introduce _any_ user-facing change? **Yes.** This PR fixes temporal value conversion in LiteralValueProtoConverter, allowing the struct type in typedlit to work with temporal values. ### How was this patch tested? `build/sbt "connect-client-jvm/testOnly org.apache.spark.sql.PlanGenerationTestSuite"` `build/sbt "connect/testOnly org.apache.spark.sql.connect.ProtoToParsedPlanTestSuite"` ### Was this patch authored or co-authored using generative AI tooling? Generated-by: Cursor 1.5.11 Closes #52324 from heyihong/SPARK-53524-4.0. Lead-authored-by: Yihong He <heyihong...@gmail.com> Co-authored-by: Wenchen Fan <cloud0...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/PlanGenerationTestSuite.scala | 10 ++ .../common/LiteralValueProtoConverter.scala | 18 ++-- .../explain-results/function_typedLit.explain | 2 +- .../query-tests/queries/function_typedLit.json | 108 +++++++++++++++++++++ .../queries/function_typedLit.proto.bin | Bin 9242 -> 9617 bytes 5 files changed, 128 insertions(+), 10 deletions(-) diff --git a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index a548ec7007db..4bdaecffde4b 100644 --- a/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/sql/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -3390,6 +3390,16 @@ class PlanGenerationTestSuite fn.typedLit(java.time.Duration.ofSeconds(200L)), fn.typedLit(java.time.Period.ofDays(100)), fn.typedLit(new CalendarInterval(2, 20, 100L)), + fn.typedLit( + ( + java.time.LocalDate.of(2020, 10, 10), + java.time.Instant.ofEpochMilli(1677155519808L), + new java.sql.Timestamp(12345L), + java.time.LocalDateTime.of(2023, 2, 23, 20, 36), + java.sql.Date.valueOf("2023-02-23"), + java.time.Duration.ofSeconds(200L), + java.time.Period.ofDays(100), + new CalendarInterval(2, 20, 100L))), // Handle parameterized scala types e.g.: List, Seq and Map. fn.typedLit(Some(1)), diff --git a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala index 1f3496fa8984..e8522d7118c2 100644 --- a/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +++ b/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala @@ -296,7 +296,7 @@ object LiteralValueProtoConverter { } } - private def getConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { + private def getScalaConverter(dataType: proto.DataType): proto.Expression.Literal => Any = { if (dataType.hasShort) { v => v.getShort.toShort } else if (dataType.hasInteger) { v => @@ -316,15 +316,15 @@ object LiteralValueProtoConverter { } else if (dataType.hasBinary) { v => v.getBinary.toByteArray } else if (dataType.hasDate) { v => - v.getDate + SparkDateTimeUtils.toJavaDate(v.getDate) } else if (dataType.hasTimestamp) { v => - v.getTimestamp + SparkDateTimeUtils.toJavaTimestamp(v.getTimestamp) } else if (dataType.hasTimestampNtz) { v => - v.getTimestampNtz + SparkDateTimeUtils.microsToLocalDateTime(v.getTimestampNtz) } else if (dataType.hasDayTimeInterval) { v => - v.getDayTimeInterval + SparkIntervalUtils.microsToDuration(v.getDayTimeInterval) } else if (dataType.hasYearMonthInterval) { v => - v.getYearMonthInterval + SparkIntervalUtils.monthsToPeriod(v.getYearMonthInterval) } else if (dataType.hasDecimal) { v => Decimal(v.getDecimal.getValue) } else if (dataType.hasCalendarInterval) { v => @@ -354,7 +354,7 @@ object LiteralValueProtoConverter { builder.result() } - makeArrayData(getConverter(array.getElementType)) + makeArrayData(getScalaConverter(array.getElementType)) } def toCatalystMap(map: proto.Expression.Literal.Map): mutable.Map[_, _] = { @@ -373,7 +373,7 @@ object LiteralValueProtoConverter { builder } - makeMapData(getConverter(map.getKeyType), getConverter(map.getValueType)) + makeMapData(getScalaConverter(map.getKeyType), getScalaConverter(map.getValueType)) } def toCatalystStruct(struct: proto.Expression.Literal.Struct): Any = { @@ -392,7 +392,7 @@ object LiteralValueProtoConverter { val structData = elements .zip(dataTypes) .map { case (element, dataType) => - getConverter(dataType)(element) + getScalaConverter(dataType)(element) } .asInstanceOf[scala.collection.Seq[Object]] .toSeq diff --git a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain index 6d854da250fc..508128bec26d 100644 --- a/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain +++ b/sql/connect/common/src/test/resources/query-tests/explain-results/function_typedLit.explain @@ -1,2 +1,2 @@ -Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null A [...] +Project [id#0L, id#0L, 1 AS 1#0, null AS NULL#0, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, [8,6] AS ARRAY(8, 6)#0, null A [...] +- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json index e56b6e1f3ee0..80b95e4664c1 100644 --- a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +++ b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.json @@ -652,6 +652,114 @@ } } } + }, { + "literal": { + "struct": { + "structType": { + "struct": { + "fields": [{ + "name": "_1", + "dataType": { + "date": { + } + }, + "nullable": true + }, { + "name": "_2", + "dataType": { + "timestamp": { + } + }, + "nullable": true + }, { + "name": "_3", + "dataType": { + "timestamp": { + } + }, + "nullable": true + }, { + "name": "_4", + "dataType": { + "timestampNtz": { + } + }, + "nullable": true + }, { + "name": "_5", + "dataType": { + "date": { + } + }, + "nullable": true + }, { + "name": "_6", + "dataType": { + "dayTimeInterval": { + "startField": 0, + "endField": 3 + } + }, + "nullable": true + }, { + "name": "_7", + "dataType": { + "yearMonthInterval": { + "startField": 0, + "endField": 1 + } + }, + "nullable": true + }, { + "name": "_8", + "dataType": { + "calendarInterval": { + } + }, + "nullable": true + }] + } + }, + "elements": [{ + "date": 18545 + }, { + "timestamp": "1677155519808000" + }, { + "timestamp": "12345000" + }, { + "timestampNtz": "1677184560000000" + }, { + "date": 19411 + }, { + "dayTimeInterval": "200000000" + }, { + "yearMonthInterval": 0 + }, { + "calendarInterval": { + "months": 2, + "days": 20, + "microseconds": "100" + } + }] + } + }, + "common": { + "origin": { + "jvmOrigin": { + "stackTrace": [{ + "classLoaderName": "app", + "declaringClass": "org.apache.spark.sql.functions$", + "methodName": "typedLit", + "fileName": "functions.scala" + }, { + "classLoaderName": "app", + "declaringClass": "org.apache.spark.sql.PlanGenerationTestSuite", + "methodName": "~~trimmed~anonfun~~", + "fileName": "PlanGenerationTestSuite.scala" + }] + } + } + } }, { "literal": { "integer": 1 diff --git a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin index 38a6ce630056..6aa367df3227 100644 Binary files a/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/sql/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org