This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 76a423955a8b [SPARK-51456][SQL] Add the `to_time` function 76a423955a8b is described below commit 76a423955a8badfd82fe8fa88c76fba96adf8236 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Tue Mar 18 10:15:05 2025 +0800 [SPARK-51456][SQL] Add the `to_time` function ### What changes were proposed in this pull request? In the PR, I propose to add new function `to_time()`. It casts a `STRING` input value to `TIME` using an optional formatting. #### Syntax ``` to_time(expr[, fmt]) ``` #### Arguments - expr: A `STRING` expression representing a time. - fmt: An optional format STRING expression. If `fmt` is supplied, it must conform with the datetime patterns, see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html . If `fmt` is not supplied, the function is a synonym for `cast(expr AS TIME)`. #### Returns A TIME(n) where n is always 6 in the proposed implementation. #### Examples ```sql > SELECT to_time('00:12:00'); 00:12:00 > SELECT to_time('12.10.05', 'HH.mm.ss'); 12:10:05 ``` ### Why are the changes needed? 1. To improve user experience with Spark SQL, and allow to construct values of the new data type `TIME` from strings. 2. To simplify migration from other systems where `to_time` is supported. For instance: - Snowflake: https://docs.snowflake.com/en/sql-reference/functions/to_time - BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions#parse_time - MySQL: https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_str-to-date 3. For consistency: Spark SQL has already `to_timestamp()` for `TIMESTAMP` and `to_date()` for `DATE`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the related test suites: ``` $ build/sbt "test:testOnly *ExpressionInfoSuite" $ build/sbt "test:testOnly *TimeExpressionsSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z time.sql" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50287 from MaxGekk/to_time-2. Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Kent Yao <y...@apache.org> --- .../src/main/resources/error/error-conditions.json | 6 + .../spark/sql/catalyst/util/TimeFormatter.scala | 6 +- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../sql/catalyst/expressions/timeExpressions.scala | 122 +++++++++++++++++++++ .../spark/sql/errors/QueryExecutionErrors.scala | 13 +++ .../expressions/TimeExpressionsSuite.scala | 54 +++++++++ .../sql-functions/sql-expression-schema.md | 1 + .../sql-tests/analyzer-results/time.sql.out | 40 +++++++ .../src/test/resources/sql-tests/inputs/time.sql | 10 ++ .../test/resources/sql-tests/results/time.sql.out | 48 ++++++++ 10 files changed, 299 insertions(+), 2 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 2ae54ef5f305..4c0420552fbe 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -405,6 +405,12 @@ ], "sqlState" : "22018" }, + "CANNOT_PARSE_TIME" : { + "message" : [ + "The input string <input> cannot be parsed to a TIME value because it does not match to the datetime format <format>." + ], + "sqlState" : "22010" + }, "CANNOT_PARSE_TIMESTAMP" : { "message" : [ "<message>. Use <func> to tolerate invalid input string and return NULL instead." diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala index 618aee85a9a1..46afbc8aca19 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala @@ -122,10 +122,12 @@ object TimeFormatter { getFormatter(Some(format), locale, isParsing) } - def apply(format: String, isParsing: Boolean): TimeFormatter = { - getFormatter(Some(format), defaultLocale, isParsing) + def apply(format: Option[String], isParsing: Boolean): TimeFormatter = { + getFormatter(format, defaultLocale, isParsing) } + def apply(format: String, isParsing: Boolean): TimeFormatter = apply(Some(format), isParsing) + def apply(format: String): TimeFormatter = { getFormatter(Some(format), defaultLocale, isParsing = false) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 597ac57a10cc..b3539b2c85c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -650,6 +650,7 @@ object FunctionRegistry { expression[Second]("second"), expression[ParseToTimestamp]("to_timestamp"), expression[ParseToDate]("to_date"), + expression[ToTime]("to_time"), expression[ToBinary]("to_binary"), expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala new file mode 100644 index 000000000000..d66b7fc4ec62 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.time.DateTimeException + +import org.apache.spark.sql.catalyst.expressions.objects.Invoke +import org.apache.spark.sql.catalyst.util.TimeFormatter +import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.internal.types.StringTypeWithCollation +import org.apache.spark.sql.types.{AbstractDataType, ObjectType, TimeType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Parses a column to a time based on the given format. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(str[, format]) - Parses the `str` expression with the `format` expression to a time. + If `format` is malformed or its application does not result in a well formed time, the function + raises an error. By default, it follows casting rules to a time if the `format` is omitted. + """, + arguments = """ + Arguments: + * str - A string to be parsed to time. + * format - Time format pattern to follow. See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html">Datetime Patterns</a> for valid + time format patterns. + """, + examples = """ + Examples: + > SELECT _FUNC_('00:12:00'); + 00:12:00 + > SELECT _FUNC_('12.10.05', 'HH.mm.ss'); + 12:10:05 + """, + group = "datetime_funcs", + since = "4.1.0") +// scalastyle:on line.size.limit +case class ToTime(str: Expression, format: Option[Expression]) + extends RuntimeReplaceable with ExpectsInputTypes { + + def this(str: Expression, format: Expression) = this(str, Option(format)) + def this(str: Expression) = this(str, None) + + private def invokeParser( + fmt: Option[String] = None, + arguments: Seq[Expression] = children): Expression = { + Invoke( + targetObject = Literal.create(ToTimeParser(fmt), ObjectType(classOf[ToTimeParser])), + functionName = "parse", + dataType = TimeType(), + arguments = arguments, + methodInputTypes = arguments.map(_.dataType)) + } + + override lazy val replacement: Expression = format match { + case None => invokeParser() + case Some(expr) if expr.foldable => + Option(expr.eval()) + .map(f => invokeParser(Some(f.toString), Seq(str))) + .getOrElse(Literal(null, expr.dataType)) + case _ => invokeParser() + } + + override def inputTypes: Seq[AbstractDataType] = { + Seq(StringTypeWithCollation(supportsTrimCollation = true)) ++ + format.map(_ => StringTypeWithCollation(supportsTrimCollation = true)) + } + + override def prettyName: String = "to_time" + + override def children: Seq[Expression] = str +: format.toSeq + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): Expression = { + if (format.isDefined) { + copy(str = newChildren.head, format = Some(newChildren.last)) + } else { + copy(str = newChildren.head) + } + } +} + +case class ToTimeParser(fmt: Option[String]) { + private lazy val formatter = TimeFormatter(fmt, isParsing = true) + + def this() = this(None) + + private def withErrorCondition(input: => UTF8String, fmt: => Option[String]) + (f: => Long): Long = { + try f + catch { + case e: DateTimeException => + throw QueryExecutionErrors.timeParseError(input.toString, fmt, e) + } + } + + def parse(s: UTF8String): Long = withErrorCondition(s, fmt)(formatter.parse(s.toString)) + + def parse(s: UTF8String, fmt: UTF8String): Long = { + val format = fmt.toString + withErrorCondition(s, Some(format)) { + TimeFormatter(format, isParsing = true).parse(s.toString) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6777bebbbf2b..d0ece5baff43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -276,6 +276,19 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = "") } + def timeParseError(input: String, fmt: Option[String], e: Throwable): SparkDateTimeException = { + new SparkDateTimeException( + errorClass = "CANNOT_PARSE_TIME", + messageParameters = Map( + "input" -> toSQLValue(input, StringType), + "format" -> toSQLValue( + fmt.getOrElse("HH:mm:ss.SSSSSS"), + StringType)), + context = Array.empty, + summary = "", + cause = Some(e)) + } + def ansiDateTimeArgumentOutOfRange(e: Exception): SparkDateTimeException = { new SparkDateTimeException( errorClass = "DATETIME_FIELD_OUT_OF_BOUNDS", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala new file mode 100644 index 000000000000..422e9cbcda03 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.{SparkDateTimeException, SparkFunSuite} +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.types.StringType + +class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + test("ParseToTime") { + checkEvaluation(new ToTime(Literal("00:00:00"), Literal.create(null)), null) + checkEvaluation(new ToTime(Literal("00:00:00"), NonFoldableLiteral(null, StringType)), null) + checkEvaluation(new ToTime(Literal(null, StringType), Literal("HH:mm:ss")), null) + + checkEvaluation(new ToTime(Literal("00:00:00")), localTime()) + checkEvaluation(new ToTime(Literal("23-59-00.000999"), Literal("HH-mm-ss.SSSSSS")), + localTime(23, 59, 0, 999)) + checkEvaluation( + new ToTime(Literal("12.00.59.90909"), NonFoldableLiteral("HH.mm.ss.SSSSS")), + localTime(12, 0, 59, 909090)) + checkEvaluation( + new ToTime(NonFoldableLiteral(" 12:00.909 "), Literal(" HH:mm.SSS ")), + localTime(12, 0, 0, 909000)) + checkEvaluation( + new ToTime( + NonFoldableLiteral("12 hours 123 millis"), + NonFoldableLiteral("HH 'hours' SSS 'millis'")), + localTime(12, 0, 0, 123000)) + + checkErrorInExpression[SparkDateTimeException]( + expression = new ToTime(Literal("100:50")), + condition = "CANNOT_PARSE_TIME", + parameters = Map("input" -> "'100:50'", "format" -> "'HH:mm:ss.SSSSSS'")) + checkErrorInExpression[SparkDateTimeException]( + expression = new ToTime(Literal("100:50"), Literal("mm:HH")), + condition = "CANNOT_PARSE_TIME", + parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'")) + } +} diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index da081bfdaee4..0db551bb286f 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -345,6 +345,7 @@ | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct<to_number(454, 999):decimal(3,0)> | | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct<RADIANS(180):double> | +| org.apache.spark.sql.catalyst.expressions.ToTime | to_time | SELECT to_time('00:12:00') | struct<to_time(00:12:00):time(6)> | | org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp | SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> | | org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> | | org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1) | struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), namedlambdavariable())):map<int,int>> | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out index 762958979288..02608683007e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out @@ -1,6 +1,46 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str +-- !query analysis +CreateViewCommand `time_view`, select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str, false, false, LocalTempView, UNSUPPORTED, true + +- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS fmt_str#x] + +- OneRowRelation + + -- !query select time '16:39:45\t' -- !query analysis Project [59985000000 AS 59985000000#x] +- OneRowRelation + + +-- !query +select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS') +-- !query analysis +Project [to_time(null, None) AS to_time(NULL)#x, to_time(01:02:03, None) AS to_time(01:02:03)#x, to_time(23-59-59.999999, Some(HH-mm-ss.SSSSSS)) AS to_time(23-59-59.999999, HH-mm-ss.SSSSSS)#x] ++- OneRowRelation + + +-- !query +select to_time(time_str, fmt_str) from time_view +-- !query analysis +Project [to_time(time_str#x, Some(fmt_str#x)) AS to_time(time_str, fmt_str)#x] ++- SubqueryAlias time_view + +- View (`time_view`, [time_str#x, fmt_str#x]) + +- Project [cast(time_str#x as string) AS time_str#x, cast(fmt_str#x as string) AS fmt_str#x] + +- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS fmt_str#x] + +- OneRowRelation + + +-- !query +select to_time("11", "HH") +-- !query analysis +Project [to_time(11, Some(HH)) AS to_time(11, HH)#x] ++- OneRowRelation + + +-- !query +select to_time("13-60", "HH-mm") +-- !query analysis +Project [to_time(13-60, Some(HH-mm)) AS to_time(13-60, HH-mm)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/time.sql b/sql/core/src/test/resources/sql-tests/inputs/time.sql index 00488914c91a..61690221ab9c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/time.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/time.sql @@ -1,3 +1,13 @@ -- time literals, functions and operations +create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str; + select time '16:39:45\t'; + +select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS'); +select to_time(time_str, fmt_str) from time_view; + +-- missing fields in `to_time` +select to_time("11", "HH"); +-- invalid: there is no 13 hours +select to_time("13-60", "HH-mm"); diff --git a/sql/core/src/test/resources/sql-tests/results/time.sql.out b/sql/core/src/test/resources/sql-tests/results/time.sql.out index 728bf6a2e402..d21665aeeed8 100644 --- a/sql/core/src/test/resources/sql-tests/results/time.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/time.sql.out @@ -1,7 +1,55 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +create temporary view time_view as select '11:53:26.038344' time_str, 'HH:mm:ss.SSSSSS' fmt_str +-- !query schema +struct<> +-- !query output + + + -- !query select time '16:39:45\t' -- !query schema struct<59985000000:time(6)> -- !query output 16:39:45 + + +-- !query +select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 'HH-mm-ss.SSSSSS') +-- !query schema +struct<to_time(NULL):time(6),to_time(01:02:03):time(6),to_time(23-59-59.999999, HH-mm-ss.SSSSSS):time(6)> +-- !query output +NULL 01:02:03 23:59:59.999999 + + +-- !query +select to_time(time_str, fmt_str) from time_view +-- !query schema +struct<to_time(time_str, fmt_str):time(6)> +-- !query output +11:53:26.038344 + + +-- !query +select to_time("11", "HH") +-- !query schema +struct<to_time(11, HH):time(6)> +-- !query output +11:00:00 + + +-- !query +select to_time("13-60", "HH-mm") +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkDateTimeException +{ + "errorClass" : "CANNOT_PARSE_TIME", + "sqlState" : "22010", + "messageParameters" : { + "format" : "'HH-mm'", + "input" : "'13-60'" + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org