This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 76a423955a8b [SPARK-51456][SQL] Add the `to_time` function
76a423955a8b is described below

commit 76a423955a8badfd82fe8fa88c76fba96adf8236
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Tue Mar 18 10:15:05 2025 +0800

    [SPARK-51456][SQL] Add the `to_time` function
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to add new function `to_time()`. It casts a `STRING` 
input value to `TIME` using an optional formatting.
    
    #### Syntax
    ```
    to_time(expr[, fmt])
    ```
    #### Arguments
    - expr: A `STRING` expression representing a time.
    - fmt: An optional format STRING expression. If `fmt` is supplied, it must 
conform with the datetime patterns, see 
https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html . If `fmt` 
is not supplied, the function is a synonym for `cast(expr AS TIME)`.
    
    #### Returns
    A TIME(n) where n is always 6 in the proposed implementation.
    
    #### Examples
    ```sql
    > SELECT to_time('00:12:00');
     00:12:00
    
    > SELECT to_time('12.10.05', 'HH.mm.ss');
     12:10:05
    ```
    
    ### Why are the changes needed?
    1. To improve user experience with Spark SQL, and allow to construct values 
of the new data type `TIME` from strings.
    2. To simplify migration from other systems where `to_time` is supported. 
For instance:
       - Snowflake: 
https://docs.snowflake.com/en/sql-reference/functions/to_time
       - BigQuery: 
https://cloud.google.com/bigquery/docs/reference/standard-sql/time_functions#parse_time
       - MySQL: 
https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_str-to-date
    3. For consistency: Spark SQL has already `to_timestamp()` for `TIMESTAMP` 
and `to_date()` for `DATE`.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    By running the related test suites:
    ```
    $ build/sbt "test:testOnly *ExpressionInfoSuite"
    $ build/sbt "test:testOnly *TimeExpressionsSuite"
    $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z 
time.sql"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50287 from MaxGekk/to_time-2.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../src/main/resources/error/error-conditions.json |   6 +
 .../spark/sql/catalyst/util/TimeFormatter.scala    |   6 +-
 .../sql/catalyst/analysis/FunctionRegistry.scala   |   1 +
 .../sql/catalyst/expressions/timeExpressions.scala | 122 +++++++++++++++++++++
 .../spark/sql/errors/QueryExecutionErrors.scala    |  13 +++
 .../expressions/TimeExpressionsSuite.scala         |  54 +++++++++
 .../sql-functions/sql-expression-schema.md         |   1 +
 .../sql-tests/analyzer-results/time.sql.out        |  40 +++++++
 .../src/test/resources/sql-tests/inputs/time.sql   |  10 ++
 .../test/resources/sql-tests/results/time.sql.out  |  48 ++++++++
 10 files changed, 299 insertions(+), 2 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 2ae54ef5f305..4c0420552fbe 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -405,6 +405,12 @@
     ],
     "sqlState" : "22018"
   },
+  "CANNOT_PARSE_TIME" : {
+    "message" : [
+      "The input string <input> cannot be parsed to a TIME value because it 
does not match to the datetime format <format>."
+    ],
+    "sqlState" : "22010"
+  },
   "CANNOT_PARSE_TIMESTAMP" : {
     "message" : [
       "<message>. Use <func> to tolerate invalid input string and return NULL 
instead."
diff --git 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala
index 618aee85a9a1..46afbc8aca19 100644
--- 
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala
+++ 
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimeFormatter.scala
@@ -122,10 +122,12 @@ object TimeFormatter {
     getFormatter(Some(format), locale, isParsing)
   }
 
-  def apply(format: String, isParsing: Boolean): TimeFormatter = {
-    getFormatter(Some(format), defaultLocale, isParsing)
+  def apply(format: Option[String], isParsing: Boolean): TimeFormatter = {
+    getFormatter(format, defaultLocale, isParsing)
   }
 
+  def apply(format: String, isParsing: Boolean): TimeFormatter = 
apply(Some(format), isParsing)
+
   def apply(format: String): TimeFormatter = {
     getFormatter(Some(format), defaultLocale, isParsing = false)
   }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 597ac57a10cc..b3539b2c85c9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -650,6 +650,7 @@ object FunctionRegistry {
     expression[Second]("second"),
     expression[ParseToTimestamp]("to_timestamp"),
     expression[ParseToDate]("to_date"),
+    expression[ToTime]("to_time"),
     expression[ToBinary]("to_binary"),
     expression[ToUnixTimestamp]("to_unix_timestamp"),
     expression[ToUTCTimestamp]("to_utc_timestamp"),
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
new file mode 100644
index 000000000000..d66b7fc4ec62
--- /dev/null
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import java.time.DateTimeException
+
+import org.apache.spark.sql.catalyst.expressions.objects.Invoke
+import org.apache.spark.sql.catalyst.util.TimeFormatter
+import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.internal.types.StringTypeWithCollation
+import org.apache.spark.sql.types.{AbstractDataType, ObjectType, TimeType}
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Parses a column to a time based on the given format.
+ */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(str[, format]) - Parses the `str` expression with the `format` 
expression to a time.
+    If `format` is malformed or its application does not result in a well 
formed time, the function
+    raises an error. By default, it follows casting rules to a time if the 
`format` is omitted.
+  """,
+  arguments = """
+    Arguments:
+      * str - A string to be parsed to time.
+      * format - Time format pattern to follow. See <a 
href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html";>Datetime
 Patterns</a> for valid
+                 time format patterns.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_('00:12:00');
+       00:12:00
+      > SELECT _FUNC_('12.10.05', 'HH.mm.ss');
+       12:10:05
+  """,
+  group = "datetime_funcs",
+  since = "4.1.0")
+// scalastyle:on line.size.limit
+case class ToTime(str: Expression, format: Option[Expression])
+  extends RuntimeReplaceable with ExpectsInputTypes {
+
+  def this(str: Expression, format: Expression) = this(str, Option(format))
+  def this(str: Expression) = this(str, None)
+
+  private def invokeParser(
+      fmt: Option[String] = None,
+      arguments: Seq[Expression] = children): Expression = {
+    Invoke(
+      targetObject = Literal.create(ToTimeParser(fmt), 
ObjectType(classOf[ToTimeParser])),
+      functionName = "parse",
+      dataType = TimeType(),
+      arguments = arguments,
+      methodInputTypes = arguments.map(_.dataType))
+  }
+
+  override lazy val replacement: Expression = format match {
+    case None => invokeParser()
+    case Some(expr) if expr.foldable =>
+      Option(expr.eval())
+        .map(f => invokeParser(Some(f.toString), Seq(str)))
+        .getOrElse(Literal(null, expr.dataType))
+    case _ => invokeParser()
+  }
+
+  override def inputTypes: Seq[AbstractDataType] = {
+    Seq(StringTypeWithCollation(supportsTrimCollation = true)) ++
+      format.map(_ => StringTypeWithCollation(supportsTrimCollation = true))
+  }
+
+  override def prettyName: String = "to_time"
+
+  override def children: Seq[Expression] = str +: format.toSeq
+
+  override protected def withNewChildrenInternal(
+      newChildren: IndexedSeq[Expression]): Expression = {
+    if (format.isDefined) {
+      copy(str = newChildren.head, format = Some(newChildren.last))
+    } else {
+      copy(str = newChildren.head)
+    }
+  }
+}
+
+case class ToTimeParser(fmt: Option[String]) {
+  private lazy val formatter = TimeFormatter(fmt, isParsing = true)
+
+  def this() = this(None)
+
+  private def withErrorCondition(input: => UTF8String, fmt: => Option[String])
+      (f: => Long): Long = {
+    try f
+    catch {
+      case e: DateTimeException =>
+        throw QueryExecutionErrors.timeParseError(input.toString, fmt, e)
+    }
+  }
+
+  def parse(s: UTF8String): Long = withErrorCondition(s, 
fmt)(formatter.parse(s.toString))
+
+  def parse(s: UTF8String, fmt: UTF8String): Long = {
+    val format = fmt.toString
+    withErrorCondition(s, Some(format)) {
+      TimeFormatter(format, isParsing = true).parse(s.toString)
+    }
+  }
+}
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 6777bebbbf2b..d0ece5baff43 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -276,6 +276,19 @@ private[sql] object QueryExecutionErrors extends 
QueryErrorsBase with ExecutionE
       summary = "")
   }
 
+  def timeParseError(input: String, fmt: Option[String], e: Throwable): 
SparkDateTimeException = {
+    new SparkDateTimeException(
+      errorClass = "CANNOT_PARSE_TIME",
+      messageParameters = Map(
+        "input" -> toSQLValue(input, StringType),
+        "format" -> toSQLValue(
+          fmt.getOrElse("HH:mm:ss.SSSSSS"),
+          StringType)),
+      context = Array.empty,
+      summary = "",
+      cause = Some(e))
+  }
+
   def ansiDateTimeArgumentOutOfRange(e: Exception): SparkDateTimeException = {
     new SparkDateTimeException(
       errorClass = "DATETIME_FIELD_OUT_OF_BOUNDS",
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala
new file mode 100644
index 000000000000..422e9cbcda03
--- /dev/null
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TimeExpressionsSuite.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.{SparkDateTimeException, SparkFunSuite}
+import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
+import org.apache.spark.sql.types.StringType
+
+class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+  test("ParseToTime") {
+    checkEvaluation(new ToTime(Literal("00:00:00"), Literal.create(null)), 
null)
+    checkEvaluation(new ToTime(Literal("00:00:00"), NonFoldableLiteral(null, 
StringType)), null)
+    checkEvaluation(new ToTime(Literal(null, StringType), 
Literal("HH:mm:ss")), null)
+
+    checkEvaluation(new ToTime(Literal("00:00:00")), localTime())
+    checkEvaluation(new ToTime(Literal("23-59-00.000999"), 
Literal("HH-mm-ss.SSSSSS")),
+      localTime(23, 59, 0, 999))
+    checkEvaluation(
+      new ToTime(Literal("12.00.59.90909"), 
NonFoldableLiteral("HH.mm.ss.SSSSS")),
+      localTime(12, 0, 59, 909090))
+    checkEvaluation(
+      new ToTime(NonFoldableLiteral(" 12:00.909 "), Literal(" HH:mm.SSS ")),
+      localTime(12, 0, 0, 909000))
+    checkEvaluation(
+      new ToTime(
+        NonFoldableLiteral("12 hours 123 millis"),
+        NonFoldableLiteral("HH 'hours' SSS 'millis'")),
+      localTime(12, 0, 0, 123000))
+
+    checkErrorInExpression[SparkDateTimeException](
+      expression = new ToTime(Literal("100:50")),
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'100:50'", "format" -> "'HH:mm:ss.SSSSSS'"))
+    checkErrorInExpression[SparkDateTimeException](
+      expression = new ToTime(Literal("100:50"), Literal("mm:HH")),
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'100:50'", "format" -> "'mm:HH'"))
+  }
+}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md 
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index da081bfdaee4..0db551bb286f 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -345,6 +345,7 @@
 | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT 
degrees(3.141592653589793) | struct<DEGREES(3.141592653589793):double> |
 | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT 
to_number('454', '999') | struct<to_number(454, 999):decimal(3,0)> |
 | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT 
radians(180) | struct<RADIANS(180):double> |
+| org.apache.spark.sql.catalyst.expressions.ToTime | to_time | SELECT 
to_time('00:12:00') | struct<to_time(00:12:00):time(6)> |
 | org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp | to_utc_timestamp 
| SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul') | 
struct<to_utc_timestamp(2016-08-31, Asia/Seoul):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp | 
to_unix_timestamp | SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd') | 
struct<to_unix_timestamp(2016-04-08, yyyy-MM-dd):bigint> |
 | org.apache.spark.sql.catalyst.expressions.TransformKeys | transform_keys | 
SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) 
-> k + 1) | struct<transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 
3)), lambdafunction((namedlambdavariable() + 1), namedlambdavariable(), 
namedlambdavariable())):map<int,int>> |
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out
index 762958979288..02608683007e 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/time.sql.out
@@ -1,6 +1,46 @@
 -- Automatically generated by SQLQueryTestSuite
+-- !query
+create temporary view time_view as select '11:53:26.038344' time_str, 
'HH:mm:ss.SSSSSS' fmt_str
+-- !query analysis
+CreateViewCommand `time_view`, select '11:53:26.038344' time_str, 
'HH:mm:ss.SSSSSS' fmt_str, false, false, LocalTempView, UNSUPPORTED, true
+   +- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS fmt_str#x]
+      +- OneRowRelation
+
+
 -- !query
 select time '16:39:45\t'
 -- !query analysis
 Project [59985000000 AS 59985000000#x]
 +- OneRowRelation
+
+
+-- !query
+select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 
'HH-mm-ss.SSSSSS')
+-- !query analysis
+Project [to_time(null, None) AS to_time(NULL)#x, to_time(01:02:03, None) AS 
to_time(01:02:03)#x, to_time(23-59-59.999999, Some(HH-mm-ss.SSSSSS)) AS 
to_time(23-59-59.999999, HH-mm-ss.SSSSSS)#x]
++- OneRowRelation
+
+
+-- !query
+select to_time(time_str, fmt_str) from time_view
+-- !query analysis
+Project [to_time(time_str#x, Some(fmt_str#x)) AS to_time(time_str, fmt_str)#x]
++- SubqueryAlias time_view
+   +- View (`time_view`, [time_str#x, fmt_str#x])
+      +- Project [cast(time_str#x as string) AS time_str#x, cast(fmt_str#x as 
string) AS fmt_str#x]
+         +- Project [11:53:26.038344 AS time_str#x, HH:mm:ss.SSSSSS AS 
fmt_str#x]
+            +- OneRowRelation
+
+
+-- !query
+select to_time("11", "HH")
+-- !query analysis
+Project [to_time(11, Some(HH)) AS to_time(11, HH)#x]
++- OneRowRelation
+
+
+-- !query
+select to_time("13-60", "HH-mm")
+-- !query analysis
+Project [to_time(13-60, Some(HH-mm)) AS to_time(13-60, HH-mm)#x]
++- OneRowRelation
diff --git a/sql/core/src/test/resources/sql-tests/inputs/time.sql 
b/sql/core/src/test/resources/sql-tests/inputs/time.sql
index 00488914c91a..61690221ab9c 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/time.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/time.sql
@@ -1,3 +1,13 @@
 -- time literals, functions and operations
 
+create temporary view time_view as select '11:53:26.038344' time_str, 
'HH:mm:ss.SSSSSS' fmt_str;
+
 select time '16:39:45\t';
+
+select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 
'HH-mm-ss.SSSSSS');
+select to_time(time_str, fmt_str) from time_view;
+
+-- missing fields in `to_time`
+select to_time("11", "HH");
+-- invalid: there is no 13 hours
+select to_time("13-60", "HH-mm");
diff --git a/sql/core/src/test/resources/sql-tests/results/time.sql.out 
b/sql/core/src/test/resources/sql-tests/results/time.sql.out
index 728bf6a2e402..d21665aeeed8 100644
--- a/sql/core/src/test/resources/sql-tests/results/time.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/time.sql.out
@@ -1,7 +1,55 @@
 -- Automatically generated by SQLQueryTestSuite
+-- !query
+create temporary view time_view as select '11:53:26.038344' time_str, 
'HH:mm:ss.SSSSSS' fmt_str
+-- !query schema
+struct<>
+-- !query output
+
+
+
 -- !query
 select time '16:39:45\t'
 -- !query schema
 struct<59985000000:time(6)>
 -- !query output
 16:39:45
+
+
+-- !query
+select to_time(null), to_time('01:02:03'), to_time('23-59-59.999999', 
'HH-mm-ss.SSSSSS')
+-- !query schema
+struct<to_time(NULL):time(6),to_time(01:02:03):time(6),to_time(23-59-59.999999,
 HH-mm-ss.SSSSSS):time(6)>
+-- !query output
+NULL   01:02:03        23:59:59.999999
+
+
+-- !query
+select to_time(time_str, fmt_str) from time_view
+-- !query schema
+struct<to_time(time_str, fmt_str):time(6)>
+-- !query output
+11:53:26.038344
+
+
+-- !query
+select to_time("11", "HH")
+-- !query schema
+struct<to_time(11, HH):time(6)>
+-- !query output
+11:00:00
+
+
+-- !query
+select to_time("13-60", "HH-mm")
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.SparkDateTimeException
+{
+  "errorClass" : "CANNOT_PARSE_TIME",
+  "sqlState" : "22010",
+  "messageParameters" : {
+    "format" : "'HH-mm'",
+    "input" : "'13-60'"
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to