This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 03cb4d9d6874 [SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time functions in Scala
03cb4d9d6874 is described below

commit 03cb4d9d68746a714efcd335540c420da97bb9ed
Author: Uros Bojanic <uros.boja...@databricks.com>
AuthorDate: Wed Jul 23 16:17:27 2025 +0200

    [SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time functions in Scala

    ### What changes were proposed in this pull request?
    Implement the `to_time` and `try_to_time` functions in the Scala API.

    ### Why are the changes needed?
    Expand API support for the `ToTime` and `TryToTime` expressions.

    ### Does this PR introduce _any_ user-facing change?
    Yes, the new functions are now available in the Scala API.

    ### How was this patch tested?
    Added appropriate Scala function tests.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #51575 from uros-db/scala-try_to_time.

    Authored-by: Uros Bojanic <uros.boja...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
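
A minimal usage sketch of the new Scala API (illustrative only: it assumes a
SparkSession `spark` with `spark.implicits._` in scope, and mirrors the
behavior exercised by the tests added below):

    import org.apache.spark.sql.functions.{col, lit, to_time, try_to_time}

    val df = Seq("23:59:59.999999", "invalid_time").toDF("str")

    // try_to_time yields NULL for the malformed row instead of raising an error.
    df.select(try_to_time(col("str"))).show()

    // to_time raises a SparkDateTimeException on malformed input; with
    // well-formed input and an explicit pattern it parses successfully.
    Seq("23.59.59").toDF("str")
      .select(to_time(col("str"), lit("HH.mm.ss")))
      .show()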

---
 python/pyspark/sql/tests/test_functions.py         |   4 +-
 .../scala/org/apache/spark/sql/functions.scala     |  70 +++++++++
 .../apache/spark/sql/TimeFunctionsSuiteBase.scala  | 174 ++++++++++++++++++++-
 3 files changed, 246 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 183a17728421..3a4dfd8f6e4d 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -83,7 +83,9 @@ class FunctionsTestsMixin:
         # Functions that we expect to be missing in python until they are added to pyspark
         expected_missing_in_py = set(
             # TODO(SPARK-52888): Implement the make_time function in Python
-            ["make_time"]
+            # TODO(SPARK-52890): Implement the to_time function in Python
+            # TODO(SPARK-52891): Implement the try_to_time function in Python
+            ["make_time", "to_time", "try_to_time"]
         )
 
         self.assertEqual(
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 2b41d0333534..e16554eaddbf 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5699,6 +5699,41 @@ object functions {
   def unix_timestamp(s: Column, p: String): Column = Column.fn("unix_timestamp", s, lit(p))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column): Column = {
+    Column.fn("to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column, format: Column): Column = {
+    Column.fn("to_time", str, format)
+  }
+
   /**
    * Converts to a timestamp by casting rules to `TimestampType`.
    *
@@ -5731,6 +5766,41 @@
    */
   def to_timestamp(s: Column, fmt: String): Column = Column.fn("to_timestamp", s, lit(fmt))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column): Column = {
+    Column.fn("try_to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column, format: Column): Column = {
+    Column.fn("try_to_time", str, format)
+  }
+
   /**
    * Parses the `s` with the `format` to a timestamp. The function always returns null on an
    * invalid input with`/`without ANSI SQL mode enabled. The result data type is consistent with
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
index 4702b9d43498..7d7c4597ddfe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import java.time.LocalTime
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkDateTimeException}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
@@ -161,6 +161,178 @@ abstract class TimeFunctionsSuiteBase extends QueryTest with SharedSparkSession
     checkAnswer(result1, expected)
     checkAnswer(result2, expected)
   }
+
+  test("SPARK-52883: to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str)"
+    )
+    val result2 = df.select(
+      to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq("invalid_time").toDF("str")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH:mm:ss.SSSSSS'")
+    )
+  }
+
+  test("SPARK-52883: to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false),
+      StructField("format", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.S"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str, format)"
+    )
+    val result2 = df.select(
+      to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq(("invalid_time", "HH.mm.ss")).toDF("str", "format")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"), col("format"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH.mm.ss'")
+    )
+  }
+
+  test("SPARK-52884: try_to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999"),
+      Row("invalid_time"),
+      Row(null)
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
+
+  test("SPARK-52884: try_to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType),
+      StructField("format", StringType)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.SSS"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS"),
+      Row("invalid_time", "HH.mm.ss"),
+      Row("00.00.00", "invalid_format"),
+      Row("invalid_time", "invalid_format"),
+      Row("00:00:00", "HH.mm.ss"),
+      Row("abc", "HH.mm.ss"),
+      Row("00:00:00", null),
+      Row(null, "HH.mm.ss")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str, format)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null,
+      null,
+      null,
+      null,
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
 }
 
 // This class is used to run the same tests with ANSI mode enabled explicitly.

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org