This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 03cb4d9d6874 [SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time functions in Scala
03cb4d9d6874 is described below

commit 03cb4d9d68746a714efcd335540c420da97bb9ed
Author: Uros Bojanic <uros.boja...@databricks.com>
AuthorDate: Wed Jul 23 16:17:27 2025 +0200

    [SPARK-52883][SPARK-52884][SQL] Implement the to_time and try_to_time functions in Scala

    ### What changes were proposed in this pull request?
    Implement the `to_time` and `try_to_time` functions in the Scala API.

    ### Why are the changes needed?
    Expand API support for the `ToTime` and `TryToTime` expressions.

    ### Does this PR introduce _any_ user-facing change?
    Yes, the new functions are now available in the Scala API.

    ### How was this patch tested?
    Added appropriate Scala function tests.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #51575 from uros-db/scala-try_to_time.

    Authored-by: Uros Bojanic <uros.boja...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
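
A minimal usage sketch of the new Scala API (illustrative only: it assumes a
SparkSession `spark` with `spark.implicits._` in scope, and mirrors the
behavior exercised by the tests added below):

    import org.apache.spark.sql.functions.{col, lit, to_time, try_to_time}

    val df = Seq("23:59:59.999999", "invalid_time").toDF("str")

    // try_to_time yields NULL for the malformed row instead of raising an error.
    df.select(try_to_time(col("str"))).show()

    // to_time raises a SparkDateTimeException on malformed input; with
    // well-formed input and an explicit pattern it parses successfully.
    Seq("23.59.59").toDF("str")
      .select(to_time(col("str"), lit("HH.mm.ss")))
      .show()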

---
 python/pyspark/sql/tests/test_functions.py         |   4 +-
 .../scala/org/apache/spark/sql/functions.scala     |  70 +++++++++
 .../apache/spark/sql/TimeFunctionsSuiteBase.scala  | 174 ++++++++++++++++++++-
 3 files changed, 246 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 183a17728421..3a4dfd8f6e4d 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -83,7 +83,9 @@ class FunctionsTestsMixin:
         # Functions that we expect to be missing in python until they are added to pyspark
         expected_missing_in_py = set(
             # TODO(SPARK-52888): Implement the make_time function in Python
-            ["make_time"]
+            # TODO(SPARK-52890): Implement the to_time function in Python
+            # TODO(SPARK-52891): Implement the try_to_time function in Python
+            ["make_time", "to_time", "try_to_time"]
         )
 
         self.assertEqual(
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
index 2b41d0333534..e16554eaddbf 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5699,6 +5699,41 @@ object functions {
   def unix_timestamp(s: Column, p: String): Column = Column.fn("unix_timestamp", s, lit(p))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column): Column = {
+    Column.fn("to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or raises an error if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def to_time(str: Column, format: Column): Column = {
+    Column.fn("to_time", str, format)
+  }
+
   /**
    * Converts to a timestamp by casting rules to `TimestampType`.
    *
@@ -5731,6 +5766,41 @@
    */
   def to_timestamp(s: Column, fmt: String): Column = Column.fn("to_timestamp", s, lit(fmt))
 
+  /**
+   * Parses a string value to a time value.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column): Column = {
+    Column.fn("try_to_time", str)
+  }
+
+  /**
+   * Parses a string value to a time value.
+   *
+   * See <a href="https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"> Datetime
+   * Patterns</a> for valid time format patterns.
+   *
+   * @param str
+   *   A string to be parsed to time.
+   * @param format
+   *   A time format pattern to follow.
+   * @return
+   *   A time, or null if the input is malformed.
+   *
+   * @group datetime_funcs
+   * @since 4.1.0
+   */
+  def try_to_time(str: Column, format: Column): Column = {
+    Column.fn("try_to_time", str, format)
+  }
+
   /**
    * Parses the `s` with the `format` to a timestamp. The function always returns null on an
    * invalid input with`/`without ANSI SQL mode enabled. The result data type is consistent with
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
index 4702b9d43498..7d7c4597ddfe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TimeFunctionsSuiteBase.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql
 
 import java.time.LocalTime
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkDateTimeException}
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSparkSession
@@ -161,6 +161,178 @@ abstract class TimeFunctionsSuiteBase extends QueryTest with SharedSparkSession
     checkAnswer(result1, expected)
     checkAnswer(result2, expected)
   }
+
+  test("SPARK-52883: to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str)"
+    )
+    val result2 = df.select(
+      to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq("invalid_time").toDF("str")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH:mm:ss.SSSSSS'")
+    )
+  }
+
+  test("SPARK-52883: to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType, nullable = false),
+      StructField("format", StringType, nullable = false)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.S"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "to_time(str, format)"
+    )
+    val result2 = df.select(
+      to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999"
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+
+    // Error is thrown for malformed input.
+    val invalidTimeDF = Seq(("invalid_time", "HH.mm.ss")).toDF("str", "format")
+    checkError(
+      exception = intercept[SparkDateTimeException] {
+        invalidTimeDF.select(to_time(col("str"), col("format"))).collect()
+      },
+      condition = "CANNOT_PARSE_TIME",
+      parameters = Map("input" -> "'invalid_time'", "format" -> "'HH.mm.ss'")
+    )
+  }
+
+  test("SPARK-52884: try_to_time function without format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType)
+    ))
+    val data = Seq(
+      Row("00:00:00"),
+      Row("01:02:03.4"),
+      Row("23:59:59.999999"),
+      Row("invalid_time"),
+      Row(null)
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
+
+  test("SPARK-52884: try_to_time function with format") {
+    // Input data for the function.
+    val schema = StructType(Seq(
+      StructField("str", StringType),
+      StructField("format", StringType)
+    ))
+    val data = Seq(
+      Row("00.00.00", "HH.mm.ss"),
+      Row("01.02.03.4", "HH.mm.ss.SSS"),
+      Row("23.59.59.999999", "HH.mm.ss.SSSSSS"),
+      Row("invalid_time", "HH.mm.ss"),
+      Row("00.00.00", "invalid_format"),
+      Row("invalid_time", "invalid_format"),
+      Row("00:00:00", "HH.mm.ss"),
+      Row("abc", "HH.mm.ss"),
+      Row("00:00:00", null),
+      Row(null, "HH.mm.ss")
+    )
+    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
+
+    // Test the function using both `selectExpr` and `select`.
+    val result1 = df.selectExpr(
+      "try_to_time(str, format)"
+    )
+    val result2 = df.select(
+      try_to_time(col("str"), col("format"))
+    )
+    // Check that both methods produce the same result.
+    checkAnswer(result1, result2)
+
+    // Expected output of the function.
+    val expected = Seq(
+      "00:00:00",
+      "01:02:03.4",
+      "23:59:59.999999",
+      null,
+      null,
+      null,
+      null,
+      null,
+      null,
+      null
+    ).toDF("timeString").select(col("timeString").cast("time"))
+    // Check that the results match the expected output.
+    checkAnswer(result1, expected)
+    checkAnswer(result2, expected)
+  }
 }
 
 // This class is used to run the same tests with ANSI mode enabled explicitly.

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org