This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5e1e4d31d7d9 [SPARK-52540][SQL] Make TIMESTAMP_NTZ from DATE and TIME 5e1e4d31d7d9 is described below commit 5e1e4d31d7d9fb2f93ce8c72d32774b560bcd81f Author: Max Gekk <max.g...@gmail.com> AuthorDate: Mon Jun 23 09:33:40 2025 +0200 [SPARK-52540][SQL] Make TIMESTAMP_NTZ from DATE and TIME ### What changes were proposed in this pull request? In the PR, I propose to extend the `make_timestamp_ntz` function to accept date and time fields. #### Syntax ```sql make_timestamp_ntz(date, time) ``` #### Arguments - date: A date expression - time: A time expression #### Returns A TIMESTAMP_NTZ. #### Examples ```sql > SELECT make_timestamp_ntz(DATE'2014-12-28', TIME'6:30:45.887'); 2014-12-28 06:30:45.887 ``` ### Why are the changes needed? Users will be able to create a timestamp without time zone by combining a time and a date. ### Does this PR introduce _any_ user-facing change? No, it just extends the existing API. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *ExpressionInfoSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ntz.sql" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #51179 Closes #51232 from MaxGekk/make_timestamp_ntz. 
Lead-authored-by: Max Gekk <max.g...@gmail.com> Co-authored-by: Maxim Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../catalyst/expressions/datetimeExpressions.scala | 43 +++++++++++- .../spark/sql/catalyst/util/DateTimeUtils.scala | 12 ++++ .../expressions/DateExpressionsSuite.scala | 18 ++++- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 14 ++++ .../analyzer-results/timestamp-ntz.sql.out | 69 ++++++++++++++++++- .../resources/sql-tests/inputs/timestamp-ntz.sql | 5 ++ .../sql-tests/results/timestamp-ntz.sql.out | 78 +++++++++++++++++++++- 7 files changed, 233 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 4ae8883dae04..70c3abc1acf9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter} @@ -2554,9 +2555,39 @@ case class MakeDate( copy(year = newFirst, month = newSecond, day = newThird) } +case class MakeTimestampNTZ(left: Expression, right: Expression) + extends BinaryExpression + with RuntimeReplaceable + with ExpectsInputTypes { + + override def replacement: Expression = StaticInvoke( + classOf[DateTimeUtils.type], + 
TimestampNTZType, + "makeTimestampNTZ", + Seq(left, right), + Seq(left.dataType, right.dataType) + ) + + override def inputTypes: Seq[AbstractDataType] = + Seq( + DateType, + TypeCollection(TimeType.MIN_PRECISION to TimeType.MAX_PRECISION map TimeType.apply: _*)) + + override def prettyName: String = "make_timestamp_ntz" + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): Expression = { + copy(left = newLeft, right = newRight) + } +} + // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(year, month, day, hour, min, sec) - Create local date-time from year, month, day, hour, min, sec fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead.", + usage = """ + _FUNC_(year, month, day, hour, min, sec) - Create local date-time from year, month, day, hour, min, sec fields. If the configuration `spark.sql.ansi.enabled` is false, the function returns NULL on invalid inputs. Otherwise, it will throw an error instead. + + _FUNC_(date, time) - Create a local date-time from date and time fields. + """, arguments = """ Arguments: * year - the year to represent, from 1 to 9999 @@ -2567,6 +2598,8 @@ case class MakeDate( * sec - the second-of-minute and its micro-fraction to represent, from 0 to 60. If the sec argument equals to 60, the seconds field is set to 0 and 1 minute is added to the final timestamp. 
+ * date - a date to represent, from 0001-01-01 to 9999-12-31 + * time - a local time to represent, from 00:00:00 to 23:59:59.999999 """, examples = """ Examples: @@ -2576,6 +2609,8 @@ case class MakeDate( 2019-07-01 00:00:00 > SELECT _FUNC_(null, 7, 22, 15, 30, 0); NULL + > SELECT _FUNC_(DATE'2014-12-28', TIME'6:30:45.887'); + 2014-12-28 06:30:45.887 """, group = "datetime_funcs", since = "3.4.0") @@ -2583,7 +2618,9 @@ case class MakeDate( object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { val numArgs = expressions.length - if (numArgs == 6) { + if (numArgs == 2) { + MakeTimestampNTZ(expressions(0), expressions(1)) + } else if (numArgs == 6) { MakeTimestamp( expressions(0), expressions(1), @@ -2593,7 +2630,7 @@ object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { expressions(5), dataType = TimestampNTZType) } else { - throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(6), numArgs) + throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(2, 6), numArgs) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index cd811bc9749f..afffb07da080 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -822,4 +822,16 @@ object DateTimeUtils extends SparkDateTimeUtils { throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRangeWithoutSuggestion(e) } } + + /** + * Makes a timestamp without time zone from a date and a local time. + * + * @param days The number of days since the epoch. 1970-01-01. + * Negative numbers represent earlier days. + * @param nanos The number of nanoseconds within the day since the midnight. 
+ * @return The number of microseconds since the epoch of 1970-01-01 00:00:00Z. + */ + def makeTimestampNTZ(days: Int, nanos: Long): Long = { + localDateTimeToMicros(LocalDateTime.of(daysToLocalDate(days), nanosToLocalTime(nanos))) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index f166dfb2d7e7..23763e1769f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat -import java.time.{DateTimeException, Duration, Instant, LocalDate, LocalDateTime, Period, ZoneId} +import java.time.{DateTimeException, Duration, Instant, LocalDate, LocalDateTime, LocalTime, Period, ZoneId} import java.time.temporal.ChronoUnit import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ @@ -2140,4 +2140,20 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("make timestamp_ntz from date and time") { + def dateLit(d: String): Expression = Literal(LocalDate.parse(d)) + def timeLit(t: String): Expression = Literal(LocalTime.parse(t)) + def tsNtz(s: String): Long = localDateTimeToMicros(LocalDateTime.parse(s), UTC) + + checkEvaluation(MakeTimestampNTZ(dateLit("1970-01-01"), timeLit("00:00:00")), 0L) + checkEvaluation(MakeTimestampNTZ(dateLit("2025-06-20"), timeLit("15:20:30.123456")), + tsNtz("2025-06-20T15:20:30.123456")) + checkEvaluation(MakeTimestampNTZ(Literal(null, DateType), timeLit("15:20:30.123456")), + null) + checkEvaluation(MakeTimestampNTZ(dateLit("2025-06-20"), Literal(null, TimeType())), + null) + checkEvaluation(MakeTimestampNTZ(Literal(null, DateType), 
Literal(null, TimeType())), + null) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 0307b6d944fb..5328d2de7ad9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -1216,4 +1216,18 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { s"Invalid value for SecondOfMinute (valid values 0 - 59): ${invalidSecond.toLong}")) } } + + test("makeTimestampNTZ") { + assert(makeTimestampNTZ(0, 0) == 0) + assert(makeTimestampNTZ(0, localTime(23, 59, 59)) * NANOS_PER_MICROS == localTime(23, 59, 59)) + assert(makeTimestampNTZ(-1, 0) == -1 * MICROS_PER_DAY) + assert(makeTimestampNTZ(-1, localTime(23, 59, 59, 999999)) == -1) + assert(makeTimestampNTZ(days(9999, 12, 31), localTime(23, 59, 59, 999999)) == + date(9999, 12, 31, 23, 59, 59, 999999)) + assert(makeTimestampNTZ(days(1, 1, 1), localTime(0, 0, 0)) == date(1, 1, 1, 0, 0, 0)) + val msg = intercept[DateTimeException] { + makeTimestampNTZ(0, -1) + }.getMessage + assert(msg.contains("Invalid value")) + } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz.sql.out index 9ab5b2445fc3..a81a2bbf805e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp-ntz.sql.out @@ -44,7 +44,7 @@ org.apache.spark.sql.AnalysisException "messageParameters" : { "actualNum" : "7", "docroot" : "https://spark.apache.org/docs/latest", - "expectedNum" : "6", + "expectedNum" : "[2, 6]", "functionName" : "`make_timestamp_ntz`" }, "queryContext" : [ { @@ -64,6 +64,73 @@ Project [make_timestamp_ntz(2021, 7, 11, 6, 30, 
cast(60.007 as decimal(16,6)), N +- OneRowRelation +-- !query +SELECT make_timestamp_ntz(make_date(2021, 07, 11), make_time(6, 30, 45.678)) +-- !query analysis +Project [make_timestamp_ntz(make_date(2021, 7, 11, true), make_time(6, 30, cast(45.678 as decimal(16,6)))) AS make_timestamp_ntz(make_date(2021, 7, 11), make_time(6, 30, 45.678))#x] ++- OneRowRelation + + +-- !query +SELECT make_timestamp_ntz(NULL, TIME'00:00:00') +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT make_timestamp_ntz(DATE'1970-01-01', NULL) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +SELECT make_timestamp_ntz(timestamp_ntz'2018-11-17 13:33:33', TIME'0:0:0') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP_NTZ '2018-11-17 13:33:33'\"", + "inputType" : "\"TIMESTAMP_NTZ\"", + "paramIndex" : "first", + "requiredType" : "\"DATE\"", + "sqlExpr" : "\"make_timestamp_ntz(TIMESTAMP_NTZ '2018-11-17 13:33:33', TIME '00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "make_timestamp_ntz(timestamp_ntz'2018-11-17 13:33:33', TIME'0:0:0')" + } ] +} + + +-- !query +SELECT make_timestamp_ntz(DATE'2025-06-20', '0:0:0') +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"0:0:0\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "(\"TIME(0)\" or \"TIME(1)\" or \"TIME(2)\" or \"TIME(3)\" or \"TIME(4)\" or \"TIME(5)\" or \"TIME(6)\")", + "sqlExpr" : "\"make_timestamp_ntz(DATE '2025-06-20', 0:0:0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + 
"stopIndex" : 52, + "fragment" : "make_timestamp_ntz(DATE'2025-06-20', '0:0:0')" + } ] +} + + -- !query SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00') -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql index 7996f5879bf7..bfbe551747a6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql @@ -15,6 +15,11 @@ SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678); -- make_timestamp_ntz should not accept time zone input SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET'); SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007); +SELECT make_timestamp_ntz(make_date(2021, 07, 11), make_time(6, 30, 45.678)); +SELECT make_timestamp_ntz(NULL, TIME'00:00:00'); +SELECT make_timestamp_ntz(DATE'1970-01-01', NULL); +SELECT make_timestamp_ntz(timestamp_ntz'2018-11-17 13:33:33', TIME'0:0:0'); +SELECT make_timestamp_ntz(DATE'2025-06-20', '0:0:0'); SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00'); SELECT convert_timezone('Europe/Brussels', timestamp_ntz'2022-03-23 00:00:00'); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out index 9e37bf4e9caa..ff7ba8d9eb71 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out @@ -51,7 +51,7 @@ org.apache.spark.sql.AnalysisException "messageParameters" : { "actualNum" : "7", "docroot" : "https://spark.apache.org/docs/latest", - "expectedNum" : "6", + "expectedNum" : "[2, 6]", "functionName" : "`make_timestamp_ntz`" }, "queryContext" : [ { @@ -79,6 +79,82 @@ org.apache.spark.SparkDateTimeException } +-- !query +SELECT 
make_timestamp_ntz(make_date(2021, 07, 11), make_time(6, 30, 45.678)) +-- !query schema +struct<make_timestamp_ntz(make_date(2021, 7, 11), make_time(6, 30, 45.678)):timestamp_ntz> +-- !query output +2021-07-11 06:30:45.678 + + +-- !query +SELECT make_timestamp_ntz(NULL, TIME'00:00:00') +-- !query schema +struct<make_timestamp_ntz(NULL, TIME '00:00:00'):timestamp_ntz> +-- !query output +NULL + + +-- !query +SELECT make_timestamp_ntz(DATE'1970-01-01', NULL) +-- !query schema +struct<make_timestamp_ntz(DATE '1970-01-01', NULL):timestamp_ntz> +-- !query output +NULL + + +-- !query +SELECT make_timestamp_ntz(timestamp_ntz'2018-11-17 13:33:33', TIME'0:0:0') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"TIMESTAMP_NTZ '2018-11-17 13:33:33'\"", + "inputType" : "\"TIMESTAMP_NTZ\"", + "paramIndex" : "first", + "requiredType" : "\"DATE\"", + "sqlExpr" : "\"make_timestamp_ntz(TIMESTAMP_NTZ '2018-11-17 13:33:33', TIME '00:00:00')\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 74, + "fragment" : "make_timestamp_ntz(timestamp_ntz'2018-11-17 13:33:33', TIME'0:0:0')" + } ] +} + + +-- !query +SELECT make_timestamp_ntz(DATE'2025-06-20', '0:0:0') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + "sqlState" : "42K09", + "messageParameters" : { + "inputSql" : "\"0:0:0\"", + "inputType" : "\"STRING\"", + "paramIndex" : "second", + "requiredType" : "(\"TIME(0)\" or \"TIME(1)\" or \"TIME(2)\" or \"TIME(3)\" or \"TIME(4)\" or \"TIME(5)\" or \"TIME(6)\")", + "sqlExpr" : "\"make_timestamp_ntz(DATE '2025-06-20', 0:0:0)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 52, + 
"fragment" : "make_timestamp_ntz(DATE'2025-06-20', '0:0:0')" + } ] +} + + -- !query SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00') -- !query schema --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org