mihailom-db commented on code in PR #48624:
URL: https://github.com/apache/spark/pull/48624#discussion_r1814304501
##########
python/pyspark/sql/functions/builtin.py:
##########
@@ -20635,6 +20635,109 @@ def make_timestamp(
)
+@_try_remote_functions
+def try_make_timestamp(
+ years: "ColumnOrName",
+ months: "ColumnOrName",
+ days: "ColumnOrName",
+ hours: "ColumnOrName",
+ mins: "ColumnOrName",
+ secs: "ColumnOrName",
+ timezone: Optional["ColumnOrName"] = None,
+) -> Column:
+ """
+ ANSI compliant version of make_timestamp function.
+ Try to create timestamp from years, months, days, hours, mins, secs and
timezone fields.
+ The result data type is consistent with the value of configuration
`spark.sql.timestampType`.
+ The function returns NULL on invalid inputs.
+
+ .. versionadded:: 4.0.0
+
+ Parameters
+ ----------
+ years : :class:`~pyspark.sql.Column` or str
+ The year to represent, from 1 to 9999
+ months : :class:`~pyspark.sql.Column` or str
+ The month-of-year to represent, from 1 (January) to 12 (December)
+ days : :class:`~pyspark.sql.Column` or str
+ The day-of-month to represent, from 1 to 31
+ hours : :class:`~pyspark.sql.Column` or str
+ The hour-of-day to represent, from 0 to 23
+ mins : :class:`~pyspark.sql.Column` or str
+ The minute-of-hour to represent, from 0 to 59
+ secs : :class:`~pyspark.sql.Column` or str
+ The second-of-minute and its micro-fraction to represent, from 0 to 60.
+ The value can be either an integer like 13 , or a fraction like 13.123.
+ If the sec argument equals to 60, the seconds field is set
+ to 0 and 1 minute is added to the final timestamp.
+ timezone : :class:`~pyspark.sql.Column` or str, optional
+ The time zone identifier. For example, CET, UTC and etc.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ A new column that contains a timestamp or NULL in case of an error.
+
+ Examples
+ --------
+
+ Example 1: Make timestamp from years, months, days, hours, mins and secs.
+
+ >>> import pyspark.sql.functions as sf
+ >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
+ >>> df = spark.createDataFrame([[2014, 12, 28, 6, 30, 45.887, 'CET']],
+ ... ["year", "month", "day", "hour", "min", "sec", "timezone"])
+ >>> df.select(sf.try_make_timestamp(
+ ... df.year, df.month, df.day, df.hour, df.min, df.sec, df.timezone)
+ ... ).show(truncate=False)
+ +----------------------------------------------------------+
+ |try_make_timestamp(year, month, day, hour, min, sec, timezone)|
Review Comment:
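This will fail CI: the expected doctest output is inconsistent (the table border looks narrower than the header row). Please fix it so that it matches the actual output.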
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -2609,6 +2654,55 @@ object MakeTimestampLTZExpressionBuilder extends
ExpressionBuilder {
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Try to
create the current timestamp with local time zone from year, month, day, hour,
min, sec and timezone fields. The function returns NULL on invalid inputs.",
+ arguments = """
+ Arguments:
+ * year - the year to represent, from 1 to 9999
+ * month - the month-of-year to represent, from 1 (January) to 12
(December)
+ * day - the day-of-month to represent, from 1 to 31
+ * hour - the hour-of-day to represent, from 0 to 23
+ * min - the minute-of-hour to represent, from 0 to 59
+ * sec - the second-of-minute and its micro-fraction to represent, from
+ 0 to 60. If the sec argument equals to 60, the seconds field is
set
+ to 0 and 1 minute is added to the final timestamp.
+ * timezone - the time zone identifier. For example, CET, UTC and etc.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
+ 2014-12-28 06:30:45.887
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
+ 2014-12-27 21:30:45.887
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
+ 2019-07-01 00:00:00
+ > SELECT _FUNC_(null, 7, 22, 15, 30, 0);
Review Comment:
ditto
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -2812,6 +2906,79 @@ case class MakeTimestamp(
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Try to
create a timestamp from year, month, day, hour, min, sec and timezone fields.
The result data type is consistent with the value of configuration
`spark.sql.timestampType`. The function returns NULL on invalid inputs.",
+ arguments = """
+ Arguments:
+ * year - the year to represent, from 1 to 9999
+ * month - the month-of-year to represent, from 1 (January) to 12
(December)
+ * day - the day-of-month to represent, from 1 to 31
+ * hour - the hour-of-day to represent, from 0 to 23
+ * min - the minute-of-hour to represent, from 0 to 59
+ * sec - the second-of-minute and its micro-fraction to represent, from 0
to 60.
+ The value can be either an integer like 13 , or a fraction like
13.123.
+ If the sec argument equals to 60, the seconds field is set
+ to 0 and 1 minute is added to the final timestamp.
+ * timezone - the time zone identifier. For example, CET, UTC and etc.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
+ 2014-12-28 06:30:45.887
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
+ 2014-12-27 21:30:45.887
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
+ 2019-07-01 00:00:00
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 1);
+ 2019-06-30 23:59:01
+ > SELECT _FUNC_(null, 7, 22, 15, 30, 0);
Review Comment:
ditto
##########
sql/api/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -8105,6 +8105,41 @@ object functions {
secs: Column): Column =
Column.fn("make_timestamp", years, months, days, hours, mins, secs)
+ /**
+ * Try to create a timestamp from years, months, days, hours, mins, secs and
timezone fields.
+ * The result data type is consistent with the value of configuration
`spark.sql.timestampType`.
+ * The function returns NULL on invalid inputs.
+ *
+ * @group datetime_funcs
+ * @since 4.0.0
+ */
+ def try_make_timestamp(
+ years: Column,
+ months: Column,
+ days: Column,
+ hours: Column,
+ mins: Column,
+ secs: Column,
+ timezone: Column): Column =
+ Column.fn("try_make_timestamp", years, months, days, hours, mins, secs,
timezone)
+
+ /**
+ * Try to create a timestamp from years, months, days, hours, mins, and secs
fields.
+ * The result data type is consistent with the value of configuration
`spark.sql.timestampType`.
+ * The function returns NULL on invalid inputs.
+ *
+ * @group datetime_funcs
+ * @since 4.0.0
+ */
+ def try_make_timestamp(
+ years: Column,
+ months: Column,
+ days: Column,
+ hours: Column,
+ mins: Column,
+ secs: Column): Column =
+ Column.fn("try_make_timestamp", years, months, days, hours, mins, secs)
+
/**
* Create the current timestamp with local time zone from years, months,
days, hours, mins, secs
Review Comment:
We need to add the `try_` variants of the LTZ and NTZ functions here as well.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -2812,6 +2906,79 @@ case class MakeTimestamp(
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(year, month, day, hour, min, sec[, timezone]) - Try to
create a timestamp from year, month, day, hour, min, sec and timezone fields.
The result data type is consistent with the value of configuration
`spark.sql.timestampType`. The function returns NULL on invalid inputs.",
+ arguments = """
+ Arguments:
+ * year - the year to represent, from 1 to 9999
+ * month - the month-of-year to represent, from 1 (January) to 12
(December)
+ * day - the day-of-month to represent, from 1 to 31
+ * hour - the hour-of-day to represent, from 0 to 23
+ * min - the minute-of-hour to represent, from 0 to 59
+ * sec - the second-of-minute and its micro-fraction to represent, from 0
to 60.
+ The value can be either an integer like 13 , or a fraction like
13.123.
+ If the sec argument equals to 60, the seconds field is set
+ to 0 and 1 minute is added to the final timestamp.
+ * timezone - the time zone identifier. For example, CET, UTC and etc.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
+ 2014-12-28 06:30:45.887
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887, 'CET');
+ 2014-12-27 21:30:45.887
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
+ 2019-07-01 00:00:00
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 1);
+ 2019-06-30 23:59:01
+ > SELECT _FUNC_(null, 7, 22, 15, 30, 0);
+ NULL
+ """,
+ group = "datetime_funcs",
+ since = "4.0.0")
+// scalastyle:on line.size.limit
+case class TryMakeTimestamp(
+ year: Expression,
+ month: Expression,
+ day: Expression,
+ hour: Expression,
+ min: Expression,
+ sec: Expression,
+ timezone: Option[Expression],
+ timeZoneId: Option[String],
+ replacement: Expression)
+ extends RuntimeReplaceable with InheritAnalysisRules {
Review Comment:
Let's make this cleaner: create a constructor that takes everything except
`replacement` and delegates to this default constructor, with `replacement`
set to MakeTimestamp. The other constructors should then call that constructor
rather than building new replacements themselves. This makes the code easier
to maintain.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -2561,6 +2561,51 @@ object MakeTimestampNTZExpressionBuilder extends
ExpressionBuilder {
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(year, month, day, hour, min, sec) - Try to create local
date-time from year, month, day, hour, min, sec fields. The function returns
NULL on invalid inputs.",
+ arguments = """
+ Arguments:
+ * year - the year to represent, from 1 to 9999
+ * month - the month-of-year to represent, from 1 (January) to 12
(December)
+ * day - the day-of-month to represent, from 1 to 31
+ * hour - the hour-of-day to represent, from 0 to 23
+ * min - the minute-of-hour to represent, from 0 to 59
+ * sec - the second-of-minute and its micro-fraction to represent, from
+ 0 to 60. If the sec argument equals to 60, the seconds field is
set
+ to 0 and 1 minute is added to the final timestamp.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(2014, 12, 28, 6, 30, 45.887);
+ 2014-12-28 06:30:45.887
+ > SELECT _FUNC_(2019, 6, 30, 23, 59, 60);
+ 2019-07-01 00:00:00
+ > SELECT _FUNC_(null, 7, 22, 15, 30, 0);
Review Comment:
Add a test with an input that is not null but for which the function still returns NULL.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]