This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f7f424d19c59 [SPARK-46725][SQL] Add DAYNAME function
f7f424d19c59 is described below
commit f7f424d19c59c6fef8d08a68a49861458a06abcf
Author: Petar Vasiljevic <[email protected]>
AuthorDate: Sun Jan 21 22:27:19 2024 +0300
[SPARK-46725][SQL] Add DAYNAME function
### What changes were proposed in this pull request?
Added DAYNAME function that returns the three-letter abbreviated day name for
the specified date to:
- Scala API
- Python API
- R API
- Spark Connect Scala Client
- Spark Connect Python Client
### Why are the changes needed?
For parity with Snowflake
### Does this PR introduce _any_ user-facing change?
Yes, since new function DAYNAME is added
### How was this patch tested?
Tested on new unit tests
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #44758 from PetarVasiljevic-DB/function_daytime.
Lead-authored-by: Petar Vasiljevic <[email protected]>
Co-authored-by: PetarVasiljevic-DB
<[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
R/pkg/R/functions.R | 14 +++++++++
R/pkg/R/generics.R | 4 +++
R/pkg/tests/fulltests/test_sparkSQL.R | 1 +
.../scala/org/apache/spark/sql/functions.scala | 9 ++++++
.../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++
.../explain-results/function_dayname.explain | 2 ++
.../query-tests/queries/function_dayname.json | 25 ++++++++++++++++
.../query-tests/queries/function_dayname.proto.bin | Bin 0 -> 121 bytes
.../source/reference/pyspark.sql/functions.rst | 1 +
python/pyspark/sql/connect/functions/builtin.py | 7 +++++
python/pyspark/sql/functions/builtin.py | 33 +++++++++++++++++++++
python/pyspark/sql/tests/test_functions.py | 6 ++++
.../sql/catalyst/analysis/FunctionRegistry.scala | 1 +
.../catalyst/expressions/datetimeExpressions.scala | 19 ++++++++++++
.../spark/sql/catalyst/util/DateTimeUtils.scala | 11 +++++++
.../expressions/DateExpressionsSuite.scala | 11 +++++++
.../scala/org/apache/spark/sql/functions.scala | 9 ++++++
.../sql-functions/sql-expression-schema.md | 1 +
.../org/apache/spark/sql/DateFunctionsSuite.scala | 12 ++++++++
19 files changed, 170 insertions(+)
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 0db25ad8739f..5106a83bd0ec 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1105,6 +1105,20 @@ setMethod("monthname",
column(jc)
})
+#' @details
+#' \code{dayname}: Extracts the three-letter abbreviated day name from a
+#' given date/timestamp/string.
+#'
+#' @rdname column_datetime_functions
+#' @aliases dayname dayname,Column-method
+#' @note dayname since 4.0.0
+setMethod("dayname",
+ signature(x = "Column"),
+ function(x) {
+ jc <- callJStatic("org.apache.spark.sql.functions", "dayname",
x@jc)
+ column(jc)
+ })
+
#' @details
#' \code{decode}: Computes the first argument into a string from a binary
using the provided
#' character set.
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 92febc02710d..26e81733055a 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -1024,6 +1024,10 @@ setGeneric("dayofyear", function(x) {
standardGeneric("dayofyear") })
#' @name NULL
setGeneric("monthname", function(x) { standardGeneric("monthname") })
+#' @rdname column_datetime_functions
+#' @name NULL
+setGeneric("dayname", function(x) { standardGeneric("dayname") })
+
#' @rdname column_string_functions
#' @name NULL
setGeneric("decode", function(x, charset) { standardGeneric("decode") })
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R
b/R/pkg/tests/fulltests/test_sparkSQL.R
index 118c853a00df..630781a57e44 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2063,6 +2063,7 @@ test_that("date functions on a DataFrame", {
expect_equal(collect(select(df, year(df$b)))[, 1], c(2012, 2013, 2014))
expect_equal(collect(select(df, month(df$b)))[, 1], c(12, 12, 12))
expect_equal(collect(select(df, monthname(df$b)))[, 1], c("Dec", "Dec",
"Dec"))
+ expect_equal(collect(select(df, dayname(df$b)))[, 1], c("Thu", "Sat", "Mon"))
expect_equal(collect(select(df, last_day(df$b)))[, 1],
c(as.Date("2012-12-31"), as.Date("2013-12-31"),
as.Date("2014-12-31")))
expect_equal(collect(select(df, next_day(df$b, "MONDAY")))[, 1],
diff --git
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
index 2a48958d4222..133b7e036cd7 100644
---
a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
+++
b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5960,6 +5960,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)
+ /**
+ * Extracts the three-letter abbreviated day name from a given
date/timestamp/string.
+ *
+ * @group datetime_funcs
+ * @since 4.0.0
+ */
+ def dayname(timeExp: Column): Column =
+ Column.fn("dayname", timeExp)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
index d5b384e9bc64..b52f75a2914d 100644
---
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
+++
b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala
@@ -2129,6 +2129,10 @@ class PlanGenerationTestSuite
fn.monthname(fn.col("d"))
}
+ temporalFunctionTest("dayname") {
+ fn.dayname(fn.col("d"))
+ }
+
temporalFunctionTest("next_day") {
fn.next_day(fn.col("d"), "Mon")
}
diff --git
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain
new file mode 100644
index 000000000000..49cbb48c8814
--- /dev/null
+++
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dayname.explain
@@ -0,0 +1,2 @@
+Project [dayname(d#0) AS dayname(d)#0]
++- LocalRelation <empty>, [d#0, t#0, s#0, x#0L, wt#0]
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json
b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json
new file mode 100644
index 000000000000..7898aa53deb8
--- /dev/null
+++
b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.json
@@ -0,0 +1,25 @@
+{
+ "common": {
+ "planId": "1"
+ },
+ "project": {
+ "input": {
+ "common": {
+ "planId": "0"
+ },
+ "localRelation": {
+ "schema":
"struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e"
+ }
+ },
+ "expressions": [{
+ "unresolvedFunction": {
+ "functionName": "dayname",
+ "arguments": [{
+ "unresolvedAttribute": {
+ "unparsedIdentifier": "d"
+ }
+ }]
+ }
+ }]
+ }
+}
\ No newline at end of file
diff --git
a/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin
b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin
new file mode 100644
index 000000000000..2809f3b9b7a1
Binary files /dev/null and
b/connector/connect/common/src/test/resources/query-tests/queries/function_dayname.proto.bin
differ
diff --git a/python/docs/source/reference/pyspark.sql/functions.rst
b/python/docs/source/reference/pyspark.sql/functions.rst
index cbdadd9cd2bd..ca20ccfb73c5 100644
--- a/python/docs/source/reference/pyspark.sql/functions.rst
+++ b/python/docs/source/reference/pyspark.sql/functions.rst
@@ -252,6 +252,7 @@ Date and Timestamp Functions
datediff
datepart
day
+ dayname
dayofmonth
dayofweek
dayofyear
diff --git a/python/pyspark/sql/connect/functions/builtin.py
b/python/pyspark/sql/connect/functions/builtin.py
index 7276cead88ef..72adfec33b1d 100644
--- a/python/pyspark/sql/connect/functions/builtin.py
+++ b/python/pyspark/sql/connect/functions/builtin.py
@@ -2996,6 +2996,13 @@ def monthname(col: "ColumnOrName") -> Column:
monthname.__doc__ = pysparkfuncs.monthname.__doc__
+def dayname(col: "ColumnOrName") -> Column:
+ return _invoke_function_over_columns("dayname", col)
+
+
+dayname.__doc__ = pysparkfuncs.dayname.__doc__
+
+
def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
return _invoke_function_over_columns("extract", field, source)
diff --git a/python/pyspark/sql/functions/builtin.py
b/python/pyspark/sql/functions/builtin.py
index 62400accba10..ca2efde0b3c2 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -7408,6 +7408,39 @@ def monthname(col: "ColumnOrName") -> Column:
return _invoke_function_over_columns("monthname", col)
+@_try_remote_functions
+def dayname(col: "ColumnOrName") -> Column:
+ """
+ Date and Timestamp Function: Returns the three-letter abbreviated day name
from the given date.
+
+ .. versionadded:: 4.0.0
+
+ Parameters
+ ----------
+ col : :class:`~pyspark.sql.Column` or str
+ target date/timestamp column to work on.
+
+ Returns
+ -------
+ :class:`~pyspark.sql.Column`
+ the three-letter abbreviation of day name for date/timestamp (Mon,
Tue, Wed...)
+
+ Examples
+ --------
+ Example 1: Basic usage of dayname function.
+
+ >>> import pyspark.sql.functions as sf
+ >>> df = spark.createDataFrame([('2015-04-08',)], ['dt'])
+ >>> df.select(sf.dayname('dt').alias('dayname')).show()
+ +-------+
+ |dayname|
+ +-------+
+ | Wed|
+ +-------+
+ """
+ return _invoke_function_over_columns("dayname", col)
+
+
@_try_remote_functions
def extract(field: "ColumnOrName", source: "ColumnOrName") -> Column:
"""
diff --git a/python/pyspark/sql/tests/test_functions.py
b/python/pyspark/sql/tests/test_functions.py
index c67ddcde44ea..9ea70d25bd1f 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -421,6 +421,12 @@ class FunctionsTestsMixin:
row = df.select(F.monthname(df.date)).first()
self.assertEqual(row[0], "Nov")
+ def test_dayname(self):
+ dt = datetime.datetime(2017, 11, 6)
+ df = self.spark.createDataFrame([Row(date=dt)])
+ row = df.select(F.dayname(df.date)).first()
+ self.assertEqual(row[0], "Mon")
+
# Test added for SPARK-37738; change Python API to accept both col & int
as input
def test_date_add_function(self):
dt = datetime.date(2021, 12, 27)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index a77086c948e0..391ff2cd34f2 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -642,6 +642,7 @@ object FunctionRegistry {
expression[UnixTimestamp]("unix_timestamp"),
expression[DayOfWeek]("dayofweek"),
expression[WeekDay]("weekday"),
+ expression[DayName]("dayname"),
expression[WeekOfYear]("weekofyear"),
expression[Year]("year"),
expression[TimeWindow]("window"),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index 22d4e175b9a3..997d2d7420a3 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -909,6 +909,25 @@ case class MonthName(child: Expression) extends
GetDateField {
copy(child = newChild)
}
+@ExpressionDescription(
+ usage = "_FUNC_(date) - Returns the three-letter abbreviated day name from
the given date.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(DATE('2008-02-20'));
+ Wed
+ """,
+ group = "datetime_funcs",
+ since = "4.0.0")
+case class DayName(child: Expression) extends GetDateField {
+ override val func = DateTimeUtils.getDayName
+ override val funcName = "getDayName"
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(DateType)
+ override def dataType: DataType = StringType
+ override protected def withNewChildInternal(newChild: Expression): DayName =
+ copy(child = newChild)
+}
+
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(timestamp, fmt) - Converts `timestamp` to a value of string
in the format specified by the date format `fmt`.",
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 0dd83c4b499e..01503324048b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -208,6 +208,17 @@ object DateTimeUtils extends SparkDateTimeUtils {
UTF8String.fromString(monthName)
}
+ /**
+ * Returns the three-letter abbreviated day name for the given number of
days since 1970-01-01.
+ */
+ def getDayName(days: Int): UTF8String = {
+ val dayName = DayOfWeek
+ .of(getWeekDay(days) + 1)
+ .getDisplayName(TextStyle.SHORT, DateFormatter.defaultLocale)
+
+ UTF8String.fromString(dayName)
+ }
+
/**
* Adds months to a timestamp at the given time zone. It converts the input
timestamp to a local
* timestamp at the given time zone, adds months, and converts the resulted
local timestamp
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
index 88bb05cbf917..d159d911763b 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala
@@ -269,6 +269,17 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkConsistencyBetweenInterpretedAndCodegen(MonthName, DateType)
}
+ test("DayName") {
+ checkEvaluation(DayName(Literal.create(null, DateType)), null)
+ checkEvaluation(DayName(Literal(d)), "Wed")
+ checkEvaluation(DayName(Cast(Literal(date), DateType, UTC_OPT)), "Wed")
+ checkEvaluation(DayName(Cast(Literal(ts), DateType, UTC_OPT)), "Fri")
+ checkEvaluation(DayName(Cast(Literal("2011-05-06"), DateType, UTC_OPT)),
"Fri")
+ checkEvaluation(DayName(Cast(Literal(LocalDate.parse("2017-05-27")),
DateType, UTC_OPT)), "Sat")
+ checkEvaluation(DayName(Cast(Literal(LocalDate.parse("1582-10-15")),
DateType, UTC_OPT)), "Fri")
+ checkConsistencyBetweenInterpretedAndCodegen(DayName, DateType)
+ }
+
test("DateFormat") {
Seq("legacy", "corrected").foreach { legacyParserPolicy =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> legacyParserPolicy)
{
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 97963c4ed924..933d0b3f89a7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -5747,6 +5747,15 @@ object functions {
def monthname(timeExp: Column): Column =
Column.fn("monthname", timeExp)
+ /**
+ * Extracts the three-letter abbreviated day name from a given
date/timestamp/string.
+ *
+ * @group datetime_funcs
+ * @since 4.0.0
+ */
+ def dayname(timeExp: Column): Column =
+ Column.fn("dayname", timeExp)
+
//////////////////////////////////////////////////////////////////////////////////////////////
// Collection functions
//////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index cff7921db0e5..f5bd0c8425d2 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -116,6 +116,7 @@
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder |
date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') |
struct<date_part(YEAR, TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder |
datepart | SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') |
struct<datepart(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456'):int> |
| org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT
date_sub('2016-07-30', 1) | struct<date_sub(2016-07-30, 1):date> |
+| org.apache.spark.sql.catalyst.expressions.DayName | dayname | SELECT
dayname(DATE('2008-02-20')) | struct<dayname(2008-02-20):string> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT
day('2009-07-30') | struct<day(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT
dayofmonth('2009-07-30') | struct<dayofmonth(2009-07-30):int> |
| org.apache.spark.sql.catalyst.expressions.DayOfWeek | dayofweek | SELECT
dayofweek('2009-07-30') | struct<dayofweek(2009-07-30):int> |
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index 7d608f4a08c4..dbd0bbec0396 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -279,6 +279,18 @@ class DateFunctionsSuite extends QueryTest with
SharedSparkSession {
Row("Apr", "Apr", "Apr"))
}
+ test("dayname") {
+ val df = Seq((d, sdfDate.format(d), ts)).toDF("a", "b", "c")
+
+ checkAnswer(
+ df.select(dayname($"a"), dayname($"b"), dayname($"c")),
+ Row("Wed", "Wed", "Mon"))
+
+ checkAnswer(
+ df.selectExpr("dayname(a)", "dayname(b)", "dayname(c)"),
+ Row("Wed", "Wed", "Mon"))
+ }
+
test("extract") {
val df = Seq((d, sdf.format(d), ts)).toDF("a", "b", "c")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]