This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 3224cddcf9da [SPARK-46324][SQL][PYTHON] Fix the output name of pyspark.sql.functions.user and session_user
3224cddcf9da is described below
commit 3224cddcf9da913c964b775b5912a67cd1e968b2
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Dec 8 12:48:24 2023 -0800
[SPARK-46324][SQL][PYTHON] Fix the output name of pyspark.sql.functions.user and session_user
### What changes were proposed in this pull request?
This PR proposes to fix `user()` and `session_user()` so that their output column names match the function names.
### Why are the changes needed?
To show the correct names of the functions being used.
### Does this PR introduce _any_ user-facing change?
Yes.
```scala
spark.range(1).select(user(), session_user()).show()
```
Before:
```
+--------------+--------------+
|current_user()|current_user()|
+--------------+--------------+
|  hyukjin.kwon|  hyukjin.kwon|
+--------------+--------------+
```
After:
```
+--------------+--------------+
|        user()|session_user()|
+--------------+--------------+
|  hyukjin.kwon|  hyukjin.kwon|
+--------------+--------------+
```
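For reference, the new column names can also be checked programmatically (a minimal sketch, assuming an active `spark` session and the Scala `user`/`session_user` functions used above):
```scala
import org.apache.spark.sql.functions.{session_user, user}

// The schema field names now reflect the function actually called,
// instead of both columns being labeled current_user().
val df = spark.range(1).select(user(), session_user())
assert(df.schema.fieldNames.sameElements(Array("user()", "session_user()")))
```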
### How was this patch tested?
Manually tested, and unit tests were added.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44253 from HyukjinKwon/user-name.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../query-tests/explain-results/function_session_user.explain | 2 +-
.../resources/query-tests/explain-results/function_user.explain | 2 +-
python/pyspark/sql/functions/builtin.py | 4 ++--
python/pyspark/sql/tests/test_functions.py | 9 +++++++++
.../scala/org/apache/spark/sql/catalyst/expressions/misc.scala | 5 +++--
.../src/test/resources/sql-functions/sql-expression-schema.md | 6 +++---
6 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
index 82f5d2adcec0..b6205d9fb56c 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_session_user.explain
@@ -1,2 +1,2 @@
-Project [current_user() AS current_user()#0]
+Project [session_user() AS session_user()#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
index 82f5d2adcec0..52746c58c000 100644
--- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
+++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_user.explain
@@ -1,2 +1,2 @@
-Project [current_user() AS current_user()#0]
+Project [user() AS user()#0]
+- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0]
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 997b641080cf..e1cffff01018 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -8967,7 +8967,7 @@ def user() -> Column:
>>> import pyspark.sql.functions as sf
>>> spark.range(1).select(sf.user()).show() # doctest: +SKIP
+--------------+
- |current_user()|
+ |        user()|
+--------------+
| ruifeng.zheng|
+--------------+
@@ -8986,7 +8986,7 @@ def session_user() -> Column:
>>> import pyspark.sql.functions as sf
>>> spark.range(1).select(sf.session_user()).show() # doctest: +SKIP
+--------------+
- |current_user()|
+ |session_user()|
+--------------+
| ruifeng.zheng|
+--------------+
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index b59417d8a310..5352ee04d7fe 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -1355,6 +1355,15 @@ class FunctionsTestsMixin:
message_parameters={"arg_name": "gapDuration", "arg_type": "int"},
)
+    def test_current_user(self):
+        df = self.spark.range(1).select(F.current_user())
+        self.assertIsInstance(df.first()[0], str)
+        self.assertEqual(df.schema.names[0], "current_user()")
+        df = self.spark.range(1).select(F.user())
+        self.assertEqual(df.schema.names[0], "user()")
+        df = self.spark.range(1).select(F.session_user())
+        self.assertEqual(df.schema.names[0], "session_user()")
+
def test_bucket(self):
with self.assertRaises(PySparkTypeError) as pe:
F.bucket("5", "id")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 1ae8b19ff63e..8816e84490da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT}
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, UnresolvedSeed}
+import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, UnresolvedSeed}
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
@@ -336,7 +336,8 @@ case class TypeOf(child: Expression) extends UnaryExpression {
case class CurrentUser() extends LeafExpression with Unevaluable {
override def nullable: Boolean = false
override def dataType: DataType = StringType
- override def prettyName: String = "current_user"
+ override def prettyName: String =
+ getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_user")
final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE)
}
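For context, a minimal sketch of how the alias-aware `prettyName` above behaves (illustration only; it uses the Catalyst internals from this file plus the standard `setTagValue` tree-node API, and assumes the function registry sets `FUNC_ALIAS` when the `user` and `session_user` aliases are registered):
```scala
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.CurrentUser

// Without the FUNC_ALIAS tag, prettyName still falls back to "current_user".
val plain = CurrentUser()
assert(plain.prettyName == "current_user")

// When the registry registers an alias, it tags the expression; prettyName
// then reports the alias, which is what fixes the output column name.
val aliased = CurrentUser()
aliased.setTagValue(FunctionRegistry.FUNC_ALIAS, "session_user")
assert(aliased.prettyName == "session_user")
```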
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 1cdd061e1d3d..053b3c56b29e 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -104,8 +104,8 @@
| org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct<current_timezone():string> |
| org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct<current_timestamp():timestamp> |
| org.apache.spark.sql.catalyst.expressions.CurrentUser | current_user | SELECT current_user() | struct<current_user():string> |
-| org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct<current_user():string> |
-| org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct<current_user():string> |
+| org.apache.spark.sql.catalyst.expressions.CurrentUser | session_user | SELECT session_user() | struct<session_user():string> |
+| org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct<user():string> |
| org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct<date_add(2016-07-30, 1):date> |
| org.apache.spark.sql.catalyst.expressions.DateDiff | date_diff | SELECT date_diff('2009-07-31', '2009-07-30') | struct<date_diff(2009-07-31, 2009-07-30):int> |
@@ -254,7 +254,7 @@
| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
| org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct<rpad(hi, 5, ??):string> |
-| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(USER_RAISED_EXCEPTION, map(errorMessage, custom error message)):void> |
+| org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder | raise_error | SELECT raise_error('custom error message') | struct<raise_error(USER_RAISED_EXCEPTION, map(errorMessage, custom error message)):void> |
| org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
| org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]