ueshin commented on a change in pull request #34951:
URL: https://github.com/apache/spark/pull/34951#discussion_r772634955
##########
File path: python/pyspark/sql/functions.py
##########
@@ -1264,10 +1269,7 @@ def grouping(col: "ColumnOrName") -> Column:
| Bob| 0| 5|
+-----+--------------+--------+
"""
- sc = SparkContext._active_spark_context
- assert sc is not None and sc._jvm is not None
- jc = sc._jvm.functions.grouping(_to_java_column(col))
- return Column(jc)
+ return _invoke_function_over_column("grouping", col)
Review comment:
Ditto, and the same for the several invocations that follow?
##########
File path: python/pyspark/sql/functions.py
##########
@@ -1081,13 +1100,10 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C
>>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect()
[Row(distinct_ages=2)]
"""
- sc = SparkContext._active_spark_context
- assert sc is not None and sc._jvm is not None
if rsd is None:
- jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col))
+ return _invoke_function_over_column("approx_count_distinct", col)
Review comment:
I guess we should use `_invoke_function_over_columns`?
##########
File path: python/pyspark/sql/functions.py
##########
@@ -79,16 +80,34 @@ def _invoke_function(name: str, *args: Any) -> Column:
and wraps the result with :class:`~pyspark.sql.Column`.
"""
assert SparkContext._active_spark_context is not None
- jf = _get_get_jvm_function(name, SparkContext._active_spark_context)
+ jf = _get_jvm_function(name, SparkContext._active_spark_context)
return Column(jf(*args))
+def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column:
+ """
+ Invokes n-ary JVM function identified by name
+ and wraps the result with :class:`~pyspark.sql.Column`.
+ """
+ return _invoke_function(name, *(_to_java_column(col) for col in cols))
+
+
def _invoke_function_over_column(name: str, col: "ColumnOrName") -> Column:
Review comment:
I guess we can remove this and use `_invoke_function_over_columns` instead?
We can do it in a separate PR, though.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]