This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0bed57325de5 [SPARK-46984][PYTHON] Remove pyspark.copy_func
0bed57325de5 is described below
commit 0bed57325de591d388589f9ac75c54faeed53706
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Feb 7 10:10:33 2024 +0900
[SPARK-46984][PYTHON] Remove pyspark.copy_func
### What changes were proposed in this pull request?
This PR removes `pyspark.copy_func` and instead explicitly defines the aliases.
### Why are the changes needed?
- It needs more code than what it actually deduplicates.
- For consistency with other cases.
- Type hints can't be leveraged without this.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing unit tests should cover this change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #45042 from HyukjinKwon/SPARK-46984.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/__init__.py | 33 +--------------------------------
python/pyspark/sql/column.py | 17 ++++++++++++++---
python/pyspark/sql/dataframe.py | 40 ++++++++++++++++++++++++++++++----------
3 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index a9a2a3170256..a28144aedd70 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -47,8 +47,7 @@ Public classes:
"""
from functools import wraps
-import types
-from typing import cast, Any, Callable, Optional, TypeVar, Union
+from typing import cast, Any, Callable, TypeVar, Union
from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
@@ -86,36 +85,6 @@ def since(version: Union[str, float]) -> Callable[[_F], _F]:
return deco
-def copy_func(
- f: _F,
- name: Optional[str] = None,
- sinceversion: Optional[Union[str, float]] = None,
- doc: Optional[str] = None,
-) -> _F:
- """
- Returns a function with same code, globals, defaults, closure, and
- name (or provide a new name).
- """
- # See
- #
http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
- assert isinstance(f, types.FunctionType)
-
- fn = types.FunctionType(
- f.__code__,
- f.__globals__,
- name or f.__name__,
- f.__defaults__,
- f.__closure__,
- )
- # in case f was given attrs (note this dict is a shallow copy):
- fn.__dict__.update(f.__dict__)
- if doc is not None:
- fn.__doc__ = doc
- if sinceversion is not None:
- fn = since(sinceversion)(fn)
- return cast(_F, fn)
-
-
def keyword_only(func: _F) -> _F:
"""
A decorator that forces keyword arguments in the wrapped method
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 5fa7fb3d42b0..4a7213593703 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -33,7 +33,6 @@ from typing import (
from py4j.java_gateway import JavaObject, JVMView
-from pyspark import copy_func
from pyspark.context import SparkContext
from pyspark.errors import PySparkAttributeError, PySparkTypeError,
PySparkValueError
from pyspark.sql.types import DataType
@@ -1230,7 +1229,13 @@ class Column:
)
return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias))))
- name = copy_func(alias, sinceversion=2.0, doc=":func:`name` is an alias
for :func:`alias`.")
+ def name(self, *alias: str, **kwargs: Any) -> "Column":
+ """
+ :func:`name` is an alias for :func:`alias`.
+
+ .. versionadded:: 2.0.0
+ """
+ return self.alias(*alias, **kwargs)
def cast(self, dataType: Union[DataType, str]) -> "Column":
"""
@@ -1277,7 +1282,13 @@ class Column:
)
return Column(jc)
- astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias
for :func:`cast`.")
+ def astype(self, dataType: Union[DataType, str]) -> "Column":
+ """
+ :func:`astype` is an alias for :func:`cast`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.cast(dataType)
def between(
self,
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index c0c9b1e6ed33..8d2b4d041aa0 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -40,7 +40,7 @@ from typing import (
from py4j.java_gateway import JavaObject, JVMView
-from pyspark import copy_func, _NoValue
+from pyspark import _NoValue
from pyspark._globals import _NoValueType
from pyspark.context import SparkContext
from pyspark.errors import (
@@ -6780,22 +6780,42 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
"""
return list(self._jdf.inputFiles())
- where = copy_func(filter, sinceversion=1.3, doc=":func:`where` is an alias
for :func:`filter`.")
+ def where(self, condition: "ColumnOrName") -> "DataFrame":
+ """
+ :func:`where` is an alias for :func:`filter`.
+
+ .. versionadded:: 1.3.0
+ """
+ return self.filter(condition)
# Two aliases below were added for pandas compatibility many years ago.
# There are too many differences compared to pandas and we cannot just
# make it "compatible" by adding aliases. Therefore, we stop adding such
# aliases as of Spark 3.0. Two methods below remain just
# for legacy users currently.
- groupby = copy_func(
- groupBy, sinceversion=1.4, doc=":func:`groupby` is an alias for
:func:`groupBy`."
- )
+ @overload
+ def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData":
+ ...
- drop_duplicates = copy_func(
- dropDuplicates,
- sinceversion=1.4,
- doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.",
- )
+ @overload
+ def groupby(self, __cols: Union[List[Column], List[str], List[int]]) ->
"GroupedData":
+ ...
+
+ def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": #
type: ignore[misc]
+ """
+ :func:`groupby` is an alias for :func:`groupBy`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.groupBy(*cols)
+
+ def drop_duplicates(self, subset: Optional[List[str]] = None) ->
"DataFrame":
+ """
+ :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.dropDuplicates(subset)
def writeTo(self, table: str) -> DataFrameWriterV2:
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]