This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0bed57325de5 [SPARK-46984][PYTHON] Remove pyspark.copy_func
0bed57325de5 is described below
commit 0bed57325de591d388589f9ac75c54faeed53706
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Feb 7 10:10:33 2024 +0900
[SPARK-46984][PYTHON] Remove pyspark.copy_func
### What changes were proposed in this pull request?
This PR removes `pyspark.copy_func` and instead explicitly defines the aliases.
### Why are the changes needed?
- It needs more code than what it actually deduplicates.
- For consistency with other cases.
- Type hints can't be leveraged without this.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing unit tests should cover this change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #45042 from HyukjinKwon/SPARK-46984.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/__init__.py | 33 +--------------------------------
python/pyspark/sql/column.py | 17 ++++++++++++++---
python/pyspark/sql/dataframe.py | 40 ++++++++++++++++++++++++++++++----------
3 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index a9a2a3170256..a28144aedd70 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -47,8 +47,7 @@ Public classes:
"""
from functools import wraps
-import types
-from typing import cast, Any, Callable, Optional, TypeVar, Union
+from typing import cast, Any, Callable, TypeVar, Union
from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
@@ -86,36 +85,6 @@ def since(version: Union[str, float]) -> Callable[[_F], _F]:
return deco
-def copy_func(
- f: _F,
- name: Optional[str] = None,
- sinceversion: Optional[Union[str, float]] = None,
- doc: Optional[str] = None,
-) -> _F:
- """
- Returns a function with same code, globals, defaults, closure, and
- name (or provide a new name).
- """
- # See
- #
http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
- assert isinstance(f, types.FunctionType)
-
- fn = types.FunctionType(
- f.__code__,
- f.__globals__,
- name or f.__name__,
- f.__defaults__,
- f.__closure__,
- )
- # in case f was given attrs (note this dict is a shallow copy):
- fn.__dict__.update(f.__dict__)
- if doc is not None:
- fn.__doc__ = doc
- if sinceversion is not None:
- fn = since(sinceversion)(fn)
- return cast(_F, fn)
-
-
def keyword_only(func: _F) -> _F:
"""
A decorator that forces keyword arguments in the wrapped method
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 5fa7fb3d42b0..4a7213593703 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -33,7 +33,6 @@ from typing import (
from py4j.java_gateway import JavaObject, JVMView
-from pyspark import copy_func
from pyspark.context import SparkContext
from pyspark.errors import PySparkAttributeError, PySparkTypeError,
PySparkValueError
from pyspark.sql.types import DataType
@@ -1230,7 +1229,13 @@ class Column:
)
return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias))))
- name = copy_func(alias, sinceversion=2.0, doc=":func:`name` is an alias
for :func:`alias`.")
+ def name(self, *alias: str, **kwargs: Any) -> "Column":
+ """
+ :func:`name` is an alias for :func:`alias`.
+
+ .. versionadded:: 2.0.0
+ """
+ return self.alias(*alias, **kwargs)
def cast(self, dataType: Union[DataType, str]) -> "Column":
"""
@@ -1277,7 +1282,13 @@ class Column:
)
return Column(jc)
- astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias
for :func:`cast`.")
+ def astype(self, dataType: Union[DataType, str]) -> "Column":
+ """
+ :func:`astype` is an alias for :func:`cast`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.cast(dataType)
def between(
self,
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index c0c9b1e6ed33..8d2b4d041aa0 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -40,7 +40,7 @@ from typing import (
from py4j.java_gateway import JavaObject, JVMView
-from pyspark import copy_func, _NoValue
+from pyspark import _NoValue
from pyspark._globals import _NoValueType
from pyspark.context import SparkContext
from pyspark.errors import (
@@ -6780,22 +6780,42 @@ class DataFrame(PandasMapOpsMixin,
PandasConversionMixin):
"""
return list(self._jdf.inputFiles())
- where = copy_func(filter, sinceversion=1.3, doc=":func:`where` is an alias
for :func:`filter`.")
+ def where(self, condition: "ColumnOrName") -> "DataFrame":
+ """
+ :func:`where` is an alias for :func:`filter`.
+
+ .. versionadded:: 1.3.0
+ """
+ return self.filter(condition)
# Two aliases below were added for pandas compatibility many years ago.
# There are too many differences compared to pandas and we cannot just
# make it "compatible" by adding aliases. Therefore, we stop adding such
# aliases as of Spark 3.0. Two methods below remain just
# for legacy users currently.
- groupby = copy_func(
- groupBy, sinceversion=1.4, doc=":func:`groupby` is an alias for
:func:`groupBy`."
- )
+ @overload
+ def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData":
+ ...
- drop_duplicates = copy_func(
- dropDuplicates,
- sinceversion=1.4,
- doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.",
- )
+ @overload
+ def groupby(self, __cols: Union[List[Column], List[str], List[int]]) ->
"GroupedData":
+ ...
+
+ def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": #
type: ignore[misc]
+ """
+ :func:`groupby` is an alias for :func:`groupBy`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.groupBy(*cols)
+
+ def drop_duplicates(self, subset: Optional[List[str]] = None) ->
"DataFrame":
+ """
+ :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
+
+ .. versionadded:: 1.4.0
+ """
+ return self.dropDuplicates(subset)
def writeTo(self, table: str) -> DataFrameWriterV2:
"""
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]