This is an automated email from the ASF dual-hosted git repository.
zero323 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b31f7bf [SPARK-37146][PYTHON] Inline type hints for python/pyspark/__init__.py
b31f7bf is described below
commit b31f7bf4c102368b63b08cfe3d36cc1d8d128738
Author: dch nguyen <[email protected]>
AuthorDate: Tue Dec 14 23:10:02 2021 +0100
[SPARK-37146][PYTHON] Inline type hints for python/pyspark/__init__.py
### What changes were proposed in this pull request?
Inline type hints for python/pyspark/__init__.py
### Why are the changes needed?
Inlining the type hints lets static type checkers verify the function bodies themselves, not just their call sites; a separate `.pyi` stub only covers the latter. A minimal sketch follows.
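As an illustrative sketch (not lines from this PR): with hints in a separate stub, mypy checks only callers of `since`; once the hints are inline, the body of `since` is type-checked as well.

```python
# Illustrative only: inline hints make the decorator body itself checkable.
from typing import Callable, TypeVar, Union

F = TypeVar("F", bound=Callable)

def since(version: Union[str, float]) -> Callable[[F], F]:
    def deco(f: F) -> F:
        return f  # body elided; with inline hints, mypy verifies this code too
    return deco
```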
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests
Closes #34433 from dchvn/SPARK-37146.
Authored-by: dch nguyen <[email protected]>
Signed-off-by: zero323 <[email protected]>
---
python/pyspark/__init__.py | 33 +++++++++++++-----
python/pyspark/__init__.pyi | 77 -----------------------------------------
python/pyspark/sql/conf.py | 5 +--
python/pyspark/sql/context.py | 3 +-
python/pyspark/sql/dataframe.py | 11 ++++--
5 files changed, 38 insertions(+), 91 deletions(-)
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index aab95ad..e82817f 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -48,6 +48,7 @@ Public classes:
from functools import wraps
import types
+from typing import cast, Any, Callable, Optional, TypeVar, Union
from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
@@ -63,8 +64,11 @@ from pyspark.profiler import Profiler, BasicProfiler
from pyspark.version import __version__
from pyspark._globals import _NoValue # noqa: F401
+T = TypeVar("T")
+F = TypeVar("F", bound=Callable)
-def since(version):
+
+def since(version: Union[str, float]) -> Callable[[F], F]:
"""
A decorator that annotates a function to append the version of Spark the
function was added.
"""
@@ -72,7 +76,9 @@ def since(version):
indent_p = re.compile(r"\n( +)")
- def deco(f):
+ def deco(f: F) -> F:
+ assert f.__doc__ is not None
+
indents = indent_p.findall(f.__doc__)
indent = " " * (min(len(m) for m in indents) if indents else 0)
f.__doc__ = f.__doc__.rstrip() + "\n\n%s.. versionadded:: %s" % (indent, version)
@@ -81,15 +87,26 @@ def since(version):
return deco
-def copy_func(f, name=None, sinceversion=None, doc=None):
+def copy_func(
+ f: F,
+ name: Optional[str] = None,
+ sinceversion: Optional[Union[str, float]] = None,
+ doc: Optional[str] = None,
+) -> F:
"""
Returns a function with same code, globals, defaults, closure, and
name (or provide a new name).
"""
# See
# http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
+ assert isinstance(f, types.FunctionType)
+
fn = types.FunctionType(
- f.__code__, f.__globals__, name or f.__name__, f.__defaults__, f.__closure__
+ f.__code__,
+ f.__globals__,
+ name or f.__name__,
+ f.__defaults__,
+ f.__closure__,
)
# in case f was given attrs (note this dict is a shallow copy):
fn.__dict__.update(f.__dict__)
@@ -97,10 +114,10 @@ def copy_func(f, name=None, sinceversion=None, doc=None):
fn.__doc__ = doc
if sinceversion is not None:
fn = since(sinceversion)(fn)
- return fn
+ return cast(F, fn)
-def keyword_only(func):
+def keyword_only(func: F) -> F:
"""
A decorator that forces keyword arguments in the wrapped method
and saves actual input keyword arguments in `_input_kwargs`.
@@ -111,13 +128,13 @@ def keyword_only(func):
"""
@wraps(func)
- def wrapper(self, *args, **kwargs):
+ def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
if len(args) > 0:
raise TypeError("Method %s forces keyword arguments." % func.__name__)
self._input_kwargs = kwargs
return func(self, **kwargs)
- return wrapper
+ return cast(F, wrapper)
# To avoid circular dependencies
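An aside on the typing pattern used above (a sketch with a hypothetical decorator `noisy`, assuming only the standard library): binding the TypeVar `F` to `Callable` and returning `F` preserves the decorated function's exact signature for the type checker, and `cast(F, wrapper)` is required because mypy otherwise infers the wrapper as a plain `Callable[..., Any]`.

```python
from functools import wraps
from typing import Any, Callable, TypeVar, cast

F = TypeVar("F", bound=Callable)

def noisy(func: F) -> F:
    # Hypothetical decorator using the same pattern as keyword_only above.
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        print("calling", func.__name__)
        return func(*args, **kwargs)
    # Without the cast, mypy infers `wrapper` as Callable[..., Any] and the
    # decorated function would lose its precise signature.
    return cast(F, wrapper)

@noisy
def add(x: int, y: int) -> int:
    return x + y

add(1, 2)        # OK: mypy still sees (int, int) -> int
# add("a", "b")  # mypy would flag this, thanks to the preserved signature
```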
diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi
deleted file mode 100644
index fb045f2..0000000
--- a/python/pyspark/__init__.pyi
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Callable, Optional, TypeVar, Union
-
-from pyspark.accumulators import ( # noqa: F401
- Accumulator as Accumulator,
- AccumulatorParam as AccumulatorParam,
-)
-from pyspark.broadcast import Broadcast as Broadcast # noqa: F401
-from pyspark.conf import SparkConf as SparkConf # noqa: F401
-from pyspark.context import SparkContext as SparkContext # noqa: F401
-from pyspark.files import SparkFiles as SparkFiles # noqa: F401
-from pyspark.status import (
- StatusTracker as StatusTracker,
- SparkJobInfo as SparkJobInfo,
- SparkStageInfo as SparkStageInfo,
-) # noqa: F401
-from pyspark.profiler import ( # noqa: F401
- BasicProfiler as BasicProfiler,
- Profiler as Profiler,
-)
-from pyspark.rdd import RDD as RDD, RDDBarrier as RDDBarrier # noqa: F401
-from pyspark.serializers import ( # noqa: F401
- MarshalSerializer as MarshalSerializer,
- CPickleSerializer as CPickleSerializer,
-)
-from pyspark.status import ( # noqa: F401
- SparkJobInfo as SparkJobInfo,
- SparkStageInfo as SparkStageInfo,
- StatusTracker as StatusTracker,
-)
-from pyspark.storagelevel import StorageLevel as StorageLevel # noqa: F401
-from pyspark.taskcontext import ( # noqa: F401
- BarrierTaskContext as BarrierTaskContext,
- BarrierTaskInfo as BarrierTaskInfo,
- TaskContext as TaskContext,
-)
-from pyspark.util import (
- InheritableThread as InheritableThread, # noqa: F401
- inheritable_thread_target as inheritable_thread_target, # noqa: F401
-)
-from pyspark.version import __version__ as __version__
-
-# Compatibility imports
-from pyspark.sql import ( # noqa: F401
- SQLContext as SQLContext,
- HiveContext as HiveContext,
- Row as Row,
-)
-
-T = TypeVar("T")
-F = TypeVar("F", bound=Callable)
-
-def since(version: Union[str, float]) -> Callable[[T], T]: ...
-def copy_func(
- f: F,
- name: Optional[str] = ...,
- sinceversion: Optional[Union[str, float]] = ...,
- doc: Optional[str] = ...,
-) -> F: ...
-def keyword_only(func: F) -> F: ...
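For readers wondering about the `X as X` aliases in the deleted stub (a note on stub conventions, not new code in this change): the redundant-looking form marks a name as an explicit re-export when mypy's implicit re-exports are disabled, a job the inline-hinted module now does by itself.

```python
# In a .pyi stub, the redundant-looking alias marks an explicit re-export:
from pyspark.conf import SparkConf as SparkConf  # re-exported on purpose
# A plain `from pyspark.conf import SparkConf` would be treated as private
# to the stub when implicit re-exports are disabled.
```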
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index 9891ca7..8489e55 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -16,11 +16,12 @@
#
import sys
-from typing import Any, Optional
+from typing import Any, Optional, Union
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
class RuntimeConfig(object):
@@ -39,7 +40,7 @@ class RuntimeConfig(object):
self._jconf.set(key, value)
@since(2.0)
- def get(self, key: str, default: Optional[str] = _NoValue) -> str:
+ def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
"""Returns the value of Spark runtime configuration property for the
given key,
assuming it is set.
"""
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index bf3d4fa..b64de05 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -35,6 +35,7 @@ from typing import (
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
from pyspark.sql.session import _monkey_patch_RDD, SparkSession
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.readwriter import DataFrameReader
@@ -195,7 +196,7 @@ class SQLContext(object):
"""
self.sparkSession.conf.set(key, value) # type: ignore[arg-type]
- def getConf(self, key: str, defaultValue: Optional[str] = _NoValue) -> str:
+ def getConf(self, key: str, defaultValue: Union[Optional[str], _NoValueType] = _NoValue) -> str:
"""Returns the value of Spark SQL configuration property for the given
key.
If the key is not set and defaultValue is set, return
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 160e7c3..e2574e5 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -41,6 +41,7 @@ from typing import (
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import copy_func, since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
from pyspark.context import SparkContext
from pyspark.rdd import ( # type: ignore[attr-defined]
RDD,
@@ -2530,7 +2531,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
to_replace: Union[
"LiteralType", List["LiteralType"], Dict["LiteralType",
"OptionalPrimitiveType"]
],
- value: Optional[Union["OptionalPrimitiveType", List["OptionalPrimitiveType"]]] = _NoValue,
+ value: Optional[
+ Union["OptionalPrimitiveType", List["OptionalPrimitiveType"],
_NoValueType]
+ ] = _NoValue,
subset: Optional[List[str]] = None,
) -> "DataFrame":
"""Returns a new :class:`DataFrame` replacing a value with another
value.
@@ -2673,7 +2676,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
else:
if isinstance(value, (float, int, str)) or value is None:
value = [value for _ in range(len(to_replace))]
- rep_dict = dict(zip(to_replace, value))
+ rep_dict = dict(zip(to_replace, cast("Iterable[Optional[Union[float, str]]]", value)))
if isinstance(subset, str):
subset = [subset]
@@ -3343,7 +3346,9 @@ class DataFrameNaFunctions(object):
def replace( # type: ignore[misc]
self,
to_replace: Union[List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"]],
- value: Optional[Union["OptionalPrimitiveType", List["OptionalPrimitiveType"]]] = _NoValue,
+ value: Optional[
+ Union["OptionalPrimitiveType", List["OptionalPrimitiveType"],
_NoValueType]
+ ] = _NoValue,
subset: Optional[List[str]] = None,
) -> DataFrame:
return self.df.replace(to_replace, value, subset)  # type: ignore[arg-type]
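For context on the widened `value` parameter (a hedged usage sketch assuming a local SparkSession, not part of this diff): `replace` must distinguish an omitted `value` from an explicit `None`, which is exactly what the `_NoValue` sentinel, and now its type, encode.

```python
# Hedged usage sketch; assumes a local SparkSession is available.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "tag"])

# Dict form: `value` stays at its _NoValue default, which is why the
# annotation must admit _NoValueType alongside the real value types.
df.replace({"a": "x"}).show()

# Scalar form: `value` is given explicitly; passing None here would mean
# "replace with null", which the sentinel keeps distinct from "omitted".
df.replace("b", "y").show()
```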