This is an automated email from the ASF dual-hosted git repository.
zero323 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b31f7bf [SPARK-37146][PYTHON] Inline type hints for python/pyspark/__init__.py
b31f7bf is described below
commit b31f7bf4c102368b63b08cfe3d36cc1d8d128738
Author: dch nguyen <[email protected]>
AuthorDate: Tue Dec 14 23:10:02 2021 +0100
[SPARK-37146][PYTHON] Inline type hints for python/pyspark/__init__.py
### What changes were proposed in this pull request?
Inline type hints for python/pyspark/__init__.py
### Why are the changes needed?
Inlining the type hints lets static type checkers verify the function bodies themselves, not just their call sites; a separate `.pyi` stub only covers the latter. A minimal sketch follows.
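As an illustrative sketch (not lines from this PR): with hints in a separate stub, mypy checks only callers of `since`; once the hints are inline, the body of `since` is type-checked as well.

```python
# Illustrative only: inline hints make the decorator body itself checkable.
from typing import Callable, TypeVar, Union

F = TypeVar("F", bound=Callable)

def since(version: Union[str, float]) -> Callable[[F], F]:
    def deco(f: F) -> F:
        return f  # body elided; with inline hints, mypy verifies this code too
    return deco
```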
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Existing tests
Closes #34433 from dchvn/SPARK-37146.
Authored-by: dch nguyen <[email protected]>
Signed-off-by: zero323 <[email protected]>
---
python/pyspark/__init__.py | 33 +++++++++++++-----
python/pyspark/__init__.pyi | 77 -----------------------------------------
python/pyspark/sql/conf.py | 5 +--
python/pyspark/sql/context.py | 3 +-
python/pyspark/sql/dataframe.py | 11 ++++--
5 files changed, 38 insertions(+), 91 deletions(-)
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py
index aab95ad..e82817f 100644
--- a/python/pyspark/__init__.py
+++ b/python/pyspark/__init__.py
@@ -48,6 +48,7 @@ Public classes:
from functools import wraps
import types
+from typing import cast, Any, Callable, Optional, TypeVar, Union
from pyspark.conf import SparkConf
from pyspark.rdd import RDD, RDDBarrier
@@ -63,8 +64,11 @@ from pyspark.profiler import Profiler, BasicProfiler
from pyspark.version import __version__
from pyspark._globals import _NoValue # noqa: F401
+T = TypeVar("T")
+F = TypeVar("F", bound=Callable)
-def since(version):
+
+def since(version: Union[str, float]) -> Callable[[F], F]:
"""
A decorator that annotates a function to append the version of Spark the
function was added.
"""
@@ -72,7 +76,9 @@ def since(version):
indent_p = re.compile(r"\n( +)")
- def deco(f):
+ def deco(f: F) -> F:
+ assert f.__doc__ is not None
+
indents = indent_p.findall(f.__doc__)
indent = " " * (min(len(m) for m in indents) if indents else 0)
f.__doc__ = f.__doc__.rstrip() + "\n\n%s.. versionadded:: %s" % (indent, version)
@@ -81,15 +87,26 @@ def since(version):
return deco
-def copy_func(f, name=None, sinceversion=None, doc=None):
+def copy_func(
+ f: F,
+ name: Optional[str] = None,
+ sinceversion: Optional[Union[str, float]] = None,
+ doc: Optional[str] = None,
+) -> F:
"""
Returns a function with same code, globals, defaults, closure, and
name (or provide a new name).
"""
# See
# http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python
+ assert isinstance(f, types.FunctionType)
+
fn = types.FunctionType(
- f.__code__, f.__globals__, name or f.__name__, f.__defaults__, f.__closure__
+ f.__code__,
+ f.__globals__,
+ name or f.__name__,
+ f.__defaults__,
+ f.__closure__,
)
# in case f was given attrs (note this dict is a shallow copy):
fn.__dict__.update(f.__dict__)
@@ -97,10 +114,10 @@ def copy_func(f, name=None, sinceversion=None, doc=None):
fn.__doc__ = doc
if sinceversion is not None:
fn = since(sinceversion)(fn)
- return fn
+ return cast(F, fn)
-def keyword_only(func):
+def keyword_only(func: F) -> F:
"""
A decorator that forces keyword arguments in the wrapped method
and saves actual input keyword arguments in `_input_kwargs`.
@@ -111,13 +128,13 @@ def keyword_only(func):
"""
@wraps(func)
- def wrapper(self, *args, **kwargs):
+ def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
if len(args) > 0:
raise TypeError("Method %s forces keyword arguments." % func.__name__)
self._input_kwargs = kwargs
return func(self, **kwargs)
- return wrapper
+ return cast(F, wrapper)
# To avoid circular dependencies
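An aside on the typing pattern used above (a sketch with a hypothetical decorator `noisy`, assuming only the standard library): binding the TypeVar `F` to `Callable` and returning `F` preserves the decorated function's exact signature for the type checker, and `cast(F, wrapper)` is required because mypy otherwise infers the wrapper as a plain `Callable[..., Any]`.

```python
from functools import wraps
from typing import Any, Callable, TypeVar, cast

F = TypeVar("F", bound=Callable)

def noisy(func: F) -> F:
    # Hypothetical decorator using the same pattern as keyword_only above.
    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        print("calling", func.__name__)
        return func(*args, **kwargs)
    # Without the cast, mypy infers `wrapper` as Callable[..., Any] and the
    # decorated function would lose its precise signature.
    return cast(F, wrapper)

@noisy
def add(x: int, y: int) -> int:
    return x + y

add(1, 2)        # OK: mypy still sees (int, int) -> int
# add("a", "b")  # mypy would flag this, thanks to the preserved signature
```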
diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi
deleted file mode 100644
index fb045f2..0000000
--- a/python/pyspark/__init__.pyi
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from typing import Callable, Optional, TypeVar, Union
-
-from pyspark.accumulators import ( # noqa: F401
- Accumulator as Accumulator,
- AccumulatorParam as AccumulatorParam,
-)
-from pyspark.broadcast import Broadcast as Broadcast # noqa: F401
-from pyspark.conf import SparkConf as SparkConf # noqa: F401
-from pyspark.context import SparkContext as SparkContext # noqa: F401
-from pyspark.files import SparkFiles as SparkFiles # noqa: F401
-from pyspark.status import (
- StatusTracker as StatusTracker,
- SparkJobInfo as SparkJobInfo,
- SparkStageInfo as SparkStageInfo,
-) # noqa: F401
-from pyspark.profiler import ( # noqa: F401
- BasicProfiler as BasicProfiler,
- Profiler as Profiler,
-)
-from pyspark.rdd import RDD as RDD, RDDBarrier as RDDBarrier # noqa: F401
-from pyspark.serializers import ( # noqa: F401
- MarshalSerializer as MarshalSerializer,
- CPickleSerializer as CPickleSerializer,
-)
-from pyspark.status import ( # noqa: F401
- SparkJobInfo as SparkJobInfo,
- SparkStageInfo as SparkStageInfo,
- StatusTracker as StatusTracker,
-)
-from pyspark.storagelevel import StorageLevel as StorageLevel # noqa: F401
-from pyspark.taskcontext import ( # noqa: F401
- BarrierTaskContext as BarrierTaskContext,
- BarrierTaskInfo as BarrierTaskInfo,
- TaskContext as TaskContext,
-)
-from pyspark.util import (
- InheritableThread as InheritableThread, # noqa: F401
- inheritable_thread_target as inheritable_thread_target, # noqa: F401
-)
-from pyspark.version import __version__ as __version__
-
-# Compatibility imports
-from pyspark.sql import ( # noqa: F401
- SQLContext as SQLContext,
- HiveContext as HiveContext,
- Row as Row,
-)
-
-T = TypeVar("T")
-F = TypeVar("F", bound=Callable)
-
-def since(version: Union[str, float]) -> Callable[[T], T]: ...
-def copy_func(
- f: F,
- name: Optional[str] = ...,
- sinceversion: Optional[Union[str, float]] = ...,
- doc: Optional[str] = ...,
-) -> F: ...
-def keyword_only(func: F) -> F: ...
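For readers wondering about the `X as X` aliases in the deleted stub (a note on stub conventions, not new code in this change): the redundant-looking form marks a name as an explicit re-export when mypy's implicit re-exports are disabled, a job the inline-hinted module now does by itself.

```python
# In a .pyi stub, the redundant-looking alias marks an explicit re-export:
from pyspark.conf import SparkConf as SparkConf  # re-exported on purpose
# A plain `from pyspark.conf import SparkConf` would be treated as private
# to the stub when implicit re-exports are disabled.
```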
diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index 9891ca7..8489e55 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -16,11 +16,12 @@
#
import sys
-from typing import Any, Optional
+from typing import Any, Optional, Union
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
class RuntimeConfig(object):
@@ -39,7 +40,7 @@ class RuntimeConfig(object):
self._jconf.set(key, value)
@since(2.0)
- def get(self, key: str, default: Optional[str] = _NoValue) -> str:
+ def get(self, key: str, default: Union[Optional[str], _NoValueType] = _NoValue) -> str:
"""Returns the value of Spark runtime configuration property for the
given key,
assuming it is set.
"""
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index bf3d4fa..b64de05 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -35,6 +35,7 @@ from typing import (
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
from pyspark.sql.session import _monkey_patch_RDD, SparkSession
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.readwriter import DataFrameReader
@@ -195,7 +196,7 @@ class SQLContext(object):
"""
self.sparkSession.conf.set(key, value) # type: ignore[arg-type]
- def getConf(self, key: str, defaultValue: Optional[str] = _NoValue) -> str:
+ def getConf(self, key: str, defaultValue: Union[Optional[str], _NoValueType] = _NoValue) -> str:
"""Returns the value of Spark SQL configuration property for the given
key.
If the key is not set and defaultValue is set, return
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 160e7c3..e2574e5 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -41,6 +41,7 @@ from typing import (
from py4j.java_gateway import JavaObject # type: ignore[import]
from pyspark import copy_func, since, _NoValue # type: ignore[attr-defined]
+from pyspark._globals import _NoValueType
from pyspark.context import SparkContext
from pyspark.rdd import ( # type: ignore[attr-defined]
RDD,
@@ -2530,7 +2531,9 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
to_replace: Union[
"LiteralType", List["LiteralType"], Dict["LiteralType",
"OptionalPrimitiveType"]
],
- value: Optional[Union["OptionalPrimitiveType", List["OptionalPrimitiveType"]]] = _NoValue,
+ value: Optional[
+ Union["OptionalPrimitiveType", List["OptionalPrimitiveType"],
_NoValueType]
+ ] = _NoValue,
subset: Optional[List[str]] = None,
) -> "DataFrame":
"""Returns a new :class:`DataFrame` replacing a value with another
value.
@@ -2673,7 +2676,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
else:
if isinstance(value, (float, int, str)) or value is None:
value = [value for _ in range(len(to_replace))]
- rep_dict = dict(zip(to_replace, value))
+ rep_dict = dict(zip(to_replace, cast("Iterable[Optional[Union[float, str]]]", value)))
if isinstance(subset, str):
subset = [subset]
@@ -3343,7 +3346,9 @@ class DataFrameNaFunctions(object):
def replace( # type: ignore[misc]
self,
to_replace: Union[List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"]],
- value: Optional[Union["OptionalPrimitiveType", List["OptionalPrimitiveType"]]] = _NoValue,
+ value: Optional[
+ Union["OptionalPrimitiveType", List["OptionalPrimitiveType"],
_NoValueType]
+ ] = _NoValue,
subset: Optional[List[str]] = None,
) -> DataFrame:
return self.df.replace(to_replace, value, subset)  # type: ignore[arg-type]
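For context on the widened `value` parameter (a hedged usage sketch assuming a local SparkSession, not part of this diff): `replace` must distinguish an omitted `value` from an explicit `None`, which is exactly what the `_NoValue` sentinel, and now its type, encode.

```python
# Hedged usage sketch; assumes a local SparkSession is available.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "tag"])

# Dict form: `value` stays at its _NoValue default, which is why the
# annotation must admit _NoValueType alongside the real value types.
df.replace({"a": "x"}).show()

# Scalar form: `value` is given explicitly; passing None here would mean
# "replace with null", which the sentinel keeps distinct from "omitted".
df.replace("b", "y").show()
```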