This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 69dc9b3b2b8 [SPARK-39714][PYTHON] Try to fix the mypy annotation tests
69dc9b3b2b8 is described below
commit 69dc9b3b2b816b919b313f0f36cbe5542da09dad
Author: bzhaoop <[email protected]>
AuthorDate: Wed Jul 13 09:44:53 2022 +0900
[SPARK-39714][PYTHON] Try to fix the mypy annotation tests
This patch resolves the mypy annotation test failures. To make the
annotation tests pass, we identified all failing tests and fixed them.
### What changes were proposed in this pull request?
The changed files address all the errors raised when we execute
dev/lint-python. All of them are related to mypy annotation test failures.
### Why are the changes needed?
We need to resolve them to avoid breaking the common commit merge. This
might improve the developer experience for newcomers getting their first
insight into pyspark.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Once the PR is merged, users/developers can run `dev/lint-python` for testing.
Closes #37117 from bzhaoopenstack/mypy-annotations-test.
Lead-authored-by: bzhaoop <[email protected]>
Co-authored-by: bzhao <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/ml/util.py | 10 +++-------
python/pyspark/ml/wrapper.py | 6 ++----
python/pyspark/mllib/regression.py | 10 +++-------
python/pyspark/pandas/__init__.py | 4 ++--
python/pyspark/pandas/frame.py | 2 +-
python/pyspark/pandas/generic.py | 4 ++--
python/pyspark/pandas/series.py | 2 +-
python/pyspark/pandas/tests/test_categorical.py | 2 +-
8 files changed, 15 insertions(+), 25 deletions(-)
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 14e62ce6217..67aa2124b22 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -536,10 +536,8 @@ class DefaultParamsReader(MLReader[RL]):
"""
parts = clazz.split(".")
module = ".".join(parts[:-1])
- m = __import__(module)
- for comp in parts[1:]:
- m = getattr(m, comp)
- return m
+ m = __import__(module, fromlist=[parts[-1]])
+ return getattr(m, parts[-1])
def load(self, path: str) -> RL:
metadata = DefaultParamsReader.loadMetadata(path, self.sc)
@@ -696,9 +694,7 @@ class MetaAlgorithmReadWrite:
elif isinstance(pyInstance, OneVsRest):
pySubStages = [pyInstance.getClassifier()]
elif isinstance(pyInstance, OneVsRestModel):
- pySubStages = [
- pyInstance.getClassifier()
- ] + pyInstance.models # type: ignore[assignment, operator]
+ pySubStages = [pyInstance.getClassifier()] + pyInstance.models # type: ignore[operator]
else:
pySubStages = []
diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py
index 7853e766244..39685ea631e 100644
--- a/python/pyspark/ml/wrapper.py
+++ b/python/pyspark/ml/wrapper.py
@@ -278,10 +278,8 @@ class JavaParams(JavaWrapper, Params, metaclass=ABCMeta):
"""
parts = clazz.split(".")
module = ".".join(parts[:-1])
- m = __import__(module)
- for comp in parts[1:]:
- m = getattr(m, comp)
- return m
+ m = __import__(module, fromlist=[parts[-1]])
+ return getattr(m, parts[-1])
stage_name = java_stage.getClass().getName().replace("org.apache.spark", "pyspark")
# Generate a default new instance from the stage_name class.
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 4f7da0131f6..18f37b4a71a 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -279,12 +279,10 @@ def _regression_train_wrapper(
weights, intercept, numFeatures, numClasses = train_func(
data, _convert_to_vector(initial_weights)
)
- return modelClass( # type: ignore[call-arg, return-value]
- weights, intercept, numFeatures, numClasses
- )
+ return modelClass(weights, intercept, numFeatures, numClasses) # type: ignore[call-arg]
else:
weights, intercept = train_func(data,
_convert_to_vector(initial_weights))
- return modelClass(weights, intercept) # type: ignore[call-arg, return-value]
+ return modelClass(weights, intercept) # type: ignore[call-arg]
class LinearRegressionWithSGD:
@@ -838,9 +836,7 @@ class IsotonicRegressionModel(Saveable, Loader["IsotonicRegressionModel"]):
"""
if isinstance(x, RDD):
return x.map(lambda v: self.predict(v))
- return np.interp(
- x, self.boundaries, self.predictions # type: ignore[call-overload, arg-type]
- )
+ return np.interp(x, self.boundaries, self.predictions) # type: ignore[arg-type]
@since("1.4.0")
def save(self, sc: SparkContext, path: str) -> None:
diff --git a/python/pyspark/pandas/__init__.py b/python/pyspark/pandas/__init__.py
index 56a4f80a13c..e367ef5e252 100644
--- a/python/pyspark/pandas/__init__.py
+++ b/python/pyspark/pandas/__init__.py
@@ -136,12 +136,12 @@ def _auto_patch_pandas() -> None:
if sys.version_info >= (3, 7):
# Just in case pandas implements '__class_getitem__' later.
if not _frame_has_class_getitem:
- pd.DataFrame.__class_getitem__ = ( # type: ignore[assignment,attr-defined]
+ pd.DataFrame.__class_getitem__ = ( # type: ignore[attr-defined]
lambda params: DataFrame.__class_getitem__(params)
)
if not _series_has_class_getitem:
- pd.Series.__class_getitem__ = ( # type: ignore[assignment,attr-defined]
+ pd.Series.__class_getitem__ = ( # type: ignore[attr-defined]
lambda params: Series.__class_getitem__(params)
)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index fc22f2f6989..8fc2b968118 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -9974,7 +9974,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
non_existence_col = [idv for idv in id_vars if idv not in column_labels]
if len(non_existence_col) != 0:
- raveled_column_labels = np.ravel(column_labels)
+ raveled_column_labels: np.ndarray[Any, np.dtype[Any]] = np.ravel(column_labels)
missing = [
nec for nec in np.ravel(non_existence_col) if nec not in raveled_column_labels
]
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index dbfaedc9321..bd2b68da51f 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -3117,11 +3117,11 @@ class Frame(object, metaclass=ABCMeta):
if isinstance(self, ps.Series):
if indexes_increasing:
result = first_series(
- self.to_frame().loc[before:after] # type: ignore[arg-type, assignment]
+ self.to_frame().loc[before:after] # type: ignore[arg-type]
).rename(self.name)
else:
result = first_series(
- self.to_frame().loc[after:before] # type: ignore[arg-type,assignment]
+ self.to_frame().loc[after:before] # type: ignore[arg-type]
).rename(self.name)
elif isinstance(self, ps.DataFrame):
if axis == 0:
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 838077ed7cd..306a2458bb4 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -4738,7 +4738,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
ser_count = self.value_counts(dropna=dropna, sort=False)
sdf_count = ser_count._internal.spark_frame
most_value = ser_count.max()
- sdf_most_value = sdf_count.filter("count == {}".format(most_value))
+ sdf_most_value = sdf_count.filter("count == {}".format(str(most_value)))
sdf = sdf_most_value.select(
F.col(SPARK_DEFAULT_INDEX_NAME).alias(SPARK_DEFAULT_SERIES_NAME)
)
diff --git a/python/pyspark/pandas/tests/test_categorical.py b/python/pyspark/pandas/tests/test_categorical.py
index a4746cdda14..a939136aa54 100644
--- a/python/pyspark/pandas/tests/test_categorical.py
+++ b/python/pyspark/pandas/tests/test_categorical.py
@@ -433,7 +433,7 @@ class CategoricalTest(ComparisonTestBase, TestUtils):
pdf, psdf = self.df_pair
- def identity(x) -> ps.Series[psdf.b.dtype]: # type: ignore[name-defined, no-untyped-def]
+ def identity(x) -> ps.Series[psdf.b.dtype]: # type: ignore[name-defined]
return x
self.assert_eq(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]