This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 69dc9b3b2b8 [SPARK-39714][PYTHON] Try to fix the mypy annotation tests
69dc9b3b2b8 is described below

commit 69dc9b3b2b816b919b313f0f36cbe5542da09dad
Author: bzhaoop <[email protected]>
AuthorDate: Wed Jul 13 09:44:53 2022 +0900

    [SPARK-39714][PYTHON] Try to fix the mypy annotation tests
    
    This patch fixes the failing mypy annotation tests. To make the
    annotation tests pass, we identified all the failing checks and fixed them.
    
    ### What changes were proposed in this pull request?
    
    The changes address all of the errors raised when we execute
    dev/lint-python. All of them are mypy annotation test failures.
    
    ### Why are the changes needed?
    
    We need to resolve them so that they do not block ordinary commits from
    being merged. This might also improve the experience of developers who are
    getting their first look at PySpark.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Once the PR is merged, users/developers can run `dev/lint-python` to test it.
    
    Closes #37117 from bzhaoopenstack/mypy-annotations-test.
    
    Lead-authored-by: bzhaoop <[email protected]>
    Co-authored-by: bzhao <[email protected]>
    Signed-off-by: Hyukjin Kwon <[email protected]>
---
 python/pyspark/ml/util.py                       | 10 +++-------
 python/pyspark/ml/wrapper.py                    |  6 ++----
 python/pyspark/mllib/regression.py              | 10 +++-------
 python/pyspark/pandas/__init__.py               |  4 ++--
 python/pyspark/pandas/frame.py                  |  2 +-
 python/pyspark/pandas/generic.py                |  4 ++--
 python/pyspark/pandas/series.py                 |  2 +-
 python/pyspark/pandas/tests/test_categorical.py |  2 +-
 8 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 14e62ce6217..67aa2124b22 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -536,10 +536,8 @@ class DefaultParamsReader(MLReader[RL]):
         """
         parts = clazz.split(".")
         module = ".".join(parts[:-1])
-        m = __import__(module)
-        for comp in parts[1:]:
-            m = getattr(m, comp)
-        return m
+        m = __import__(module, fromlist=[parts[-1]])
+        return getattr(m, parts[-1])
 
     def load(self, path: str) -> RL:
         metadata = DefaultParamsReader.loadMetadata(path, self.sc)
@@ -696,9 +694,7 @@ class MetaAlgorithmReadWrite:
         elif isinstance(pyInstance, OneVsRest):
             pySubStages = [pyInstance.getClassifier()]
         elif isinstance(pyInstance, OneVsRestModel):
-            pySubStages = [
-                pyInstance.getClassifier()
-            ] + pyInstance.models  # type: ignore[assignment, operator]
+            pySubStages = [pyInstance.getClassifier()] + pyInstance.models  # 
type: ignore[operator]
         else:
             pySubStages = []
 
diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py
index 7853e766244..39685ea631e 100644
--- a/python/pyspark/ml/wrapper.py
+++ b/python/pyspark/ml/wrapper.py
@@ -278,10 +278,8 @@ class JavaParams(JavaWrapper, Params, metaclass=ABCMeta):
             """
             parts = clazz.split(".")
             module = ".".join(parts[:-1])
-            m = __import__(module)
-            for comp in parts[1:]:
-                m = getattr(m, comp)
-            return m
+            m = __import__(module, fromlist=[parts[-1]])
+            return getattr(m, parts[-1])
 
         stage_name = 
java_stage.getClass().getName().replace("org.apache.spark", "pyspark")
         # Generate a default new instance from the stage_name class.
diff --git a/python/pyspark/mllib/regression.py 
b/python/pyspark/mllib/regression.py
index 4f7da0131f6..18f37b4a71a 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -279,12 +279,10 @@ def _regression_train_wrapper(
         weights, intercept, numFeatures, numClasses = train_func(
             data, _convert_to_vector(initial_weights)
         )
-        return modelClass(  # type: ignore[call-arg, return-value]
-            weights, intercept, numFeatures, numClasses
-        )
+        return modelClass(weights, intercept, numFeatures, numClasses)  # 
type: ignore[call-arg]
     else:
         weights, intercept = train_func(data, 
_convert_to_vector(initial_weights))
-        return modelClass(weights, intercept)  # type: ignore[call-arg, 
return-value]
+        return modelClass(weights, intercept)  # type: ignore[call-arg]
 
 
 class LinearRegressionWithSGD:
@@ -838,9 +836,7 @@ class IsotonicRegressionModel(Saveable, 
Loader["IsotonicRegressionModel"]):
         """
         if isinstance(x, RDD):
             return x.map(lambda v: self.predict(v))
-        return np.interp(
-            x, self.boundaries, self.predictions  # type: 
ignore[call-overload, arg-type]
-        )
+        return np.interp(x, self.boundaries, self.predictions)  # type: 
ignore[arg-type]
 
     @since("1.4.0")
     def save(self, sc: SparkContext, path: str) -> None:
diff --git a/python/pyspark/pandas/__init__.py 
b/python/pyspark/pandas/__init__.py
index 56a4f80a13c..e367ef5e252 100644
--- a/python/pyspark/pandas/__init__.py
+++ b/python/pyspark/pandas/__init__.py
@@ -136,12 +136,12 @@ def _auto_patch_pandas() -> None:
     if sys.version_info >= (3, 7):
         # Just in case pandas implements '__class_getitem__' later.
         if not _frame_has_class_getitem:
-            pd.DataFrame.__class_getitem__ = (  # type: 
ignore[assignment,attr-defined]
+            pd.DataFrame.__class_getitem__ = (  # type: ignore[attr-defined]
                 lambda params: DataFrame.__class_getitem__(params)
             )
 
         if not _series_has_class_getitem:
-            pd.Series.__class_getitem__ = (  # type: 
ignore[assignment,attr-defined]
+            pd.Series.__class_getitem__ = (  # type: ignore[attr-defined]
                 lambda params: Series.__class_getitem__(params)
             )
 
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index fc22f2f6989..8fc2b968118 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -9974,7 +9974,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
 
             non_existence_col = [idv for idv in id_vars if idv not in 
column_labels]
             if len(non_existence_col) != 0:
-                raveled_column_labels = np.ravel(column_labels)
+                raveled_column_labels: np.ndarray[Any, np.dtype[Any]] = 
np.ravel(column_labels)
                 missing = [
                     nec for nec in np.ravel(non_existence_col) if nec not in 
raveled_column_labels
                 ]
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index dbfaedc9321..bd2b68da51f 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -3117,11 +3117,11 @@ class Frame(object, metaclass=ABCMeta):
         if isinstance(self, ps.Series):
             if indexes_increasing:
                 result = first_series(
-                    self.to_frame().loc[before:after]  # type: 
ignore[arg-type, assignment]
+                    self.to_frame().loc[before:after]  # type: ignore[arg-type]
                 ).rename(self.name)
             else:
                 result = first_series(
-                    self.to_frame().loc[after:before]  # type: 
ignore[arg-type,assignment]
+                    self.to_frame().loc[after:before]  # type: ignore[arg-type]
                 ).rename(self.name)
         elif isinstance(self, ps.DataFrame):
             if axis == 0:
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 838077ed7cd..306a2458bb4 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -4738,7 +4738,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         ser_count = self.value_counts(dropna=dropna, sort=False)
         sdf_count = ser_count._internal.spark_frame
         most_value = ser_count.max()
-        sdf_most_value = sdf_count.filter("count == {}".format(most_value))
+        sdf_most_value = sdf_count.filter("count == 
{}".format(str(most_value)))
         sdf = sdf_most_value.select(
             F.col(SPARK_DEFAULT_INDEX_NAME).alias(SPARK_DEFAULT_SERIES_NAME)
         )
diff --git a/python/pyspark/pandas/tests/test_categorical.py 
b/python/pyspark/pandas/tests/test_categorical.py
index a4746cdda14..a939136aa54 100644
--- a/python/pyspark/pandas/tests/test_categorical.py
+++ b/python/pyspark/pandas/tests/test_categorical.py
@@ -433,7 +433,7 @@ class CategoricalTest(ComparisonTestBase, TestUtils):
 
         pdf, psdf = self.df_pair
 
-        def identity(x) -> ps.Series[psdf.b.dtype]:  # type: 
ignore[name-defined, no-untyped-def]
+        def identity(x) -> ps.Series[psdf.b.dtype]:  # type: 
ignore[name-defined]
             return x
 
         self.assert_eq(


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to