This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 52c43ca8c8d0 [SPARK-55480][PYTHON] Remove all unused noqa for ruff
52c43ca8c8d0 is described below
commit 52c43ca8c8d01ac99f2276675aac1aa14927cecd
Author: Tian Gao <[email protected]>
AuthorDate: Wed Feb 11 20:25:13 2026 +0800
[SPARK-55480][PYTHON] Remove all unused noqa for ruff
### What changes were proposed in this pull request?
Removed all unused `# noqa` comments for the linter.
### Why are the changes needed?
We have accumulated years of `# noqa` comments for the linter, and many of them are now unused (because the linter has gotten smarter). This change removes all the unused ones and makes `ruff` check for unused `noqa` comments going forward, just like `mypy` already does.
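For illustration, this is the kind of directive RUF100 catches (a hypothetical snippet, not taken from this PR): the import is actually used, so the `# noqa: F401` suppresses nothing, and `ruff check` with RUF100 enabled reports it as an unused `noqa` (which `ruff check --fix` can remove automatically).
```python
# example.py - hypothetical file, for illustration only
# `os` is used below, so F401 (unused import) never fires here and the
# suppression is dead weight; RUF100 reports it as an unused `noqa`.
import os  # noqa: F401

print(os.getcwd())
```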
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI, but this should be a comment-only change.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54264 from gaogaotiantian/remove-unused-noqa.
Authored-by: Tian Gao <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
dev/sparktestsupport/modules.py | 6 ++---
pyproject.toml | 3 ++-
python/pyspark/accumulators.py | 2 +-
python/pyspark/errors/__init__.py | 2 +-
python/pyspark/errors/exceptions/tblib.py | 4 +---
python/pyspark/errors_doc_gen.py | 2 +-
python/pyspark/logger/__init__.py | 2 +-
python/pyspark/ml/regression.py | 4 ++--
python/pyspark/mllib/_typing.pyi | 2 +-
python/pyspark/pandas/plot/__init__.py | 2 +-
.../pandas/tests/computation/test_apply_func.py | 6 ++---
.../connect/indexes/test_parity_indexing_adv.py | 2 +-
.../pandas/tests/groupby/test_apply_func.py | 2 +-
python/pyspark/pandas/typedef/__init__.py | 2 +-
python/pyspark/pandas/utils.py | 2 +-
python/pyspark/sql/connect/client/__init__.py | 2 +-
python/pyspark/sql/connect/functions/__init__.py | 4 ++--
python/pyspark/sql/connect/plan.py | 2 +-
python/pyspark/sql/connect/session.py | 2 +-
python/pyspark/sql/context.py | 2 +-
python/pyspark/sql/functions/__init__.py | 4 ++--
python/pyspark/sql/functions/builtin.py | 26 +++++++++++-----------
python/pyspark/sql/plot/__init__.py | 2 +-
python/pyspark/sql/readwriter.py | 6 ++---
python/pyspark/sql/streaming/__init__.py | 6 ++---
python/pyspark/sql/streaming/listener.py | 2 +-
python/pyspark/sql/streaming/query.py | 2 +-
python/pyspark/sql/tests/arrow/test_arrow.py | 2 +-
.../sql/tests/connect/client/test_reattach.py | 2 +-
.../sql/tests/connect/test_connect_collection.py | 14 ++++++------
...t_pandas_transform_with_state_state_variable.py | 2 +-
python/pyspark/sql/tests/pandas/test_pandas_udf.py | 2 +-
.../tests/pandas/test_pandas_udf_grouped_agg.py | 2 +-
python/pyspark/util.py | 4 ++--
python/run-tests.py | 4 ++--
35 files changed, 67 insertions(+), 68 deletions(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 44977d79d89c..861c5d65c5c7 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -674,7 +674,7 @@ pyspark_structured_streaming = Module(
"pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state",
"pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_checkpoint_v2",
"pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_state_variable",
- "pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_state_variable_checkpoint_v2",  # noqa: E501
+ "pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state_state_variable_checkpoint_v2",
"pyspark.sql.tests.pandas.streaming.test_transform_with_state",
"pyspark.sql.tests.pandas.streaming.test_transform_with_state_checkpoint_v2",
"pyspark.sql.tests.pandas.streaming.test_transform_with_state_state_variable",
@@ -1215,9 +1215,9 @@ pyspark_structured_streaming_connect = Module(
"pyspark.sql.tests.connect.streaming.test_parity_foreach_batch",
"pyspark.sql.tests.connect.pandas.streaming.test_parity_pandas_grouped_map_with_state",
"pyspark.sql.tests.connect.pandas.streaming.test_parity_pandas_transform_with_state",
- "pyspark.sql.tests.connect.pandas.streaming.test_parity_pandas_transform_with_state_state_variable",  # noqa: E501
+ "pyspark.sql.tests.connect.pandas.streaming.test_parity_pandas_transform_with_state_state_variable",
"pyspark.sql.tests.connect.pandas.streaming.test_parity_transform_with_state",
- "pyspark.sql.tests.connect.pandas.streaming.test_parity_transform_with_state_state_variable",  # noqa: E501
+ "pyspark.sql.tests.connect.pandas.streaming.test_parity_transform_with_state_state_variable",
],
excluded_python_implementations=[
"PyPy" # Skip these tests under PyPy since they require numpy and it
isn't available there
diff --git a/pyproject.toml b/pyproject.toml
index 3609ec593d77..08ced23191aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,8 @@ exclude = [
[tool.ruff.lint]
extend-select = [
- "G010" # logging-warn
+ "G010", # logging-warn
+ "RUF100", # unused-noqa
]
ignore = [
"E402", # Module top level import is disabled for optional import check,
etc.
diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index 4465bde9bbfb..39791f4bbd01 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -27,7 +27,7 @@ from pyspark.serializers import read_int, CPickleSerializer
from pyspark.errors import PySparkRuntimeError
if TYPE_CHECKING:
- from pyspark._typing import SupportsIAdd # noqa: F401
+ from pyspark._typing import SupportsIAdd
import socketserver.BaseRequestHandler # type: ignore[import-not-found]
diff --git a/python/pyspark/errors/__init__.py b/python/pyspark/errors/__init__.py
index c20b057b74e6..3293f1fe699b 100644
--- a/python/pyspark/errors/__init__.py
+++ b/python/pyspark/errors/__init__.py
@@ -18,7 +18,7 @@
"""
PySpark exceptions.
"""
-from pyspark.errors.exceptions.base import ( # noqa: F401
+from pyspark.errors.exceptions.base import (
PySparkException,
AnalysisException,
SessionNotSameException,
diff --git a/python/pyspark/errors/exceptions/tblib.py b/python/pyspark/errors/exceptions/tblib.py
index b444f0fb45d3..6cb4c79cb1f5 100644
--- a/python/pyspark/errors/exceptions/tblib.py
+++ b/python/pyspark/errors/exceptions/tblib.py
@@ -206,9 +206,7 @@ class Traceback:
# noinspection PyBroadException
try:
-        exec(
-            code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals)
-        )  # noqa: S102
+        exec(code, dict(current.tb_frame.f_globals), dict(current.tb_frame.f_locals))
except Exception:
next_tb = sys.exc_info()[2].tb_next # type: ignore
if top_tb is None:
diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py
index e1bd94dcec4d..53b8b8d1e12f 100644
--- a/python/pyspark/errors_doc_gen.py
+++ b/python/pyspark/errors_doc_gen.py
@@ -44,7 +44,7 @@ Error classes in PySpark
This is a list of common, named error classes returned by PySpark which are
defined at `error-conditions.json
<https://github.com/apache/spark/blob/master/python/pyspark/errors/error-conditions.json>`_.
When writing PySpark errors, developers must use an error class from the list.
If an appropriate error class is not available, add a new one into the list.
For more information, please refer to `Contributing Error and Exception
<contributing.rst#contributing-error-and-exception>`_.
-""" # noqa
+"""
with open(output_rst_file_path, "w") as f:
f.write(header + "\n\n")
for error_key, error_details in ERROR_CLASSES_MAP.items():
diff --git a/python/pyspark/logger/__init__.py b/python/pyspark/logger/__init__.py
index 9e629971f0cb..3e5034cfe248 100644
--- a/python/pyspark/logger/__init__.py
+++ b/python/pyspark/logger/__init__.py
@@ -18,6 +18,6 @@
"""
PySpark logging
"""
-from pyspark.logger.logger import PySparkLogger, SPARK_LOG_SCHEMA # noqa: F401
+from pyspark.logger.logger import PySparkLogger, SPARK_LOG_SCHEMA
__all__ = ["PySparkLogger", "SPARK_LOG_SCHEMA"]
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index e67b5d240c4d..3873d83b47c8 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -2180,7 +2180,7 @@ class AFTSurvivalRegression(
0.9,
0.95,
0.99,
- ], # noqa: B005
+ ],
quantilesCol: Optional[str] = None,
aggregationDepth: int = 2,
maxBlockSizeInMB: float = 0.0,
@@ -2220,7 +2220,7 @@ class AFTSurvivalRegression(
0.9,
0.95,
0.99,
- ], # noqa: B005
+ ],
quantilesCol: Optional[str] = None,
aggregationDepth: int = 2,
maxBlockSizeInMB: float = 0.0,
diff --git a/python/pyspark/mllib/_typing.pyi b/python/pyspark/mllib/_typing.pyi
index d34cfc84c7ae..c5af46eb1d60 100644
--- a/python/pyspark/mllib/_typing.pyi
+++ b/python/pyspark/mllib/_typing.pyi
@@ -19,7 +19,7 @@
from typing import List, Tuple, TYPE_CHECKING, TypeVar, Union
from typing_extensions import Literal
-from numpy import ndarray # noqa: F401
+from numpy import ndarray
from py4j.java_gateway import JavaObject
from pyspark.mllib.linalg import Vector
diff --git a/python/pyspark/pandas/plot/__init__.py b/python/pyspark/pandas/plot/__init__.py
index d00e002266eb..a2dc5adfb956 100644
--- a/python/pyspark/pandas/plot/__init__.py
+++ b/python/pyspark/pandas/plot/__init__.py
@@ -14,4 +14,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.pandas.plot.core import * # noqa: F401,F403
+from pyspark.pandas.plot.core import * # noqa: F403
diff --git a/python/pyspark/pandas/tests/computation/test_apply_func.py b/python/pyspark/pandas/tests/computation/test_apply_func.py
index e19b5e5748f5..d436fc1f50d2 100644
--- a/python/pyspark/pandas/tests/computation/test_apply_func.py
+++ b/python/pyspark/pandas/tests/computation/test_apply_func.py
@@ -154,7 +154,7 @@ class FrameApplyFunctionMixin:
self.assert_eq(sorted(actual["c0"].to_numpy()),
sorted(expected["a"].to_numpy()))
self.assert_eq(sorted(actual["c1"].to_numpy()),
sorted(expected["b"].to_numpy()))
- def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]: #
noqa: F405
+ def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]:
return x
actual = psdf.apply(identify2, axis=1)
@@ -229,7 +229,7 @@ class FrameApplyFunctionMixin:
self.assert_eq(sorted(actual["c0"].to_numpy()),
sorted(expected["a"].to_numpy()))
self.assert_eq(sorted(actual["c1"].to_numpy()),
sorted(expected["b"].to_numpy()))
- def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]: #
noqa: F405
+ def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]:
return x
actual = psdf.pandas_on_spark.apply_batch(identify2)
@@ -425,7 +425,7 @@ class FrameApplyFunctionMixin:
self.assert_eq(sorted(actual["c0"].to_numpy()),
sorted(expected["a"].to_numpy()))
self.assert_eq(sorted(actual["c1"].to_numpy()),
sorted(expected["b"].to_numpy()))
- def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]: #
noqa: F405
+ def identify2(x) -> ps.DataFrame[slice("a", int), slice("b", int)]:
return x
actual = psdf.pandas_on_spark.transform_batch(identify2)
diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py
index 4560db23a7b5..eaafe9f82448 100644
--- a/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py
+++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_indexing_adv.py
@@ -29,7 +29,7 @@ class IndexingAdvParityTests(
if __name__ == "__main__":
- from pyspark.pandas.tests.connect.indexes.test_parity_indexing import *  # noqa: F401
+ from pyspark.pandas.tests.connect.indexes.test_parity_indexing import *
from pyspark.testing import main
diff --git a/python/pyspark/pandas/tests/groupby/test_apply_func.py b/python/pyspark/pandas/tests/groupby/test_apply_func.py
index f5c0b2ad9e54..25cc21423f32 100644
--- a/python/pyspark/pandas/tests/groupby/test_apply_func.py
+++ b/python/pyspark/pandas/tests/groupby/test_apply_func.py
@@ -145,7 +145,7 @@ class GroupbyApplyFuncMixin:
def add_max2(
x,
- ) -> ps.DataFrame[slice("a", int), slice("b", int), slice("c", int)]:  # noqa: F405
+ ) -> ps.DataFrame[slice("a", int), slice("b", int), slice("c", int)]:
return x + x.min()
actual = psdf.groupby("b").apply(add_max2).sort_index()
diff --git a/python/pyspark/pandas/typedef/__init__.py b/python/pyspark/pandas/typedef/__init__.py
index 49490674d729..69b0fde3793f 100644
--- a/python/pyspark/pandas/typedef/__init__.py
+++ b/python/pyspark/pandas/typedef/__init__.py
@@ -15,4 +15,4 @@
# limitations under the License.
#
-from pyspark.pandas.typedef.typehints import * # noqa: F401,F403,F405
+from pyspark.pandas.typedef.typehints import * # noqa: F403
diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index cedfa2707191..2b55b2721edf 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -47,7 +47,7 @@ from pyspark.sql import functions as F, Column, DataFrame as PySparkDataFrame, S
from pyspark.sql.types import DoubleType
from pyspark.sql.utils import is_remote
from pyspark.errors import PySparkTypeError, UnsupportedOperationException
-from pyspark import pandas as ps # noqa: F401
+from pyspark import pandas as ps
from pyspark.pandas._typing import (
Axis,
Label,
diff --git a/python/pyspark/sql/connect/client/__init__.py b/python/pyspark/sql/connect/client/__init__.py
index 40c05d4905c7..adb4148f60d4 100644
--- a/python/pyspark/sql/connect/client/__init__.py
+++ b/python/pyspark/sql/connect/client/__init__.py
@@ -19,5 +19,5 @@ from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__)
-from pyspark.sql.connect.client.core import * # noqa: F401,F403
+from pyspark.sql.connect.client.core import * # noqa: F403
from pyspark.sql.connect.logging import getLogLevel # noqa: F401
diff --git a/python/pyspark/sql/connect/functions/__init__.py b/python/pyspark/sql/connect/functions/__init__.py
index 4f3657b643a8..ee7e3bda67b4 100644
--- a/python/pyspark/sql/connect/functions/__init__.py
+++ b/python/pyspark/sql/connect/functions/__init__.py
@@ -19,5 +19,5 @@
from pyspark.testing.utils import should_test_connect
if should_test_connect:
- from pyspark.sql.connect.functions.builtin import * # noqa: F401,F403
- from pyspark.sql.connect.functions import partitioning # noqa: F401,F403
+ from pyspark.sql.connect.functions.builtin import * # noqa: F403
+ from pyspark.sql.connect.functions import partitioning # noqa: F401
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 8c214ea5da2b..75354e727678 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -1976,7 +1976,7 @@ class WriteOperation(LogicalPlan):
tsm = self.table_save_method.lower()
if tsm == "save_as_table":
plan.write_operation.table.save_method = (
- proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE  # noqa: E501
+ proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE
)
elif tsm == "insert_into":
plan.write_operation.table.save_method = (
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 8c73c5dcee41..384d10c2ae58 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -774,7 +774,7 @@ class SparkSession:
configs["spark.sql.session.localRelationChunkSizeBytes"] # type:
ignore[arg-type]
)
max_batch_of_chunks_size_bytes = int(
- configs["spark.sql.session.localRelationBatchOfChunksSizeBytes"]
# type: ignore[arg-type] # noqa: E501
+ configs["spark.sql.session.localRelationBatchOfChunksSizeBytes"]
# type: ignore[arg-type]
)
plan: LogicalPlan = local_relation
if cache_threshold <= _table.nbytes:
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 34f488244d78..9202aca0401b 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -38,7 +38,7 @@ from pyspark.sql.session import _monkey_patch_RDD, SparkSession
from pyspark.sql.dataframe import DataFrame
from pyspark.sql.readwriter import DataFrameReader
from pyspark.sql.streaming import DataStreamReader
-from pyspark.sql.udf import UDFRegistration # noqa: F401
+from pyspark.sql.udf import UDFRegistration
from pyspark.sql.udtf import UDTFRegistration
from pyspark.errors.exceptions.captured import install_exception_handler
from pyspark.sql.types import AtomicType, DataType, StructType
diff --git a/python/pyspark/sql/functions/__init__.py b/python/pyspark/sql/functions/__init__.py
index e0094d4f507a..ab563c18fbf1 100644
--- a/python/pyspark/sql/functions/__init__.py
+++ b/python/pyspark/sql/functions/__init__.py
@@ -17,8 +17,8 @@
"""PySpark Functions"""
-from pyspark.sql.functions.builtin import * # noqa: F401,F403
-from pyspark.sql.functions import partitioning # noqa: F401,F403
+from pyspark.sql.functions.builtin import * # noqa: F403
+from pyspark.sql.functions import partitioning # noqa: F401
__all__ = [ # noqa: F405
# Normal functions
diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py
index 5a42a59ff5aa..81f1bd4d02c9 100644
--- a/python/pyspark/sql/functions/builtin.py
+++ b/python/pyspark/sql/functions/builtin.py
@@ -61,11 +61,11 @@ from pyspark.sql.udtf import UserDefinedTableFunction, _create_py_udtf, _create_
# Keep pandas_udf and PandasUDFType import for backwards compatible import; moved in SPARK-28264
from pyspark.sql.pandas.functions import ( # noqa: F401
- arrow_udf, # noqa: F401
- pandas_udf, # noqa: F401
- ArrowUDFType, # noqa: F401
- PandasUDFType, # noqa: F401
-) # noqa: F401
+ arrow_udf,
+ pandas_udf,
+ ArrowUDFType,
+ PandasUDFType,
+)
from pyspark.sql.utils import (
to_str as _to_str,
@@ -7130,7 +7130,7 @@ def count_min_sketch(
+----------------------------------------------------------------------------------------------------------------------------------------+
|0000000100000000000000640000000200000002000000005ADECCEE00000000153EBE090000000000000033000000000000003100000000000000320000000000000032|
+----------------------------------------------------------------------------------------------------------------------------------------+
- """ # noqa: E501
+ """
_eps = lit(eps)
_conf = lit(confidence)
if seed is None:
@@ -20311,7 +20311,7 @@ def explode(col: "ColumnOrName") -> Column:
| 1| 2|
| 3| 4|
+---+---+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("explode", col)
@@ -20391,7 +20391,7 @@ def posexplode(col: "ColumnOrName") -> Column:
|1 |{1 -> 2, 3 -> 4, 5 -> NULL}|1 |3 |4 |
|1 |{1 -> 2, 3 -> 4, 5 -> NULL}|2 |5 |NULL |
+---+---------------------------+---+---+-----+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("posexplode", col)
@@ -20510,7 +20510,7 @@ def inline(col: "ColumnOrName") -> Column:
|1 |[{1, 2}, NULL, {3, 4}]|NULL|NULL|
|1 |[{1, 2}, NULL, {3, 4}]|3 |4 |
+---+----------------------+----+----+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("inline", col)
@@ -20577,7 +20577,7 @@ def explode_outer(col: "ColumnOrName") -> Column:
|2 |{} |NULL|NULL |
|3 |NULL |NULL|NULL |
+---+---------------------------+----+-----+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("explode_outer", col)
@@ -20644,7 +20644,7 @@ def posexplode_outer(col: "ColumnOrName") -> Column:
|2 |{} |NULL|NULL|NULL |
|3 |NULL |NULL|NULL|NULL |
+---+---------------------------+----+----+-----+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("posexplode_outer", col)
@@ -20700,7 +20700,7 @@ def inline_outer(col: "ColumnOrName") -> Column:
|2 |[] |NULL|NULL|
|3 |NULL |NULL|NULL|
+---+----------------------+----+----+
- """ # noqa: E501
+ """
return _invoke_function_over_columns("inline_outer", col)
@@ -28210,7 +28210,7 @@ def aes_encrypt(
+-------------------------------------------------------------------------------------------------------------+
|Spark SQL
|
+-------------------------------------------------------------------------------------------------------------+
- """ # noqa: E501
+ """
_mode = lit("GCM") if mode is None else mode
_padding = lit("DEFAULT") if padding is None else padding
_iv = lit("") if iv is None else iv
diff --git a/python/pyspark/sql/plot/__init__.py b/python/pyspark/sql/plot/__init__.py
index 6da07061b2a0..d016db5a96dc 100644
--- a/python/pyspark/sql/plot/__init__.py
+++ b/python/pyspark/sql/plot/__init__.py
@@ -18,4 +18,4 @@
"""
This package includes the plotting APIs for PySpark DataFrame.
"""
-from pyspark.sql.plot.core import * # noqa: F403, F401
+from pyspark.sql.plot.core import * # noqa: F403
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 96c8f8a475b2..6fcd4c30b098 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -468,7 +468,7 @@ class DataFrameReader(OptionUtils):
return self._df(self._jreader.json(self._spark._sc._jvm.PythonUtils.toSeq(path)))
if not is_remote_only():
- from pyspark.core.rdd import RDD # noqa: F401
+ from pyspark.core.rdd import RDD
if not is_remote_only() and isinstance(path, RDD):
@@ -838,7 +838,7 @@ class DataFrameReader(OptionUtils):
return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
if not is_remote_only():
- from pyspark.core.rdd import RDD # noqa: F401
+ from pyspark.core.rdd import RDD
if not is_remote_only() and isinstance(path, RDD):
@@ -965,7 +965,7 @@ class DataFrameReader(OptionUtils):
return self._df(self._jreader.xml(self._spark._sc._jvm.PythonUtils.toSeq(path)))
if not is_remote_only():
- from pyspark.core.rdd import RDD # noqa: F401
+ from pyspark.core.rdd import RDD
if not is_remote_only() and isinstance(path, RDD):
diff --git a/python/pyspark/sql/streaming/__init__.py b/python/pyspark/sql/streaming/__init__.py
index 279cc774d11b..fe7f0159806f 100644
--- a/python/pyspark/sql/streaming/__init__.py
+++ b/python/pyspark/sql/streaming/__init__.py
@@ -20,8 +20,8 @@ from pyspark.sql.streaming.query import StreamingQuery, StreamingQueryManager #
from pyspark.sql.streaming.readwriter import DataStreamReader, DataStreamWriter  # noqa: F401
from pyspark.sql.streaming.listener import StreamingQueryListener # noqa: F401
from pyspark.sql.streaming.stateful_processor import ( # noqa: F401
- StatefulProcessor, # noqa: F401
- StatefulProcessorHandle, # noqa: F401
-) # noqa: F401
+ StatefulProcessor,
+ StatefulProcessorHandle,
+)
from pyspark.sql.streaming.tws_tester import TwsTester # noqa: F401
from pyspark.errors import StreamingQueryException # noqa: F401
diff --git a/python/pyspark/sql/streaming/listener.py b/python/pyspark/sql/streaming/listener.py
index e0ef0c6c4b62..8fd1c8f26ed6 100644
--- a/python/pyspark/sql/streaming/listener.py
+++ b/python/pyspark/sql/streaming/listener.py
@@ -1123,7 +1123,7 @@ def _test() -> None:
globs = pyspark.sql.streaming.listener.__dict__.copy()
try:
spark = SparkSession._getActiveSessionOrCreate()
- except Py4JError: # noqa: F821
+ except Py4JError:
spark = SparkSession(sc) # type: ignore[name-defined] # noqa: F821
globs["spark"] = spark
diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py
index 36462b6114ec..c3a8507b4c0c 100644
--- a/python/pyspark/sql/streaming/query.py
+++ b/python/pyspark/sql/streaming/query.py
@@ -790,7 +790,7 @@ def _test() -> None:
globs = pyspark.sql.streaming.query.__dict__.copy()
try:
spark = SparkSession._getActiveSessionOrCreate()
- except Py4JError: # noqa: F821
+ except Py4JError:
spark = SparkSession(sc) # type: ignore[name-defined] # noqa: F821
globs["spark"] = spark
diff --git a/python/pyspark/sql/tests/arrow/test_arrow.py b/python/pyspark/sql/tests/arrow/test_arrow.py
index 3a1aa9a883b7..136f530f805a 100644
--- a/python/pyspark/sql/tests/arrow/test_arrow.py
+++ b/python/pyspark/sql/tests/arrow/test_arrow.py
@@ -71,7 +71,7 @@ if have_pandas:
from pandas.testing import assert_frame_equal
if have_pyarrow:
- import pyarrow as pa # noqa: F401
+ import pyarrow as pa
class ArrowTestsMixin:
diff --git a/python/pyspark/sql/tests/connect/client/test_reattach.py b/python/pyspark/sql/tests/connect/client/test_reattach.py
index 88d1c85227ae..7e08c4697d90 100644
--- a/python/pyspark/sql/tests/connect/client/test_reattach.py
+++ b/python/pyspark/sql/tests/connect/client/test_reattach.py
@@ -72,7 +72,7 @@ class SparkConnectReattachTestCase(ReusedMixedTestCase, PandasOnSparkTestUtils):
def check_error(q):
try:
list(q) # Iterate all.
- except Exception as e: # noqa: F841
+ except Exception as e:
return e
e = check_error(query1)
diff --git a/python/pyspark/sql/tests/connect/test_connect_collection.py b/python/pyspark/sql/tests/connect/test_connect_collection.py
index 75467cca2835..ad48c0508d72 100644
--- a/python/pyspark/sql/tests/connect/test_connect_collection.py
+++ b/python/pyspark/sql/tests/connect/test_connect_collection.py
@@ -253,13 +253,13 @@ class SparkConnectCollectionTests(ReusedMixedTestCase, PandasOnSparkTestUtils):
)
# test collect nested struct
- # +------------------------------------------+--------------------------+----------------------------+ # noqa
- # |struct(a, struct(a, struct(c, struct(d))))|struct(a, b, struct(c, d))| struct(e, f, struct(g))| # noqa
- # +------------------------------------------+--------------------------+----------------------------+ # noqa
- # | {1, {1, {0, {8}}}}| {1, 4, {0, 8}}|{true, true, {[1, null, 3]}}| # noqa
- # | {2, {2, {-1, {null}}}}| {2, 5, {-1, null}}| {false, null, {[1, 3]}}| # noqa
- # | {3, {3, {null, {0}}}}| {3, 6, {null, 0}}| {false, null, {[null]}}| # noqa
- # +------------------------------------------+--------------------------+----------------------------+ # noqa
+ # +------------------------------------------+--------------------------+----------------------------+
+ # |struct(a, struct(a, struct(c, struct(d))))|struct(a, b, struct(c, d))| struct(e, f, struct(g))|
+ # +------------------------------------------+--------------------------+----------------------------+
+ # | {1, {1, {0, {8}}}}| {1, 4, {0, 8}}|{true, true, {[1, null, 3]}}|
+ # | {2, {2, {-1, {null}}}}| {2, 5, {-1, null}}| {false, null, {[1, 3]}}|
+ # | {3, {3, {null, {0}}}}| {3, 6, {null, 0}}| {false, null, {[null]}}|
+ # +------------------------------------------+--------------------------+----------------------------+
self.assertEqual(
cdf.select(
CF.struct("a", CF.struct("a", CF.struct("c", CF.struct("d")))),
diff --git a/python/pyspark/sql/tests/pandas/streaming/test_pandas_transform_with_state_state_variable.py b/python/pyspark/sql/tests/pandas/streaming/test_pandas_transform_with_state_state_variable.py
index 2b38cdc6b3c3..820903b1d1c6 100644
--- a/python/pyspark/sql/tests/pandas/streaming/test_pandas_transform_with_state_state_variable.py
+++ b/python/pyspark/sql/tests/pandas/streaming/test_pandas_transform_with_state_state_variable.py
@@ -1010,7 +1010,7 @@ class TransformWithStateInPandasStateVariableTests(
if __name__ == "__main__":
- from pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state import *  # noqa: F401
+ from pyspark.sql.tests.pandas.streaming.test_pandas_transform_with_state import *
from pyspark.testing import main
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf.py b/python/pyspark/sql/tests/pandas/test_pandas_udf.py
index eec961218ce7..6fced7557158 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_udf.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_udf.py
@@ -390,7 +390,7 @@ class PandasUDFTestsMixin:
# intToDecimalCoercionEnabled is not required for this case
with self.sql_conf(
{
- "spark.sql.execution.pythonUDF.pandas.intToDecimalCoercionEnabled": intToDecimalCoercionEnabled  # noqa: E501
+ "spark.sql.execution.pythonUDF.pandas.intToDecimalCoercionEnabled": intToDecimalCoercionEnabled
}
):
result = df.withColumn("decimal_val", high_precision_udf("id")).collect()
diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py
index 46e7b430bc02..fb8a229da97e 100644
--- a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py
+++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py
@@ -215,7 +215,7 @@ class GroupedAggPandasUDFTestsMixin:
with self.assertRaises(PySparkNotImplementedError) as pe:
@pandas_udf(ArrayType(YearMonthIntervalType()), PandasUDFType.GROUPED_AGG)
- def mean_and_std_udf(v): # noqa: F811
+ def mean_and_std_udf(v):
return {v.mean(): v.std()}
self.check_error(
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index fb672e3d1222..e560d7927b9f 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -665,11 +665,11 @@ class PythonEvalType:
SQL_GROUPED_MAP_ARROW_UDF: "ArrowGroupedMapUDFType" = 209
SQL_COGROUPED_MAP_ARROW_UDF: "ArrowCogroupedMapUDFType" = 210
SQL_TRANSFORM_WITH_STATE_PANDAS_UDF: "PandasGroupedMapUDFTransformWithStateType" = 211
- SQL_TRANSFORM_WITH_STATE_PANDAS_INIT_STATE_UDF: "PandasGroupedMapUDFTransformWithStateInitStateType" = (  # noqa: E501
+ SQL_TRANSFORM_WITH_STATE_PANDAS_INIT_STATE_UDF: "PandasGroupedMapUDFTransformWithStateInitStateType" = (
212
)
SQL_TRANSFORM_WITH_STATE_PYTHON_ROW_UDF: "GroupedMapUDFTransformWithStateType" = 213
- SQL_TRANSFORM_WITH_STATE_PYTHON_ROW_INIT_STATE_UDF: "GroupedMapUDFTransformWithStateInitStateType" = (  # noqa: E501
+ SQL_TRANSFORM_WITH_STATE_PYTHON_ROW_INIT_STATE_UDF: "GroupedMapUDFTransformWithStateInitStateType" = (
214
)
SQL_GROUPED_MAP_ARROW_ITER_UDF: "ArrowGroupedMapIterUDFType" = 215
diff --git a/python/run-tests.py b/python/run-tests.py
index 59dcf3e0f299..c873c26639f5 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -40,8 +40,8 @@ from multiprocessing import Manager
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/"))
-from sparktestsupport import SPARK_HOME # noqa (suppress pep8 warnings)
-from sparktestsupport.shellutils import which, subprocess_check_output # noqa
+from sparktestsupport import SPARK_HOME
+from sparktestsupport.shellutils import which, subprocess_check_output
from sparktestsupport.modules import all_modules, pyspark_sql # noqa
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]