HyukjinKwon commented on code in PR #48964:
URL: https://github.com/apache/spark/pull/48964#discussion_r1868569335
##########
python/pyspark/errors/utils.py:
##########
@@ -31,21 +31,42 @@
Type,
Optional,
Union,
- TYPE_CHECKING,
overload,
cast,
)
import pyspark
from pyspark.errors.error_classes import ERROR_CLASSES_MAP
-if TYPE_CHECKING:
- from pyspark.sql import SparkSession
-
T = TypeVar("T")
FuncT = TypeVar("FuncT", bound=Callable[..., Any])
_current_origin = threading.local()
+# Providing DataFrame debugging options to reduce performance slowdown.
+# Default is True.
+_enable_debugging_cache = None
+
+
+def is_debugging_enabled() -> bool:
+ global _enable_debugging_cache
+
+ if _enable_debugging_cache is None:
+ from pyspark.sql import SparkSession
Review Comment:
@itholic it seems like there can be a circular import issue here when we
use Spark Connect only:
```
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/ml/tests/connect/test_connect_classification.py",
line 24, in <module>
from pyspark.ml.tests.connect.test_legacy_mode_classification import
ClassificationTestsMixin
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/ml/tests/connect/test_legacy_mode_classification.py",
line 26, in <module>
from pyspark.testing.connectutils import should_test_connect,
connect_requirement_message
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/testing/connectutils.py",
line 51, in <module>
from pyspark.sql.connect.dataframe import DataFrame
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/dataframe.py",
line 75, in <module>
from pyspark.sql.connect.group import GroupedData
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/group.py",
line 43, in <module>
from pyspark.sql.connect.functions import builtin as F
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/functions/__init__.py",
line 22, in <module>
from pyspark.sql.connect.functions.builtin import * # noqa: F401,F403
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/functions/builtin.py",
line 62, in <module>
from pyspark.sql.connect.udf import _create_py_udf
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/udf.py",
line 38, in <module>
from pyspark.sql.connect.column import Column
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/column.py",
line 110, in <module>
@with_origin_to_class(["to_plan"])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/errors/utils.py",
line 313, in <lambda>
return lambda cls: with_origin_to_class(cls, ignores)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/errors/utils.py",
line 318, in with_origin_to_class
and is_debugging_enabled()
^^^^^^^^^^^^^^^^^^^^^^
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/errors/utils.py",
line 56, in is_debugging_enabled
spark = SparkSession.getActiveSession()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/utils.py",
line 344, in wrapped
from pyspark.sql.connect.session import SparkSession
File
"/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/session.py",
line 53, in <module>
from pyspark.sql.connect.dataframe import DataFrame
ImportError: cannot import name 'DataFrame' from partially initialized
module 'pyspark.sql.connect.dataframe' (most likely due to a circular import)
(/opt/hostedtoolcache/Python/3.11.10/x64/lib/python3.11/site-packages/pyspark/sql/connect/dataframe.py)
```
https://github.com/apache/spark/actions/runs/12146458952/job/33870426859
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]