This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 12f3c81c26e [SPARK-44961][PYTHON][CONNECT][TESTS] Make PySpark 
(pyspark-connect module) tests passing without any dependency
12f3c81c26e is described below

commit 12f3c81c26ef639842b8a155e5fd5ccfa7705bea
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Aug 25 11:25:00 2023 -0700

    [SPARK-44961][PYTHON][CONNECT][TESTS] Make PySpark (pyspark-connect module) 
tests passing without any dependency
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to fix the tests to run properly, or be skipped, when
optional dependencies are not installed.
    
    ### Why are the changes needed?
    
    Currently, it fails as below:
    
    ```
    ./python/run-tests --python-executables=python3 --modules=pyspark-connect
    ...
    2c5289024a/python3__pyspark.sql.connect.window__nvbbzy7q.log)
    Finished test(python3): pyspark.sql.connect.session (0s)
    Traceback (most recent call last):
      File 
"/Users/hyukjin.kwon/workspace/forked/spark/python/pyspark/sql/pandas/utils.py",
 line 27, in require_minimum_pandas_version
        import pandas
    ModuleNotFoundError: No module named 'pandas'
    ```
    
    PySpark tests should pass without optional dependencies.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, test-only.
    
    ### How was this patch tested?
    
    Manually ran the tests as described above.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #42676 from HyukjinKwon/spark-connect-test.
    
    Authored-by: Hyukjin Kwon <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 python/pyspark/sql/connect/conf.py                         | 4 ++++
 python/pyspark/sql/connect/session.py                      | 3 ++-
 python/pyspark/sql/connect/streaming/__init__.py           | 6 ------
 python/pyspark/sql/connect/streaming/query.py              | 5 +++--
 python/pyspark/sql/connect/streaming/readwriter.py         | 3 ---
 python/pyspark/sql/tests/connect/client/test_client.py     | 9 ++++-----
 python/pyspark/sql/tests/connect/test_parity_functions.py  | 8 +++++---
 python/pyspark/sql/tests/connect/test_parity_pandas_udf.py | 6 ++++--
 python/pyspark/sql/tests/connect/test_parity_readwriter.py | 6 ++++--
 9 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/python/pyspark/sql/connect/conf.py 
b/python/pyspark/sql/connect/conf.py
index d323de716c4..16e992044b2 100644
--- a/python/pyspark/sql/connect/conf.py
+++ b/python/pyspark/sql/connect/conf.py
@@ -14,6 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)
+
 from typing import Any, Optional, Union, cast
 import warnings
 
diff --git a/python/pyspark/sql/connect/session.py 
b/python/pyspark/sql/connect/session.py
index 2905f7e4269..8e234442c20 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -64,7 +64,8 @@ from pyspark.sql.connect.plan import (
     CachedRemoteRelation,
 )
 from pyspark.sql.connect.readwriter import DataFrameReader
-from pyspark.sql.connect.streaming import DataStreamReader, 
StreamingQueryManager
+from pyspark.sql.connect.streaming.readwriter import DataStreamReader
+from pyspark.sql.connect.streaming.query import StreamingQueryManager
 from pyspark.sql.pandas.serializers import ArrowStreamPandasSerializer
 from pyspark.sql.pandas.types import to_arrow_schema, to_arrow_type, 
_deduplicate_field_names
 from pyspark.sql.session import classproperty, SparkSession as PySparkSession
diff --git a/python/pyspark/sql/connect/streaming/__init__.py 
b/python/pyspark/sql/connect/streaming/__init__.py
index cc50ff1a2eb..cce3acad34a 100644
--- a/python/pyspark/sql/connect/streaming/__init__.py
+++ b/python/pyspark/sql/connect/streaming/__init__.py
@@ -14,9 +14,3 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
-from pyspark.sql.connect.streaming.query import StreamingQuery  # noqa: F401
-from pyspark.sql.connect.streaming.readwriter import DataStreamReader  # noqa: 
F401
-from pyspark.sql.connect.streaming.readwriter import DataStreamWriter  # noqa: 
F401
-from pyspark.sql.connect.streaming.query import StreamingQueryManager  # noqa: 
F401
-from pyspark.errors import StreamingQueryException  # noqa: F401
diff --git a/python/pyspark/sql/connect/streaming/query.py 
b/python/pyspark/sql/connect/streaming/query.py
index 7cebc0e71ef..65b48099363 100644
--- a/python/pyspark/sql/connect/streaming/query.py
+++ b/python/pyspark/sql/connect/streaming/query.py
@@ -14,6 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)
 
 import json
 import sys
@@ -35,8 +38,6 @@ from pyspark.errors.exceptions.connect import (
 )
 from pyspark.errors import PySparkPicklingError
 
-__all__ = ["StreamingQuery", "StreamingQueryManager"]
-
 if TYPE_CHECKING:
     from pyspark.sql.connect.session import SparkSession
 
diff --git a/python/pyspark/sql/connect/streaming/readwriter.py 
b/python/pyspark/sql/connect/streaming/readwriter.py
index 63ec7848d1e..067e4f1b8d6 100644
--- a/python/pyspark/sql/connect/streaming/readwriter.py
+++ b/python/pyspark/sql/connect/streaming/readwriter.py
@@ -14,7 +14,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
 from pyspark.sql.connect.utils import check_dependencies
 
 check_dependencies(__name__)
@@ -42,8 +41,6 @@ if TYPE_CHECKING:
     from pyspark.sql.connect.dataframe import DataFrame
     from pyspark.sql._typing import SupportsProcess
 
-__all__ = ["DataStreamReader", "DataStreamWriter"]
-
 
 class DataStreamReader(OptionUtils):
     def __init__(self, client: "SparkSession") -> None:
diff --git a/python/pyspark/sql/tests/connect/client/test_client.py 
b/python/pyspark/sql/tests/connect/client/test_client.py
index 98f68767b8b..2ba42cabf84 100644
--- a/python/pyspark/sql/tests/connect/client/test_client.py
+++ b/python/pyspark/sql/tests/connect/client/test_client.py
@@ -19,16 +19,15 @@ import unittest
 import uuid
 from typing import Optional
 
-from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
-import pyspark.sql.connect.proto as proto
 from pyspark.testing.connectutils import should_test_connect, 
connect_requirement_message
 
-from pyspark.sql.connect.client.core import Retrying
-from pyspark.sql.connect.client.reattach import RetryException
-
 if should_test_connect:
     import pandas as pd
     import pyarrow as pa
+    from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
+    from pyspark.sql.connect.client.core import Retrying
+    from pyspark.sql.connect.client.reattach import RetryException
+    import pyspark.sql.connect.proto as proto
 
 
 @unittest.skipIf(not should_test_connect, connect_requirement_message)
diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py 
b/python/pyspark/sql/tests/connect/test_parity_functions.py
index 35ddf96e1fb..4fa1cf31b3b 100644
--- a/python/pyspark/sql/tests/connect/test_parity_functions.py
+++ b/python/pyspark/sql/tests/connect/test_parity_functions.py
@@ -17,10 +17,12 @@
 
 import unittest
 
-from pyspark.errors.exceptions.connect import SparkConnectException
-from pyspark.sql.connect.column import Column
 from pyspark.sql.tests.test_functions import FunctionsTestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, 
ReusedConnectTestCase
+
+if should_test_connect:
+    from pyspark.errors.exceptions.connect import SparkConnectException
+    from pyspark.sql.connect.column import Column
 
 
 class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase):
diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py 
b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
index 09d1ab3f4c3..b5433b38dee 100644
--- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
@@ -14,10 +14,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from pyspark.sql.connect.types import UnparsedDataType
 from pyspark.sql.functions import pandas_udf, PandasUDFType
 from pyspark.sql.tests.pandas.test_pandas_udf import PandasUDFTestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, 
ReusedConnectTestCase
+
+if should_test_connect:
+    from pyspark.sql.connect.types import UnparsedDataType
 
 
 class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase):
diff --git a/python/pyspark/sql/tests/connect/test_parity_readwriter.py 
b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
index 2fa3f79a92f..46333b555c3 100644
--- a/python/pyspark/sql/tests/connect/test_parity_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
@@ -16,9 +16,11 @@
 #
 import unittest
 
-from pyspark.sql.connect.readwriter import DataFrameWriterV2
 from pyspark.sql.tests.test_readwriter import ReadwriterTestsMixin, 
ReadwriterV2TestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, 
ReusedConnectTestCase
+
+if should_test_connect:
+    from pyspark.sql.connect.readwriter import DataFrameWriterV2
 
 
 class ReadwriterParityTests(ReadwriterTestsMixin, ReusedConnectTestCase):


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to