This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 12f3c81c26e [SPARK-44961][PYTHON][CONNECT][TESTS] Make PySpark (pyspark-connect module) tests passing without any dependency
12f3c81c26e is described below
commit 12f3c81c26ef639842b8a155e5fd5ccfa7705bea
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Fri Aug 25 11:25:00 2023 -0700
[SPARK-44961][PYTHON][CONNECT][TESTS] Make PySpark (pyspark-connect module) tests passing without any dependency
### What changes were proposed in this pull request?
This PR proposes to fix the tests so that they run properly, or are skipped, when optional dependencies are not installed.
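For context, the module-level guard this change relies on is `check_dependencies` from `pyspark.sql.connect.utils`, called at import time (see the conf.py and query.py hunks below). Here is a minimal sketch of what such a guard can look like; the body is an illustrative assumption, not Spark's actual implementation:

```python
import importlib


def check_dependencies(mod_name: str) -> None:
    """Fail fast with a readable error when Spark Connect's optional
    dependencies are missing (illustrative sketch only)."""
    # pandas, pyarrow, and grpc are typical Spark Connect extras;
    # this list is an assumption, not the authoritative set.
    for package in ("pandas", "pyarrow", "grpc"):
        try:
            importlib.import_module(package)
        except ImportError as error:
            raise ImportError(
                f"'{mod_name}' requires the optional dependency "
                f"'{package}' for Spark Connect."
            ) from error
```

Calling `check_dependencies(__name__)` at the top of a module turns a bare `ModuleNotFoundError` deep inside an import chain into an actionable message at the module boundary.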
### Why are the changes needed?
Currently, it fails as below:
```
./python/run-tests --python-executables=python3 --modules=pyspark-connect
...
2c5289024a/python3__pyspark.sql.connect.window__nvbbzy7q.log)
Finished test(python3): pyspark.sql.connect.session (0s)
Traceback (most recent call last):
  File "/Users/hyukjin.kwon/workspace/forked/spark/python/pyspark/sql/pandas/utils.py", line 27, in require_minimum_pandas_version
    import pandas
ModuleNotFoundError: No module named 'pandas'
```
PySpark tests should pass without optional dependencies.
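On the test side, the diff below applies two idioms already provided by `pyspark.testing.connectutils`: Connect-only imports are deferred behind `should_test_connect`, and whole test classes are skipped via `unittest.skipIf` with `connect_requirement_message`. A minimal sketch of the pattern (the class and test names are illustrative):

```python
import unittest

from pyspark.testing.connectutils import (
    should_test_connect,
    connect_requirement_message,
)

# Defer Connect-only imports so merely importing this test file does not
# raise ModuleNotFoundError when pandas/pyarrow/grpcio are absent.
if should_test_connect:
    from pyspark.sql.connect.column import Column  # noqa: F401


@unittest.skipIf(not should_test_connect, connect_requirement_message)
class ExampleParityTests(unittest.TestCase):  # illustrative name
    def test_connect_available(self) -> None:
        self.assertTrue(should_test_connect)
```

With this shape, test collection succeeds everywhere, and the suite reports skips, rather than crashing, on environments without the optional packages.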
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
Manually ran the tests as described above.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #42676 from HyukjinKwon/spark-connect-test.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
python/pyspark/sql/connect/conf.py | 4 ++++
python/pyspark/sql/connect/session.py | 3 ++-
python/pyspark/sql/connect/streaming/__init__.py | 6 ------
python/pyspark/sql/connect/streaming/query.py | 5 +++--
python/pyspark/sql/connect/streaming/readwriter.py | 3 ---
python/pyspark/sql/tests/connect/client/test_client.py | 9 ++++-----
python/pyspark/sql/tests/connect/test_parity_functions.py | 8 +++++---
python/pyspark/sql/tests/connect/test_parity_pandas_udf.py | 6 ++++--
python/pyspark/sql/tests/connect/test_parity_readwriter.py | 6 ++++--
9 files changed, 26 insertions(+), 24 deletions(-)
diff --git a/python/pyspark/sql/connect/conf.py b/python/pyspark/sql/connect/conf.py
index d323de716c4..16e992044b2 100644
--- a/python/pyspark/sql/connect/conf.py
+++ b/python/pyspark/sql/connect/conf.py
@@ -14,6 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)
+
from typing import Any, Optional, Union, cast
import warnings
diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py
index 2905f7e4269..8e234442c20 100644
--- a/python/pyspark/sql/connect/session.py
+++ b/python/pyspark/sql/connect/session.py
@@ -64,7 +64,8 @@ from pyspark.sql.connect.plan import (
CachedRemoteRelation,
)
from pyspark.sql.connect.readwriter import DataFrameReader
-from pyspark.sql.connect.streaming import DataStreamReader, StreamingQueryManager
+from pyspark.sql.connect.streaming.readwriter import DataStreamReader
+from pyspark.sql.connect.streaming.query import StreamingQueryManager
from pyspark.sql.pandas.serializers import ArrowStreamPandasSerializer
from pyspark.sql.pandas.types import to_arrow_schema, to_arrow_type, _deduplicate_field_names
from pyspark.sql.session import classproperty, SparkSession as PySparkSession
diff --git a/python/pyspark/sql/connect/streaming/__init__.py b/python/pyspark/sql/connect/streaming/__init__.py
index cc50ff1a2eb..cce3acad34a 100644
--- a/python/pyspark/sql/connect/streaming/__init__.py
+++ b/python/pyspark/sql/connect/streaming/__init__.py
@@ -14,9 +14,3 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
-from pyspark.sql.connect.streaming.query import StreamingQuery # noqa: F401
-from pyspark.sql.connect.streaming.readwriter import DataStreamReader # noqa: F401
-from pyspark.sql.connect.streaming.readwriter import DataStreamWriter # noqa: F401
-from pyspark.sql.connect.streaming.query import StreamingQueryManager # noqa: F401
-from pyspark.errors import StreamingQueryException # noqa: F401
diff --git a/python/pyspark/sql/connect/streaming/query.py b/python/pyspark/sql/connect/streaming/query.py
index 7cebc0e71ef..65b48099363 100644
--- a/python/pyspark/sql/connect/streaming/query.py
+++ b/python/pyspark/sql/connect/streaming/query.py
@@ -14,6 +14,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+from pyspark.sql.connect.utils import check_dependencies
+
+check_dependencies(__name__)
import json
import sys
@@ -35,8 +38,6 @@ from pyspark.errors.exceptions.connect import (
)
from pyspark.errors import PySparkPicklingError
-__all__ = ["StreamingQuery", "StreamingQueryManager"]
-
if TYPE_CHECKING:
from pyspark.sql.connect.session import SparkSession
diff --git a/python/pyspark/sql/connect/streaming/readwriter.py b/python/pyspark/sql/connect/streaming/readwriter.py
index 63ec7848d1e..067e4f1b8d6 100644
--- a/python/pyspark/sql/connect/streaming/readwriter.py
+++ b/python/pyspark/sql/connect/streaming/readwriter.py
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
from pyspark.sql.connect.utils import check_dependencies
check_dependencies(__name__)
@@ -42,8 +41,6 @@ if TYPE_CHECKING:
from pyspark.sql.connect.dataframe import DataFrame
from pyspark.sql._typing import SupportsProcess
-__all__ = ["DataStreamReader", "DataStreamWriter"]
-
class DataStreamReader(OptionUtils):
def __init__(self, client: "SparkSession") -> None:
diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py
index 98f68767b8b..2ba42cabf84 100644
--- a/python/pyspark/sql/tests/connect/client/test_client.py
+++ b/python/pyspark/sql/tests/connect/client/test_client.py
@@ -19,16 +19,15 @@ import unittest
import uuid
from typing import Optional
-from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
-import pyspark.sql.connect.proto as proto
from pyspark.testing.connectutils import should_test_connect, connect_requirement_message
-from pyspark.sql.connect.client.core import Retrying
-from pyspark.sql.connect.client.reattach import RetryException
-
if should_test_connect:
import pandas as pd
import pyarrow as pa
+ from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder
+ from pyspark.sql.connect.client.core import Retrying
+ from pyspark.sql.connect.client.reattach import RetryException
+ import pyspark.sql.connect.proto as proto
@unittest.skipIf(not should_test_connect, connect_requirement_message)
diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py b/python/pyspark/sql/tests/connect/test_parity_functions.py
index 35ddf96e1fb..4fa1cf31b3b 100644
--- a/python/pyspark/sql/tests/connect/test_parity_functions.py
+++ b/python/pyspark/sql/tests/connect/test_parity_functions.py
@@ -17,10 +17,12 @@
import unittest
-from pyspark.errors.exceptions.connect import SparkConnectException
-from pyspark.sql.connect.column import Column
from pyspark.sql.tests.test_functions import FunctionsTestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase
+
+if should_test_connect:
+ from pyspark.errors.exceptions.connect import SparkConnectException
+ from pyspark.sql.connect.column import Column
class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase):
diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
index 09d1ab3f4c3..b5433b38dee 100644
--- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
+++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py
@@ -14,10 +14,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-from pyspark.sql.connect.types import UnparsedDataType
from pyspark.sql.functions import pandas_udf, PandasUDFType
from pyspark.sql.tests.pandas.test_pandas_udf import PandasUDFTestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase
+
+if should_test_connect:
+ from pyspark.sql.connect.types import UnparsedDataType
class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase):
diff --git a/python/pyspark/sql/tests/connect/test_parity_readwriter.py b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
index 2fa3f79a92f..46333b555c3 100644
--- a/python/pyspark/sql/tests/connect/test_parity_readwriter.py
+++ b/python/pyspark/sql/tests/connect/test_parity_readwriter.py
@@ -16,9 +16,11 @@
#
import unittest
-from pyspark.sql.connect.readwriter import DataFrameWriterV2
from pyspark.sql.tests.test_readwriter import ReadwriterTestsMixin, ReadwriterV2TestsMixin
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase
+
+if should_test_connect:
+ from pyspark.sql.connect.readwriter import DataFrameWriterV2
class ReadwriterParityTests(ReadwriterTestsMixin, ReusedConnectTestCase):