This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f5924cf6bee [SPARK-42009][CONNECT][TESTS] Reuse pyspark.sql.tests.test_serde test cases
f5924cf6bee is described below
commit f5924cf6beef1253af7cc89f6b292569caff3973
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 12 18:15:06 2023 +0900
[SPARK-42009][CONNECT][TESTS] Reuse pyspark.sql.tests.test_serde test cases
### What changes were proposed in this pull request?
This PR reuses the PySpark pyspark.sql.tests.test_serde test cases in Spark
Connect, running the ones that currently pass and skipping the rest.
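Concretely, the existing SerdeTests class is split into a SerdeTestsMixin that carries the test methods, and a new SerdeParityTests runs that mixin against ReusedConnectTestCase, skipping the cases that still rely on RDDs or timezone-aware datetimes. A condensed sketch of the reuse pattern (the full file is in the diff below):

```python
import unittest

from pyspark.sql.tests.test_serde import SerdeTestsMixin
from pyspark.testing.connectutils import ReusedConnectTestCase


class SerdeParityTests(SerdeTestsMixin, ReusedConnectTestCase):
    # RDD-based serde tests cannot run on Spark Connect yet, so they are skipped.
    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
    def test_int_array_serialization(self):
        super().test_int_array_serialization()
```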
### Why are the changes needed?
To improve test coverage of Spark Connect.
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
Manually ran the tests locally.
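For reference, a minimal way to run just this parity module with plain unittest (a sketch; assumes a local PySpark build with the Spark Connect server dependencies available):

```python
import unittest

# Load and run the new parity suite by module name. ReusedConnectTestCase
# starts a local Spark Connect session, so a local Spark build is assumed.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "pyspark.sql.tests.connect.test_parity_serde"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```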
Closes #39527 from HyukjinKwon/SPARK-42009.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
dev/sparktestsupport/modules.py | 1 +
.../pyspark/sql/tests/connect/test_parity_serde.py | 58 ++++++++++++++++++++++
python/pyspark/sql/tests/test_serde.py | 6 ++-
3 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 0ffffe39323..785dcf42498 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -520,6 +520,7 @@ pyspark_connect = Module(
"pyspark.sql.tests.connect.test_connect_column",
"pyspark.sql.tests.connect.test_parity_datasources",
"pyspark.sql.tests.connect.test_parity_catalog",
+ "pyspark.sql.tests.connect.test_parity_serde",
"pyspark.sql.tests.connect.test_parity_functions",
"pyspark.sql.tests.connect.test_parity_group",
"pyspark.sql.tests.connect.test_parity_dataframe",
diff --git a/python/pyspark/sql/tests/connect/test_parity_serde.py b/python/pyspark/sql/tests/connect/test_parity_serde.py
new file mode 100644
index 00000000000..ba198636126
--- /dev/null
+++ b/python/pyspark/sql/tests/connect/test_parity_serde.py
@@ -0,0 +1,58 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.sql.tests.test_serde import SerdeTestsMixin
+from pyspark.testing.connectutils import ReusedConnectTestCase
+
+
+class SerdeParityTests(SerdeTestsMixin, ReusedConnectTestCase):
+    # TODO(SPARK-42014): Support aware datetimes for createDataFrame
+    @unittest.skip("Fails in Spark Connect, should enable.")
+    def test_filter_with_datetime_timezone(self):
+        super().test_filter_with_datetime_timezone()
+
+    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
+    def test_int_array_serialization(self):
+        super().test_int_array_serialization()
+
+    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
+    def test_serialize_nested_array_and_map(self):
+        super().test_serialize_nested_array_and_map()
+
+    @unittest.skip("Spark Connect does not support RDD but the tests depend on them.")
+    def test_struct_in_map(self):
+        super().test_struct_in_map()
+
+    # TODO(SPARK-42014): Support aware datetimes for createDataFrame
+    @unittest.skip("Fails in Spark Connect, should enable.")
+    def test_time_with_timezone(self):
+        super().test_time_with_timezone()
+
+
+if __name__ == "__main__":
+    import unittest
+    from pyspark.sql.tests.connect.test_parity_serde import *  # noqa: F401
+
+    try:
+        import xmlrunner  # type: ignore[import]
+
+        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
+    except ImportError:
+        testRunner = None
+    unittest.main(testRunner=testRunner, verbosity=2)
diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py
index e8017cfd38a..f24a716f4a6 100644
--- a/python/pyspark/sql/tests/test_serde.py
+++ b/python/pyspark/sql/tests/test_serde.py
@@ -26,7 +26,7 @@ from pyspark.sql.types import StructType, StructField, DecimalType, BinaryType
from pyspark.testing.sqlutils import ReusedSQLTestCase, UTCOffsetTimezone
-class SerdeTests(ReusedSQLTestCase):
+class SerdeTestsMixin:
     def test_serialize_nested_array_and_map(self):
         d = [Row(lst=[Row(a=1, b="s")], d={"key": Row(c=1.0, d="2")})]
         rdd = self.sc.parallelize(d)
@@ -140,6 +140,10 @@ class SerdeTests(ReusedSQLTestCase):
         self.assertEqual(df.first().col, bytearray(b"abcd"))
+class SerdeTests(SerdeTestsMixin, ReusedSQLTestCase):
+    pass
+
+
if __name__ == "__main__":
     import unittest
     from pyspark.sql.tests.test_serde import *  # noqa: F401
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]