spark git commit: [SPARK-22379][PYTHON] Reduce duplication setUpClass and tearDownClass in PySpark SQL tests

ueshin Sun, 29 Oct 2017 19:51:07 -0700

Repository: spark
Updated Branches:
  refs/heads/master 1fe27612d -> 188b47e68



[SPARK-22379][PYTHON] Reduce duplication setUpClass and tearDownClass in 
PySpark SQL tests

## What changes were proposed in this pull request?

This PR proposes to add `ReusedSQLTestCase`, which deduplicates `setUpClass` and
`tearDownClass` in `sql/tests.py`.

## How was this patch tested?

Jenkins tests and manual tests.

Author: hyukjinkwon <[email protected]>

Closes #19595 from HyukjinKwon/reduce-dupe.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/188b47e6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/188b47e6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/188b47e6

Branch: refs/heads/master
Commit: 188b47e68350731da775efccc2cda9c61610aa14
Parents: 1fe2761
Author: hyukjinkwon <[email protected]>
Authored: Mon Oct 30 11:50:22 2017 +0900
Committer: Takuya UESHIN <[email protected]>
Committed: Mon Oct 30 11:50:22 2017 +0900

----------------------------------------------------------------------
 python/pyspark/sql/tests.py | 63 ++++++++++++++--------------------------
 1 file changed, 21 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/188b47e6/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 8ed37c9..483f39a 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -179,6 +179,18 @@ class MyObject(object):
         self.value = value
 
 
+class ReusedSQLTestCase(ReusedPySparkTestCase):
+    @classmethod
+    def setUpClass(cls):
+        ReusedPySparkTestCase.setUpClass()
+        cls.spark = SparkSession(cls.sc)
+
+    @classmethod
+    def tearDownClass(cls):
+        ReusedPySparkTestCase.tearDownClass()
+        cls.spark.stop()
+
+
 class DataTypeTests(unittest.TestCase):
     # regression test for SPARK-6055
     def test_data_type_eq(self):
@@ -214,21 +226,19 @@ class DataTypeTests(unittest.TestCase):
         self.assertRaises(TypeError, struct_field.typeName)
 
 
-class SQLTests(ReusedPySparkTestCase):
+class SQLTests(ReusedSQLTestCase):
 
     @classmethod
     def setUpClass(cls):
-        ReusedPySparkTestCase.setUpClass()
+        ReusedSQLTestCase.setUpClass()
         cls.tempdir = tempfile.NamedTemporaryFile(delete=False)
         os.unlink(cls.tempdir.name)
-        cls.spark = SparkSession(cls.sc)
         cls.testData = [Row(key=i, value=str(i)) for i in range(100)]
         cls.df = cls.spark.createDataFrame(cls.testData)
 
     @classmethod
     def tearDownClass(cls):
-        ReusedPySparkTestCase.tearDownClass()
-        cls.spark.stop()
+        ReusedSQLTestCase.tearDownClass()
         shutil.rmtree(cls.tempdir.name, ignore_errors=True)
 
     def test_sqlcontext_reuses_sparksession(self):
@@ -2623,17 +2633,7 @@ class HiveSparkSubmitTests(SparkSubmitTests):
         self.assertTrue(os.path.exists(metastore_path))
 
 
-class SQLTests2(ReusedPySparkTestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        ReusedPySparkTestCase.setUpClass()
-        cls.spark = SparkSession(cls.sc)
-
-    @classmethod
-    def tearDownClass(cls):
-        ReusedPySparkTestCase.tearDownClass()
-        cls.spark.stop()
+class SQLTests2(ReusedSQLTestCase):
 
     # We can't include this test into SQLTests because we will stop class's SparkContext and cause
     # other tests failed.
@@ -3082,12 +3082,12 @@ class DataTypeVerificationTests(unittest.TestCase):
 
 
 @unittest.skipIf(not _have_arrow, "Arrow not installed")
-class ArrowTests(ReusedPySparkTestCase):
+class ArrowTests(ReusedSQLTestCase):
 
     @classmethod
     def setUpClass(cls):
         from datetime import datetime
-        ReusedPySparkTestCase.setUpClass()
+        ReusedSQLTestCase.setUpClass()
 
         # Synchronize default timezone between Python and Java
         cls.tz_prev = os.environ.get("TZ", None)  # save current tz if set
@@ -3095,7 +3095,6 @@ class ArrowTests(ReusedPySparkTestCase):
         os.environ["TZ"] = tz
         time.tzset()
 
-        cls.spark = SparkSession(cls.sc)
         cls.spark.conf.set("spark.sql.session.timeZone", tz)
         cls.spark.conf.set("spark.sql.execution.arrow.enabled", "true")
         cls.schema = StructType([
@@ -3116,8 +3115,7 @@ class ArrowTests(ReusedPySparkTestCase):
         if cls.tz_prev is not None:
             os.environ["TZ"] = cls.tz_prev
         time.tzset()
-        ReusedPySparkTestCase.tearDownClass()
-        cls.spark.stop()
+        ReusedSQLTestCase.tearDownClass()
 
     def assertFramesEqual(self, df_with_arrow, df_without):
         msg = ("DataFrame from Arrow is not equal" +
@@ -3169,17 +3167,7 @@ class ArrowTests(ReusedPySparkTestCase):
 
 
 @unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
-class VectorizedUDFTests(ReusedPySparkTestCase):
-
-    @classmethod
-    def setUpClass(cls):
-        ReusedPySparkTestCase.setUpClass()
-        cls.spark = SparkSession(cls.sc)
-
-    @classmethod
-    def tearDownClass(cls):
-        ReusedPySparkTestCase.tearDownClass()
-        cls.spark.stop()
+class VectorizedUDFTests(ReusedSQLTestCase):
 
     def test_vectorized_udf_basic(self):
         from pyspark.sql.functions import pandas_udf, col
@@ -3498,16 +3486,7 @@ class VectorizedUDFTests(ReusedPySparkTestCase):
 
 
 @unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed")
-class GroupbyApplyTests(ReusedPySparkTestCase):
-    @classmethod
-    def setUpClass(cls):
-        ReusedPySparkTestCase.setUpClass()
-        cls.spark = SparkSession(cls.sc)
-
-    @classmethod
-    def tearDownClass(cls):
-        ReusedPySparkTestCase.tearDownClass()
-        cls.spark.stop()
+class GroupbyApplyTests(ReusedSQLTestCase):
 
     def assertFramesEqual(self, expected, result):
         msg = ("DataFrames are not equal: " +


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-22379][PYTHON] Reduce duplication setUpClass and tearDownClass in PySpark SQL tests

Reply via email to