Repository: spark Updated Branches: refs/heads/master 1fe27612d -> 188b47e68
[SPARK-22379][PYTHON] Reduce duplication setUpClass and tearDownClass in PySpark SQL tests ## What changes were proposed in this pull request? This PR proposes to add `ReusedSQLTestCase`, which deduplicates `setUpClass` and `tearDownClass` in `sql/tests.py`. ## How was this patch tested? Jenkins tests and manual tests. Author: hyukjinkwon <[email protected]> Closes #19595 from HyukjinKwon/reduce-dupe. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/188b47e6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/188b47e6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/188b47e6 Branch: refs/heads/master Commit: 188b47e68350731da775efccc2cda9c61610aa14 Parents: 1fe2761 Author: hyukjinkwon <[email protected]> Authored: Mon Oct 30 11:50:22 2017 +0900 Committer: Takuya UESHIN <[email protected]> Committed: Mon Oct 30 11:50:22 2017 +0900 ---------------------------------------------------------------------- python/pyspark/sql/tests.py | 63 ++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/188b47e6/python/pyspark/sql/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 8ed37c9..483f39a 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -179,6 +179,18 @@ class MyObject(object): self.value = value +class ReusedSQLTestCase(ReusedPySparkTestCase): + @classmethod + def setUpClass(cls): + ReusedPySparkTestCase.setUpClass() + cls.spark = SparkSession(cls.sc) + + @classmethod + def tearDownClass(cls): + ReusedPySparkTestCase.tearDownClass() + cls.spark.stop() + + class DataTypeTests(unittest.TestCase): # regression test for SPARK-6055 def test_data_type_eq(self): @@ -214,21 +226,19 @@ class 
DataTypeTests(unittest.TestCase): self.assertRaises(TypeError, struct_field.typeName) -class SQLTests(ReusedPySparkTestCase): +class SQLTests(ReusedSQLTestCase): @classmethod def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() + ReusedSQLTestCase.setUpClass() cls.tempdir = tempfile.NamedTemporaryFile(delete=False) os.unlink(cls.tempdir.name) - cls.spark = SparkSession(cls.sc) cls.testData = [Row(key=i, value=str(i)) for i in range(100)] cls.df = cls.spark.createDataFrame(cls.testData) @classmethod def tearDownClass(cls): - ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() + ReusedSQLTestCase.tearDownClass() shutil.rmtree(cls.tempdir.name, ignore_errors=True) def test_sqlcontext_reuses_sparksession(self): @@ -2623,17 +2633,7 @@ class HiveSparkSubmitTests(SparkSubmitTests): self.assertTrue(os.path.exists(metastore_path)) -class SQLTests2(ReusedPySparkTestCase): - - @classmethod - def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() - cls.spark = SparkSession(cls.sc) - - @classmethod - def tearDownClass(cls): - ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() +class SQLTests2(ReusedSQLTestCase): # We can't include this test into SQLTests because we will stop class's SparkContext and cause # other tests failed. 
@@ -3082,12 +3082,12 @@ class DataTypeVerificationTests(unittest.TestCase): @unittest.skipIf(not _have_arrow, "Arrow not installed") -class ArrowTests(ReusedPySparkTestCase): +class ArrowTests(ReusedSQLTestCase): @classmethod def setUpClass(cls): from datetime import datetime - ReusedPySparkTestCase.setUpClass() + ReusedSQLTestCase.setUpClass() # Synchronize default timezone between Python and Java cls.tz_prev = os.environ.get("TZ", None) # save current tz if set @@ -3095,7 +3095,6 @@ class ArrowTests(ReusedPySparkTestCase): os.environ["TZ"] = tz time.tzset() - cls.spark = SparkSession(cls.sc) cls.spark.conf.set("spark.sql.session.timeZone", tz) cls.spark.conf.set("spark.sql.execution.arrow.enabled", "true") cls.schema = StructType([ @@ -3116,8 +3115,7 @@ class ArrowTests(ReusedPySparkTestCase): if cls.tz_prev is not None: os.environ["TZ"] = cls.tz_prev time.tzset() - ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() + ReusedSQLTestCase.tearDownClass() def assertFramesEqual(self, df_with_arrow, df_without): msg = ("DataFrame from Arrow is not equal" + @@ -3169,17 +3167,7 @@ class ArrowTests(ReusedPySparkTestCase): @unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed") -class VectorizedUDFTests(ReusedPySparkTestCase): - - @classmethod - def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() - cls.spark = SparkSession(cls.sc) - - @classmethod - def tearDownClass(cls): - ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() +class VectorizedUDFTests(ReusedSQLTestCase): def test_vectorized_udf_basic(self): from pyspark.sql.functions import pandas_udf, col @@ -3498,16 +3486,7 @@ class VectorizedUDFTests(ReusedPySparkTestCase): @unittest.skipIf(not _have_pandas or not _have_arrow, "Pandas or Arrow not installed") -class GroupbyApplyTests(ReusedPySparkTestCase): - @classmethod - def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() - cls.spark = SparkSession(cls.sc) - - @classmethod - def tearDownClass(cls): - 
ReusedPySparkTestCase.tearDownClass() - cls.spark.stop() +class GroupbyApplyTests(ReusedSQLTestCase): def assertFramesEqual(self, expected, result): msg = ("DataFrames are not equal: " + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
