xinrong-databricks commented on a change in pull request #32177:
URL: https://github.com/apache/spark/pull/32177#discussion_r614264742
##########
File path: python/pyspark/testing/utils.py
##########
@@ -171,3 +209,393 @@ def search_jar(project_relative_path, sbt_jar_name_prefix, mvn_jar_name_prefix):
         raise Exception("Found multiple JARs: %s; please remove all but one" %
                         (", ".join(jars)))
     else:
         return jars[0]
+
+
+# Utilities below are used mainly in pyspark/pandas
+class SQLTestUtils(object):
+ """
+    This util assumes the instance has a 'spark' attribute holding a Spark session.
+    It is usually used with the 'ReusedSQLTestCase' class, but can be used standalone
+    as long as the implementing class has a 'spark' attribute.
+    """
+
+ @contextmanager
+ def sql_conf(self, pairs):
+ """
+        A convenient context manager to test configuration-specific logic. It sets
+        each configuration `key` in `pairs` to its `value` and restores the previous
+        values when it exits.
+        """
+        assert hasattr(self, "spark"), "this class should have a 'spark' attribute holding a Spark session."
+
+ with sqlc(pairs, spark=self.spark):
+ yield
+
+ @contextmanager
+ def database(self, *databases):
+ """
+        A convenient context manager to test with specific databases. It drops the
+        given databases if they exist and sets the current database back to "default"
+        when it exits.
+        """
+        assert hasattr(self, "spark"), "this class should have a 'spark' attribute holding a Spark session."
+
+ try:
+ yield
+ finally:
+ for db in databases:
+ self.spark.sql("DROP DATABASE IF EXISTS %s CASCADE" % db)
+ self.spark.catalog.setCurrentDatabase("default")
+
+ @contextmanager
+ def table(self, *tables):
+ """
+        A convenient context manager to test with specific tables. It drops the given
+        tables if they exist when it exits.
+        """
+        assert hasattr(self, "spark"), "this class should have a 'spark' attribute holding a Spark session."
+
+ try:
+ yield
+ finally:
+ for t in tables:
+ self.spark.sql("DROP TABLE IF EXISTS %s" % t)
+
+ @contextmanager
+ def tempView(self, *views):
+ """
+        A convenient context manager to test with specific temporary views. It drops
+        the given views if they exist when it exits.
+        """
+        assert hasattr(self, "spark"), "this class should have a 'spark' attribute holding a Spark session."
+
+ try:
+ yield
+ finally:
+ for v in views:
+ self.spark.catalog.dropTempView(v)
+
+ @contextmanager
+ def function(self, *functions):
+ """
+        A convenient context manager to test with specific functions. It drops the
+        given functions if they exist when it exits.
+        """
+        assert hasattr(self, "spark"), "this class should have a 'spark' attribute holding a Spark session."
+
+ try:
+ yield
+ finally:
+ for f in functions:
+ self.spark.sql("DROP FUNCTION IF EXISTS %s" % f)
+
+
+class ReusedSQLTestCase(unittest.TestCase, SQLTestUtils):
+ @classmethod
+ def setUpClass(cls):
+ cls.spark = default_session()
+ cls.spark.conf.set(SPARK_CONF_ARROW_ENABLED, True)
+
+ @classmethod
+ def tearDownClass(cls):
+        # We don't stop the Spark session so it can be reused across all tests.
+        # The Spark session will be started and stopped at the PyTest session level.
+        # Please see databricks/koalas/conftest.py.
+ pass
+
+ def assertPandasEqual(self, left, right, check_exact=True):
Review comment:
       Would it be fine to keep `ReusedSQLTestCase` in `python/pyspark/testing/pandasutils.py`, since it has pandas-only testing functions?
       Shall we rename it to avoid confusion with the `ReusedSQLTestCase` in `python/pyspark/testing/sqlutils.py`?
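
       For reference, a minimal sketch of how these utilities are meant to be used in a test. The test class name, table name, and config key below are illustrative assumptions, not taken from this PR; the import path follows the file touched by this diff:

           import unittest

           from pyspark.testing.utils import ReusedSQLTestCase


           class ExampleUtilsTest(ReusedSQLTestCase):
               def test_with_temp_objects(self):
                   # sql_conf restores the previous value of the key when the block exits.
                   with self.sql_conf({"spark.sql.shuffle.partitions": "4"}):
                       self.assertEqual(
                           self.spark.conf.get("spark.sql.shuffle.partitions"), "4")

                   # table drops tmp_tbl on exit, even if the assertions fail.
                   with self.table("tmp_tbl"):
                       self.spark.range(10).write.saveAsTable("tmp_tbl")
                       self.assertEqual(self.spark.table("tmp_tbl").count(), 10)


           if __name__ == "__main__":
               unittest.main()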