Yikun commented on a change in pull request #33174:
URL: https://github.com/apache/spark/pull/33174#discussion_r663592408
##########
File path: python/run-tests.py
##########
@@ -40,6 +44,111 @@
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
from sparktestsupport.modules import all_modules, pyspark_sql # noqa
+# Make sure logging config before any possible logging print
+logging.basicConfig(stream=sys.stdout, format="%(message)s")
+LOGGER = logging.getLogger()
+
+
+def _contain_unittests_class(module_name, slow=False):
+ """
+ Check if the module with specific module has classes are derived from
unittest.TestCase.
+ Such as:
+ pyspark.tests.test_appsubmit, it will return True, because there is
SparkSubmitTests which is
+ included under the module of pyspark.tests.test_appsubmit, inherits from
unittest.TestCase.
+
+ Parameters
+ ----------
+ module_name : str
+ The module name to be check
+ slow : bool
+ Return True if module contains unittests and is_slow_test is marked as
True.
+
+ Returns
+ -------
+ True if contains unittest classes otherwise False. An
``ModuleNotFoundError`` will raise if the
+ module is not found.
+
+ >>> _contain_unittests_class("pyspark.tests.test_appsubmit")
+ True
+ >>> _contain_unittests_class("pyspark.conf")
+ False
+ >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe",
slow=True)
+ True
+ >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe")
+ False
+ """
+ module = import_module(module_name)
+ for _, _class in inspect.getmembers(module, inspect.isclass):
+ if issubclass(_class, unittest.TestCase):
+ if slow and hasattr(module, 'is_slow_test'):
+ return True
+ if not slow and not hasattr(module, 'is_slow_test'):
+ return True
+ return False
+
+
+def _discover_python_unittests(paths):
+ """Discover the python module which contains unittests under paths.
+
+ Such as:
+ ['pyspark/tests'], it will return the set of module name under the path of
pyspark/tests, like
+ {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
+
+ Parameters
+ ----------
+ paths : list
+ Paths of modules to be discovered.
+
+ Returns
+ -------
+ A set of complete test module name discovered under specified paths
+
+ >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])])
Review comment:
Yes, this doctest makes sure that `_discover_python_unittests` works in a
real environment.
For example, if we forgot to add `pyspark.pandas.tests.test_series`, the CI
would fail with an error like:
```Python
**********************************************************************
File "./python/run-tests.py", line 116, in
__main__._discover_python_unittests
Failed example:
sorted([x for x in _discover_python_unittests([("pyspark/pandas/tests",
"slow")])])
# doctest: +NORMALIZE_WHITESPACE
Expected:
['pyspark.pandas.tests.indexes.test_base',
'pyspark.pandas.tests.indexes.test_datetime',
'pyspark.pandas.tests.test_dataframe',
'pyspark.pandas.tests.test_groupby',
'pyspark.pandas.tests.test_indexing',
'pyspark.pandas.tests.test_ops_on_diff_frames',
'pyspark.pandas.tests.test_ops_on_diff_frames_groupby',
'pyspark.pandas.tests.test_series']
Got:
['pyspark.pandas.tests.indexes.test_base',
'pyspark.pandas.tests.indexes.test_datetime',
'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby',
'pyspark.pandas.tests.test_indexing',
'pyspark.pandas.tests.test_ops_on_diff_frames',
'pyspark.pandas.tests.test_ops_on_diff_frames_groupby',
'pyspark.pandas.tests.test_series', 'pyspark.pandas.tests.test_series']
**********************************************************************
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]