xinrong-databricks commented on a change in pull request #33174:
URL: https://github.com/apache/spark/pull/33174#discussion_r663140875
##########
File path: python/run-tests.py
##########
@@ -40,6 +44,111 @@
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
from sparktestsupport.modules import all_modules, pyspark_sql # noqa
+# Make sure logging config before any possible logging print
+logging.basicConfig(stream=sys.stdout, format="%(message)s")
+LOGGER = logging.getLogger()
+
+
+def _contain_unittests_class(module_name, slow=False):
+ """
+ Check if the module with specific module has classes are derived from
unittest.TestCase.
+ Such as:
+ pyspark.tests.test_appsubmit, it will return True, because there is
SparkSubmitTests which is
+ included under the module of pyspark.tests.test_appsubmit, inherits from
unittest.TestCase.
+
+ Parameters
+ ----------
+ module_name : str
+ The module name to be check
+ slow : bool
+ Return True if module contains unittests and is_slow_test is marked as
True.
+
+ Returns
+ -------
+ True if contains unittest classes otherwise False. An
``ModuleNotFoundError`` will raise if the
+ module is not found.
+
+ >>> _contain_unittests_class("pyspark.tests.test_appsubmit")
+ True
+ >>> _contain_unittests_class("pyspark.conf")
+ False
+ >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe",
slow=True)
+ True
+ >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe")
+ False
+ """
+ module = import_module(module_name)
+ for _, _class in inspect.getmembers(module, inspect.isclass):
+ if issubclass(_class, unittest.TestCase):
+ if slow and hasattr(module, 'is_slow_test'):
+ return True
+ if not slow and not hasattr(module, 'is_slow_test'):
+ return True
+ return False
+
+
+def _discover_python_unittests(paths):
+ """Discover the python module which contains unittests under paths.
+
+ Such as:
+ ['pyspark/tests'], it will return the set of module name under the path of
pyspark/tests, like
+ {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
+
+ Parameters
+ ----------
+ paths : list
+ Paths of modules to be discovered.
+
+ Returns
+ -------
+ A set of complete test module name discovered under specified paths
+
+ >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast',
'pyspark.tests.test_conf',
+ 'pyspark.tests.test_context', 'pyspark.tests.test_daemon',
'pyspark.tests.test_install_spark',
+ 'pyspark.tests.test_join', 'pyspark.tests.test_pin_thread',
'pyspark.tests.test_profiler',
+ 'pyspark.tests.test_rdd', 'pyspark.tests.test_rddbarrier',
'pyspark.tests.test_readwrite',
+ 'pyspark.tests.test_serializers', 'pyspark.tests.test_shuffle',
+ 'pyspark.tests.test_taskcontext', 'pyspark.tests.test_util',
'pyspark.tests.test_worker']
+ >>> sorted([x for x in
_discover_python_unittests([("pyspark/pandas/tests", "slow")])])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['pyspark.pandas.tests.indexes.test_base',
'pyspark.pandas.tests.indexes.test_datetime',
+ 'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby',
+ 'pyspark.pandas.tests.test_indexing',
'pyspark.pandas.tests.test_ops_on_diff_frames',
+ 'pyspark.pandas.tests.test_ops_on_diff_frames_groupby',
'pyspark.pandas.tests.test_series',
+ 'pyspark.pandas.tests.test_stats']
+ >>> sorted([x for x in _discover_python_unittests([('pyspark/tests',
'slow')])])
+ []
+ """
+ if not paths:
+ return []
+ modules = set()
+ pyspark_path = os.path.join(SPARK_HOME, "python")
+ for path in paths:
+ slow_only = False
+ if isinstance(path, tuple) and len(path) == 2 and path[1] == "slow":
+ slow_only = True
+ path = path[0]
+ real_path = os.path.join(pyspark_path, path)
+ prefix = path.replace('/', '.')
+ # Travel the module under the real_path
+ for importer, module_name, ispkg in pkgutil.walk_packages([real_path],
prefix=prefix+'.'):
+ if _contain_unittests_class(module_name, slow_only):
+ modules.add(module_name)
+
+ return sorted(list(modules))
+
+
+def _append_discovred_goals(all_modules):
Review comment:
nit: typo in the function name — `discovred` should be `discovered` (i.e. `_append_discovered_goals`)?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]