Yikun commented on a change in pull request #33174:
URL: https://github.com/apache/spark/pull/33174#discussion_r663592493



##########
File path: python/run-tests.py
##########
@@ -40,6 +44,111 @@
 from sparktestsupport.shellutils import which, subprocess_check_output  # noqa
 from sparktestsupport.modules import all_modules, pyspark_sql  # noqa
 
+# Configure logging before anything can emit a log message
+logging.basicConfig(stream=sys.stdout, format="%(message)s")
+LOGGER = logging.getLogger()
+
+
+def _contain_unittests_class(module_name, slow=False):
+    """
+    Check whether the module with the given name has classes derived from 
unittest.TestCase.
+    Such as:
+    pyspark.tests.test_appsubmit, it will return True, because there is 
SparkSubmitTests which is
+    included under the module of pyspark.tests.test_appsubmit, inherits from 
unittest.TestCase.
+
+    Parameters
+    ----------
+    module_name : str
+        The name of the module to check
+    slow : bool
+        Return True if module contains unittests and is_slow_test is marked as 
True.
+
+    Returns
+    -------
+    True if the module contains unittest classes, otherwise False. A 
``ModuleNotFoundError`` is raised if the
+    module is not found.
+
+    >>> _contain_unittests_class("pyspark.tests.test_appsubmit")
+    True
+    >>> _contain_unittests_class("pyspark.conf")
+    False
+    >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe", 
slow=True)
+    True
+    >>> _contain_unittests_class("pyspark.pandas.tests.test_dataframe")
+    False
+    """
+    module = import_module(module_name)
+    for _, _class in inspect.getmembers(module, inspect.isclass):
+        if issubclass(_class, unittest.TestCase):
+            if slow and hasattr(module, 'is_slow_test'):
+                return True
+            if not slow and not hasattr(module, 'is_slow_test'):
+                return True
+    return False
+
+
+def _discover_python_unittests(paths):
+    """Discover the Python modules that contain unittests under paths.
+
+    Such as:
+    ['pyspark/tests'], it will return the set of module names under the path of 
pyspark/tests, like
+    {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
+
+    Parameters
+    ----------
+    paths : list
+        Paths of modules to be discovered.
+
+    Returns
+    -------
+    A set of complete test module names discovered under the specified paths
+
+    >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', 
'pyspark.tests.test_conf',
+    'pyspark.tests.test_context', 'pyspark.tests.test_daemon', 
'pyspark.tests.test_install_spark',
+    'pyspark.tests.test_join', 'pyspark.tests.test_pin_thread', 
'pyspark.tests.test_profiler',
+    'pyspark.tests.test_rdd', 'pyspark.tests.test_rddbarrier', 
'pyspark.tests.test_readwrite',
+    'pyspark.tests.test_serializers', 'pyspark.tests.test_shuffle',
+    'pyspark.tests.test_taskcontext', 'pyspark.tests.test_util', 
'pyspark.tests.test_worker']
+    >>> sorted([x for x in 
_discover_python_unittests([("pyspark/pandas/tests", "slow")])])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['pyspark.pandas.tests.indexes.test_base', 
'pyspark.pandas.tests.indexes.test_datetime',
+    'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby',
+    'pyspark.pandas.tests.test_indexing', 
'pyspark.pandas.tests.test_ops_on_diff_frames',
+    'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 
'pyspark.pandas.tests.test_series',
+    'pyspark.pandas.tests.test_stats']
+    >>> sorted([x for x in _discover_python_unittests([('pyspark/tests', 
'slow')])])
+    []
+    """
+    if not paths:
+        return []
+    modules = set()
+    pyspark_path = os.path.join(SPARK_HOME, "python")
+    for path in paths:
+        slow_only = False
+        if isinstance(path, tuple) and len(path) == 2 and path[1] == "slow":

Review comment:
       Sure




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to