Yikun commented on a change in pull request #33174:
URL: https://github.com/apache/spark/pull/33174#discussion_r662248302
##########
File path: python/run-tests.py
##########
@@ -40,6 +42,99 @@
from sparktestsupport.shellutils import which, subprocess_check_output # noqa
from sparktestsupport.modules import all_modules, pyspark_sql # noqa
+# Make sure logging is configured before any possible logging output
+logging.basicConfig(stream=sys.stdout, format="%(message)s")
+LOGGER = logging.getLogger()
+
+
+def _get_module_from_name(name):
+ __import__(name)
+ return sys.modules[name]
+
+
+def _discover_python_unittests(paths, discover_slow=False):
+ """Discover the python module which contains unittests under paths.
+
+ Such as:
+ ['pyspark/tests'], it will return the set of module names under the path of pyspark/tests, like
+ {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
+
+ Parameters
+ ----------
+ paths : list
+ Paths of modules to be discovered.
+ discover_slow : bool
+ If True, will only discover slow tests
+ If False, will discover all tests except slow tests
+
+ Returns
+ -------
+ A set of complete test module names discovered under specified paths
+
+ >>> sorted([x for x in _discover_python_unittests(['pyspark/tests'])])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', 'pyspark.tests.test_conf',
+ 'pyspark.tests.test_context', 'pyspark.tests.test_daemon', 'pyspark.tests.test_install_spark',
+ 'pyspark.tests.test_join', 'pyspark.tests.test_pin_thread', 'pyspark.tests.test_profiler',
+ 'pyspark.tests.test_rdd', 'pyspark.tests.test_rddbarrier', 'pyspark.tests.test_readwrite',
+ 'pyspark.tests.test_serializers', 'pyspark.tests.test_shuffle',
+ 'pyspark.tests.test_taskcontext', 'pyspark.tests.test_util', 'pyspark.tests.test_worker']
+ >>> sorted([x for x in _discover_python_unittests([("pyspark/pandas/tests", "slow")])])
+ ... # doctest: +NORMALIZE_WHITESPACE
+ ['pyspark.pandas.tests.indexes.test_base', 'pyspark.pandas.tests.indexes.test_datetime',
+ 'pyspark.pandas.tests.test_dataframe', 'pyspark.pandas.tests.test_groupby',
+ 'pyspark.pandas.tests.test_indexing', 'pyspark.pandas.tests.test_ops_on_diff_frames',
+ 'pyspark.pandas.tests.test_ops_on_diff_frames_groupby', 'pyspark.pandas.tests.test_series',
+ 'pyspark.pandas.tests.test_stats']
+ """
+
+ def add_test_module(testcases, modules, slow):
+ """Append the testcases module names to modules set"""
+ if isinstance(testcases, Iterable):
+ for test_case in testcases:
+ add_test_module(test_case, modules, slow)
+ else:
+ name = testcases.__module__
+ module = _get_module_from_name(name)
+ if slow and hasattr(module, 'is_slow_test'):
+ modules.add(name)
+ if not slow and not hasattr(module, 'is_slow_test'):
+ modules.add(name)
+
+ if not paths:
+ return []
+ modules = set()
+ pyspark_path = os.path.join(SPARK_HOME, "python")
+ for path in paths:
+ if isinstance(path, tuple) and len(path) >= 2 and path[1] == "slow":
Review comment:
reset discover_slow
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]