Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21107#discussion_r182779098
--- Diff: python/run-tests.py ---
@@ -152,65 +171,17 @@ def parse_opts():
return opts
-def _check_dependencies(python_exec, modules_to_test):
- if "COVERAGE_PROCESS_START" in os.environ:
- # Make sure if coverage is installed.
- try:
- subprocess_check_output(
- [python_exec, "-c", "import coverage"],
- stderr=open(os.devnull, 'w'))
- except:
- print_red("Coverage is not installed in Python executable '%s' "
- "but 'COVERAGE_PROCESS_START' environment variable is set, "
- "exiting." % python_exec)
- sys.exit(-1)
-
- # If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and
- # explicitly prints out. See SPARK-23300.
- if pyspark_sql in modules_to_test:
- # TODO(HyukjinKwon): Relocate and deduplicate these version specifications.
- minimum_pyarrow_version = '0.8.0'
- minimum_pandas_version = '0.19.2'
-
- try:
- pyarrow_version = subprocess_check_output(
- [python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
- universal_newlines=True,
- stderr=open(os.devnull, 'w')).strip()
- if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
- LOGGER.info("Will test PyArrow related features against Python executable "
- "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
- else:
- LOGGER.warning(
- "Will skip PyArrow related features against Python executable "
- "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
- "%s was found." % (
- python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
- except:
- LOGGER.warning(
- "Will skip PyArrow related features against Python executable "
- "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
- "was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))
-
- try:
- pandas_version = subprocess_check_output(
- [python_exec, "-c", "import pandas; print(pandas.__version__)"],
- universal_newlines=True,
- stderr=open(os.devnull, 'w')).strip()
- if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
- LOGGER.info("Will test Pandas related features against Python executable "
- "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
- else:
- LOGGER.warning(
- "Will skip Pandas related features against Python executable "
- "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
- "%s was found." % (
- python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
- except:
- LOGGER.warning(
- "Will skip Pandas related features against Python executable "
- "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
- "was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))
+def _check_coverage(python_exec):
+ # Make sure if coverage is installed.
+ try:
+ subprocess_check_output(
+ [python_exec, "-c", "import coverage"],
+ stderr=open(os.devnull, 'w'))
+ except:
+ print_red("Coverage is not installed in Python executable '%s' "
+ "but 'COVERAGE_PROCESS_START' environment variable is set, "
+ "exiting." % python_exec)
+ sys.exit(-1)
--- End diff --
Just in case anyone is worried:
```
Got an exception while trying to store skipped test output:
Traceback (most recent call last):
  File "./python/run-tests.py", line 116, in run_individual_python_test
    per_test_output.seek()
TypeError: seek() takes at least 1 argument (0 given)
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]