Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21107#discussion_r182779098
  
    --- Diff: python/run-tests.py ---
    @@ -152,65 +171,17 @@ def parse_opts():
         return opts
     
     
    -def _check_dependencies(python_exec, modules_to_test):
    -    if "COVERAGE_PROCESS_START" in os.environ:
    -        # Make sure if coverage is installed.
    -        try:
    -            subprocess_check_output(
    -                [python_exec, "-c", "import coverage"],
    -                stderr=open(os.devnull, 'w'))
    -        except:
    -            print_red("Coverage is not installed in Python executable '%s' "
    -                      "but 'COVERAGE_PROCESS_START' environment variable is set, "
    -                      "exiting." % python_exec)
    -            sys.exit(-1)
    -
    -    # If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and
    -    # explicitly prints out. See SPARK-23300.
    -    if pyspark_sql in modules_to_test:
    -        # TODO(HyukjinKwon): Relocate and deduplicate these version specifications.
    -        minimum_pyarrow_version = '0.8.0'
    -        minimum_pandas_version = '0.19.2'
    -
    -        try:
    -            pyarrow_version = subprocess_check_output(
    -                [python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
    -                universal_newlines=True,
    -                stderr=open(os.devnull, 'w')).strip()
    -            if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
    -                LOGGER.info("Will test PyArrow related features against Python executable "
    -                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
    -            else:
    -                LOGGER.warning(
    -                    "Will skip PyArrow related features against Python executable "
    -                    "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
    -                    "%s was found." % (
    -                        python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
    -        except:
    -            LOGGER.warning(
    -                "Will skip PyArrow related features against Python executable "
    -                "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
    -                "was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))
    -
    -        try:
    -            pandas_version = subprocess_check_output(
    -                [python_exec, "-c", "import pandas; print(pandas.__version__)"],
    -                universal_newlines=True,
    -                stderr=open(os.devnull, 'w')).strip()
    -            if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
    -                LOGGER.info("Will test Pandas related features against Python executable "
    -                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
    -            else:
    -                LOGGER.warning(
    -                    "Will skip Pandas related features against Python executable "
    -                    "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
    -                    "%s was found." % (
    -                        python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
    -        except:
    -            LOGGER.warning(
    -                "Will skip Pandas related features against Python executable "
    -                "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
    -                "was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))
    +def _check_coverage(python_exec):
    +    # Make sure if coverage is installed.
    +    try:
    +        subprocess_check_output(
    +            [python_exec, "-c", "import coverage"],
    +            stderr=open(os.devnull, 'w'))
    +    except:
    +        print_red("Coverage is not installed in Python executable '%s' "
    +                  "but 'COVERAGE_PROCESS_START' environment variable is set, "
    +                  "exiting." % python_exec)
    +        sys.exit(-1)
    --- End diff --
    
    Just in case anyone is worried: 
    
    ```
    
    Got an exception while trying to store skipped test output:
    Traceback (most recent call last):
      File "./python/run-tests.py", line 116, in run_individual_python_test
        per_test_output.seek()
    TypeError: seek() takes at least 1 argument (0 given)
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to