[GitHub] spark pull request: [SPARK-7017][Build][Project Infra]: Refactor d...

brennonyork Sat, 25 Apr 2015 18:04:57 -0700

Github user brennonyork commented on a diff in the pull request:

    https://github.com/apache/spark/pull/5694#discussion_r29105800
  
    --- Diff: dev/run-tests ---
    @@ -17,239 +17,394 @@
     # limitations under the License.
     #
     
    -# Go to the Spark project root directory
    -FWDIR="$(cd "`dirname $0`"/..; pwd)"
    -cd "$FWDIR"
    +import os
    +import re
    +import shutil
    +import subprocess as sp
    +
    +# Set the Spark project root directory
    +spark_proj_root = os.path.abspath("..")
    +# Set the user 'HOME' directory
    +user_home_dir = os.environ.get("HOME")
    +# Set the sbt maven profile arguments environment variable name
    +sbt_maven_profile_args_env = "SBT_MAVEN_PROFILES_ARGS"
    +# Set the amplab jenkins build tool environment variable name
    +amplab_jenkins_build_tool_env = "AMPLAB_JENKINS_BUILD_TOOL"
    +# Set the amplab jenkins build tool environment value
    +amplab_jenkins_build_tool = os.environ.get(amplab_jenkins_build_tool_env)
    +# Set whether we're on an Amplab Jenkins box by checking for a specific
    +# environment variable
    +amplab_jenkins = os.environ.get("AMPLAB_JENKINS")
    +# Set the pattern for sbt output e.g. "[info] Resolving ..."
    +resolving_re = "^.*[info].*Resolving"
    +# Set the pattern for sbt output e.g. "[warn] Merging ..."
    +merging_re = "^.*[warn].*Merging"
    +# Set the pattern for sbt output e.g. "[info] Including ..."
    +including_re = "^.*[info].*Including"
    +# Compile the various regex patterns into a filter
    +sbt_output_filter = re.compile(resolving_re + "|" + 
    +                               merging_re + "|" +
    +                               including_re)
    +
    +def get_error_codes(err_code_file):
    +    """Function to retrieve all block numbers from the `run-tests-codes.sh`
    +    file to maintain backwards compatibility with the `run-tests-jenkins` 
    +    script"""
    +    
    +    with open(err_code_file, 'r') as f:
    +        err_codes = [e.split()[1].strip().split('=') 
    +                     for e in f if e.startswith("readonly")]
    +        return dict(err_codes)
    +
    +def rm_r(path):
    +    """Given an arbitrary path properly remove it with the correct python
    +    construct if it exists
    +    - from: http://stackoverflow.com/a/9559881""";
    +
    +    if os.path.isdir(path):
    +        shutil.rmtree(path)
    +    elif os.path.exists(path):
    +        os.remove(path)
    +
    +def lineno():
    +    """Returns the current line number in our program
    +    - from: http://stackoverflow.com/a/3056059""";
    +
    +    return inspect.currentframe().f_back.f_lineno
    +
    +def set_sbt_maven_profile_args():
    +    """Properly sets the SBT environment variable arguments with additional
    +    checks to determine if this is running on an Amplab Jenkins machine"""
    +
    +    # base environment values for sbt_maven_profile_args_env which will be 
appended on
    +    sbt_maven_profile_args_base = ["-Pkinesis-asl"]
    +
    +    sbt_maven_profile_arg_dict = {
    +        "hadoop1.0" : ["-Dhadoop.version=1.0.4"],
    +        "hadoop2.0" : ["-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
    +        "hadoop2.2" : ["-Pyarn", "-Phadoop-2.2", "-Dhadoop.version=2.2.0"],
    +        "hadoop2.3" : ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
    +    }
    +
    +    # set the SBT maven build profile argument environment variable and 
ensure
    +    # we build against the right version of Hadoop
    +    if os.environ.get("AMPLAB_JENKINS_BUILD_PROFILE"):
    +        os.environ[sbt_maven_profile_args_env] = \
    +            " ".join(sbt_maven_profile_arg_dict.get(ajbp, []) 
    +                     + sbt_maven_profile_args_base)
    +    else:
    +        os.environ[sbt_maven_profile_args_env] = \
    +            " ".join(sbt_maven_profile_arg_dict.get("hadoop2.3", [])
    +                     + sbt_maven_profile_args_base)
    +
    +def is_exe(path):
    +    """Check if a given path is an executable file
    +    - from: http://stackoverflow.com/a/377028""";
    +
    +    return os.path.isfile(path) and os.access(path, os.X_OK)
    +
    +def which(program):
    +    """Find and return the given program by its absolute path or 'None'
    +    - from: http://stackoverflow.com/a/377028""";
    +
    +    fpath, fname = os.path.split(program)
    +
    +    if fpath:
    +        if is_exe(program):
    +            return program
    +    else:
    +        for path in os.environ.get("PATH").split(os.pathsep):
    +            path = path.strip('"')
    +            exe_file = os.path.join(path, program)
    +            if is_exe(exe_file):
    +                return exe_file
    +    return None
    +
    +def determine_java_executable():
    +    """Will return the *best* path possible for a 'java' executable or 
`None`"""
    +
    +    java_home = os.environ.get("JAVA_HOME")
    +
    +    # check if there is an executable at $JAVA_HOME/bin/java
    +    java_exe = which(os.path.join(java_home, "bin/java"))
    +    # if the java_exe wasn't set, check for a `java` version on the $PATH
    +    return java_exe if java_exe else which("java")
    +
    +def determine_java_version(java_exe):
    +    """Given a valid java executable will return its version in tuple 
format as:
    +    [<major-version>, <minor-version>, <patch-version>, 
<update-version>]"""
    +
    +    raw_output = sp.check_output([java_exe, "-version"], stderr=sp.STDOUT)
    +    raw_version_str = raw_output.split('\n')[0] # eg 'java version 
"1.8.0_25"'
    +    version_str = raw_version_str.split()[-1].strip('"') # eg '1.8.0_25'
    +    version, update = version_str.split('_') # eg ['1.8.0', '25']
    +
    +    # map over the values and convert them to integers
    +    return map(lambda x: int(x), version.split('.') + [update])
    +
    +def multi_starts_with(orig_str, *prefixes):
    +    """Takes a string and an abritrary number of prefixes then checks the
    +    original string for any of the possible prefixes passed in"""
    +
    +    for s in prefixes:
    +        if orig_str.startswith(s):
    +            return True
    +    return False
    +
    +# This function current acts to determine if SQL tests need to be run in
    +# addition to the core test suite *or* if _only_ SQL tests need to be run
    +# as the git logs show that to be the only thing touched. In the future
    +# this function will act more generically to help further segregate the
    +# test suite runner (hence the function name).
    +# @return a set of unique test names
    +def determine_test_suite():
    +    test_suite = list()
    +
    +    if amplab_jenkins:
    +        sp.Popen(['git', 'fetch', 'origin', 'master:master']).wait()
    +
    +        raw_output = sp.check_output(['git', 'diff', '--name-only', 
'master'])
    +        # remove any empty strings
    +        changed_files = [f for f in raw_output.split('\n') if f]
    +
    +        # find any sql files
    +        sql_files = [f for f in changed_files
    +                     if multi_starts_with(f, 
    +                                          "sql/", 
    +                                          "bin/spark-sql", 
    +                                          "sbin/start-thriftserver.sh")]
    +
    +        non_sql_files = set(changed_files).difference(set(sql_files))
    +
    +        if non_sql_files:
    +            test_suite.append("CORE")
    +        if sql_files:
    +            print "[info] Detected changes in SQL. Will run Hive test 
suite."
    +            test_suite.append("SQL")
    +            if not non_sql_files:
    +                print "[info] Detected no changes except in SQL. Will only 
run SQL tests."
    +        return set(test_suite)
    +    else:
    +        # we aren't in the Amplab environment so merely run all tests
    +        test_suite.append("CORE")
    +        test_suite.append("SQL")
    +        return set(test_suite)
    +
    +def set_title_and_block(title, err_block):
    +    os.environ["CURRENT_BLOCK"] = error_codes[err_block]
    +    line_str = "".join(['='] * 72)
    +
    +    print
    +    print line_str
    +    print title
    +    print line_str
    +
    +def run_cmd(cmd):
    +    """Given a command as a list of arguments will attempt to execute the
    +    command and, on failure, print an error message"""
    +
    +    if not isinstance(cmd, list):
    +        cmd = cmd.split()
    +    try:
    +        sp.check_output(cmd)
    +    except sp.CalledProcessError as e:
    +        print "[error] running", e.cmd, "; received return code", 
e.returncode
    +        exit(e.returncode)
    +
    +def run_apache_rat_checks():
    +    set_title_and_block("Running Apache RAT checks", "BLOCK_RAT")
    +    run_cmd(["./dev/check-license"])
    +
    +def run_scala_style_checks():
    +    set_title_and_block("Running Scala style checks", "BLOCK_SCALA_STYLE")
    +    run_cmd(["./dev/lint-scala"])
    +
    +def run_python_style_checks():
    +    set_title_and_block("Running Python style checks", 
"BLOCK_PYTHON_STYLE")
    +    run_cmd(["./dev/lint-python"])
    +
    +def exec_maven(mvn_args = []):
    +    """Will call Maven in the current directory with the list of mvn_args 
passed
    +    in and returns the subprocess for any further processing"""
    +
    +    return sp.Popen(["./build/mvn"] + mvn_args)
    +
    +def exec_sbt(sbt_args = []):
    +    """Will call SBT in the current directory with the list of mvn_args 
passed
    +    in and returns the subprocess for any further processing"""
    +
    +    # NOTE: echo "q" is needed because sbt on encountering a build file
    +    # with failure (either resolution or compilation) prompts the user for
    +    # input either q, r, etc to quit or retry. This echo is there to make 
it
    +    # not block.
    +    echo_proc = sp.Popen(["echo", "\"q\n\""])
    +    sbt_proc = sp.Popen(["./build/sbt"] + sbt_args,
    +                        stdin=echo_proc.stdout,
    +                        stdout=sp.PIPE)
    +    echo_proc.wait()
    --- End diff --
    
    For this I actually tried every way I could to get `communicate()` to work, 
but, as it turns out, it blocks and buffers the resulting `stdout` stream into 
a variable (i.e. `communicate()` returns a tuple of `(stdout, stderr)` and 
blocks until it has them). That is the main reason I couldn't use it: I would 
not be able to run the regex filter logic over the output, and the unfiltered 
output would flood the Jenkins logs. I'll look further, but [here is the 
doc](https://docs.python.org/2/library/subprocess.html#subprocess.Popen.communicate)
 that goes into it in a bit more detail.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] spark pull request: [SPARK-7017][Build][Project Infra]: Refactor d...

Reply via email to