This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new fb28285 IMPALA-9107 (part 2): Add script to use the m2 archive tarball fb28285 is described below commit fb282852ef52d72079a86c55a90982ffac567cc7 Author: Joe McDonnell <joemcdonn...@cloudera.com> AuthorDate: Thu Apr 2 17:28:45 2020 -0700 IMPALA-9107 (part 2): Add script to use the m2 archive tarball This adds a script to find an appropriate m2 archive tarball, download it, and use it to prepopulate the ~/.m2 directory. The script uses the JSON interface for Jenkins to search through the all-build-options-ub1604 builds on jenkins.impala.io to find one that: 1. Is building the "master" branch 2. Has the m2_archive.tar.gz Then, it downloads the m2 archive and uses it to populate ~/.m2. It does not overwrite or remove any files already in ~/.m2. The build scripts that call populate_m2_directory.py do not rely on the script succeeding. They will continue even if the script fails. This also modifies the build-all-flag-combinations.sh script to only build the m2 archive if the GENERATE_M2_ARCHIVE environment variable is true. GENERATE_M2_ARCHIVE=true will clear out the ~/.m2 directory to build an accurate m2 archive. Precommit jobs will use GENERATE_M2_ARCHIVE=false, which will allow them to use the m2 archive to speed up the build. 
Testing: - Ran gerrit-verify-dryrun - Tested locally Change-Id: I5065658d8c0514550927161855b0943fa7b3a402 Reviewed-on: http://gerrit.cloudera.org:8080/15735 Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- bin/bootstrap_build.sh | 5 + bin/bootstrap_system.sh | 5 + bin/jenkins/build-all-flag-combinations.sh | 17 ++- bin/jenkins/populate_m2_directory.py | 172 +++++++++++++++++++++++++++++ 4 files changed, 195 insertions(+), 4 deletions(-) diff --git a/bin/bootstrap_build.sh b/bin/bootstrap_build.sh index 1168bb0..a450ef7 100755 --- a/bin/bootstrap_build.sh +++ b/bin/bootstrap_build.sh @@ -54,4 +54,9 @@ if [ ! -d /usr/local/apache-maven-3.5.4 ]; then sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin fi +# Try to prepopulate the m2 directory to save time +if ! bin/jenkins/populate_m2_directory.py ; then + echo "Failed to prepopulate the m2 directory. Continuing..." +fi + ./buildall.sh -notests -so diff --git a/bin/bootstrap_system.sh b/bin/bootstrap_system.sh index a52083d..18cce2b 100755 --- a/bin/bootstrap_system.sh +++ b/bin/bootstrap_system.sh @@ -471,3 +471,8 @@ fi cd "$HADOOP_LZO_HOME" time -p ant package cd "$IMPALA_HOME" + +# Try to prepopulate the m2 directory to save time +if ! bin/jenkins/populate_m2_directory.py ; then + echo "Failed to prepopulate the m2 directory. Continuing..." +fi diff --git a/bin/jenkins/build-all-flag-combinations.sh b/bin/jenkins/build-all-flag-combinations.sh index a6a0d2c..9209e48 100755 --- a/bin/jenkins/build-all-flag-combinations.sh +++ b/bin/jenkins/build-all-flag-combinations.sh @@ -32,6 +32,8 @@ export IMPALA_MAVEN_OPTIONS="-U" . bin/impala-config.sh +: ${GENERATE_M2_ARCHIVE:=false} + # These are configurations for buildall. 
CONFIGS=( # Test gcc builds with and without -so: @@ -46,6 +48,13 @@ CONFIGS=( FAILED="" +if [[ "$GENERATE_M2_ARCHIVE" == true ]]; then + # The m2 archive relies on parsing the maven log to get a list of jars downloaded + # from particular repositories. To accurately produce the archive every time, we + # need to clear out the ~/.m2 directory before producing the archive. + rm -rf ~/.m2 +fi + TMP_DIR=$(mktemp -d) function onexit { echo "$0: Cleaning up temporary directory" @@ -53,8 +62,6 @@ function onexit { } trap onexit EXIT -mkdir -p ${TMP_DIR} - for CONFIG in "${CONFIGS[@]}"; do DESCRIPTION="Options $CONFIG" @@ -91,7 +98,9 @@ then exit 1 fi -# Make a tarball of the .m2 directory -bin/jenkins/archive_m2_directory.sh logs/mvn/mvn_accumulated.log logs/m2_archive.tar.gz +if [[ "$GENERATE_M2_ARCHIVE" == true ]]; then + # Make a tarball of the .m2 directory + bin/jenkins/archive_m2_directory.sh logs/mvn/mvn_accumulated.log logs/m2_archive.tar.gz +fi # Note: The exit callback handles cleanup of the temp directory. diff --git a/bin/jenkins/populate_m2_directory.py b/bin/jenkins/populate_m2_directory.py new file mode 100755 index 0000000..1570189 --- /dev/null +++ b/bin/jenkins/populate_m2_directory.py @@ -0,0 +1,172 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
# See the License for the
# specific language governing permissions and limitations
# under the License.

import json
import subprocess
import os
import shutil
from tempfile import mkdtemp

ALL_BUILD_OPTIONS_JOB = "all-build-options-ub1604"
JENKINS_IMPALA_IO = "jenkins.impala.io"
M2_ARCHIVE_NAME = "m2_archive.tar.gz"


class JenkinsBuild(object):
    """
    Basic information about a Jenkins build (number, url) to allow retrieving
    more detailed information.
    """
    def __init__(self, number, url):
        self.number = number
        self.url = url


class JenkinsBuildDetails(object):
    """
    Detailed information about the parameters and artifacts for a particular
    Jenkins build. 'parameter_dict' maps parameter name to value and
    'artifact_dict' maps artifact file name to its download URL.
    """
    def __init__(self, parameter_dict, artifact_dict):
        self.parameter_dict = parameter_dict
        self.artifact_dict = artifact_dict


def _download_json(json_url, filename):
    """
    Download 'json_url' into a file named 'filename' inside a fresh temporary
    directory, parse it as JSON and return the parsed object. The temporary
    directory is removed whether or not the download or the parse succeeds.
    Raises subprocess.CalledProcessError if wget exits with a non-zero status.
    """
    tmpdir = mkdtemp()
    try:
        json_path = os.path.join(tmpdir, filename)
        # This uses wget to avoid any python dependencies.
        subprocess.check_call(["wget", "-q", json_url, "-O", json_path])
        with open(json_path) as f:
            return json.load(f)
    finally:
        shutil.rmtree(tmpdir)


def _parse_build_details(build, json_dict):
    """
    Convert the JSON dictionary describing a single build into a
    JenkinsBuildDetails object. Only the first entry of json_dict["actions"]
    that has a "parameters" key is used. A build without any such entry yields
    an empty parameter dictionary rather than an error.
    """
    parameter_dict = {}
    for section in json_dict["actions"]:
        if "parameters" in section:
            for parameter in section["parameters"]:
                # NOTE(review): presumably some parameter types omit "value" in the
                # JSON; .get() records None for those instead of raising. Confirm.
                parameter_dict[parameter["name"]] = parameter.get("value")
            break

    artifact_dict = {}
    for artifact in json_dict["artifacts"]:
        artifact_url = "{0}/artifact/{1}".format(build.url, artifact["relativePath"])
        artifact_dict[artifact["fileName"]] = artifact_url

    return JenkinsBuildDetails(parameter_dict, artifact_dict)


def get_build_list(jenkins_server, job):
    """
    Get the list of recent builds for the specified job on the jenkins server.
    This returns a list of JenkinsBuild objects containing the build numbers
    and corresponding urls.
    """
    # This uses Jenkins' JSON API to get the list of build numbers for this job
    # along with the URL to each build. The JSON is downloaded to a temporary file
    # and read back.
    json_url_tmpl = "https://{0}/job/{1}/api/json?tree=builds[number,url]&pretty=true"
    json_url = json_url_tmpl.format(jenkins_server, job)
    json_dict = _download_json(json_url, "job_{0}_build_list.json".format(job))

    # Convert the JSON dictionaries to JenkinsBuild objects
    return [JenkinsBuild(build_info["number"], build_info["url"])
            for build_info in json_dict["builds"]]


def get_build_details(build):
    """
    Download detailed build information for the build number at the provided URL using
    the Jenkins JSON API. This returns a JenkinsBuildDetails, which includes information
    about the parameters of the Jenkins job and the artifacts produced by the Jenkins job.
    'build' is a JenkinsBuild; only its 'url' and 'number' attributes are read.
    """
    json_url = "{0}/api/json?&pretty=true".format(build.url)
    json_dict = _download_json(json_url, "build_details_{0}.json".format(build.number))
    return _parse_build_details(build, json_dict)


def get_m2_archive_url(jenkins_server, jenkins_job):
    """
    Search the builds of 'jenkins_job' on 'jenkins_server', in the order the
    server lists them, and return the URL of the m2 archive artifact from the
    first suitable build. Returns None if no build qualifies.
    """
    # This code is specific to how the all-build-options-ub1604 Jenkins job is
    # structured (i.e. parameters, archives), so this is not generic.
    for build in get_build_list(jenkins_server, jenkins_job):
        # Go get more detailed information about the job
        build_details = get_build_details(build)
        # There are two criteria for a valid m2 archive:
        # 1. The build is based on the master branch
        # 2. The build has the appropriate m2 archive artifact
        is_master_build = \
            build_details.parameter_dict.get("IMPALA_REPO_BRANCH") == "master"
        archive_url = build_details.artifact_dict.get(M2_ARCHIVE_NAME)
        if is_master_build and archive_url is not None:
            return archive_url

    return None


def download_and_unpack_m2_archive(url, directory):
    """
    Download the tarball at 'url' into 'directory' and unpack it into ~/.m2,
    creating ~/.m2 if it does not exist. Files already present in ~/.m2 are not
    overwritten. Raises subprocess.CalledProcessError if wget or tar fails.
    """
    print("Downloading m2 archive from {0} to {1}".format(url, directory))
    tarball_name = os.path.basename(url)
    tmp_tarball_location = os.path.join(directory, tarball_name)
    subprocess.check_call(["wget", "-q", url, "-O", tmp_tarball_location])
    m2_directory = os.path.expanduser("~/.m2")
    if not os.path.exists(m2_directory):
        print("{0} does not exist, creating...".format(m2_directory))
        os.makedirs(m2_directory)
    print("Unpacking {0} to {1}".format(tarball_name, m2_directory))
    # Unpack into m2 directory, but don't overwrite any files
    tar_command = ["tar", "-zxf", tmp_tarball_location,
                   "-C", m2_directory, "--skip-old-files"]
    subprocess.check_call(tar_command)


def main():
    """
    Find an m2 archive and use it to prepopulate ~/.m2. Exits with status 1 when
    no archive can be found.
    """
    m2_archive_url = get_m2_archive_url(JENKINS_IMPALA_IO, ALL_BUILD_OPTIONS_JOB)
    if not m2_archive_url:
        print("Could not find any m2 archive for {0} {1}".format(
            JENKINS_IMPALA_IO, ALL_BUILD_OPTIONS_JOB))
        # Stop here: there is nothing to download, and continuing would only fail
        # later with a confusing error about a None URL.
        raise SystemExit(1)
    tmpdir = mkdtemp()
    try:
        download_and_unpack_m2_archive(m2_archive_url, tmpdir)
    finally:
        shutil.rmtree(tmpdir)


if __name__ == "__main__":
    main()