IMPALA-3778: Fix ASF packaging build

The tarballs in IMPALA_HOME/infra/python/deps and the thirdparty
directory have been removed in the ASF repository. All Python
dependencies and CDH components must now be downloaded as part of every
build. This caused the ASF packaging build to fail. Before this patch,
we used the system pip to download the Python dependencies, which caused
flakiness and inconsistency on different operating systems. This patch
fixes the problem by using our own script (which requires Python 2.6+ to
be installed on the system) to download all the packages listed in
requirements.txt.

Also replaced all whl and zip Python packages with tar.gz archives to be
consistent with the ASF build.

Change-Id: Ibe5a743096cda2059bd330805d324983f6730e19
Reviewed-on: http://gerrit.cloudera.org:8080/3647
Reviewed-by: Jim Apple <[email protected]>
Tested-by: Taras Bobrovytsky <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/baf8fe20
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/baf8fe20
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/baf8fe20

Branch: refs/heads/master
Commit: baf8fe202c1e212bbed3c73a6a63017eceb4a180
Parents: 7441032
Author: Taras Bobrovytsky <[email protected]>
Authored: Sat Jul 9 02:07:17 2016 +0000
Committer: Taras Bobrovytsky <[email protected]>
Committed: Thu Jul 14 19:04:45 2016 +0000

----------------------------------------------------------------------
 bin/impala-config.sh                    |  48 ++++++-----
 infra/python/deps/download_requirements |  28 ++----
 infra/python/deps/find_py26.py          |  41 +++++++++
 infra/python/deps/pip_download.py       | 124 ++++++++++++++++++++-------
 infra/python/deps/requirements.txt      |   2 +
 5 files changed, 170 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/baf8fe20/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 9db640a..481d2cb 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -66,9 +66,14 @@ fi
 # If enabled, debug symbols are added to cross-compiled IR.
 : ${ENABLE_IMPALA_IR_DEBUG_INFO=false}
 
+if [ -d $IMPALA_HOME/thirdparty ]; then
+  NO_THIRDPARTY=false
+else
+  NO_THIRDPARTY=true
+fi
 # If true, download and use the CDH components from S3 instead of the ones
 # in $IMPALA_HOME/thirdparty.
-: ${DOWNLOAD_CDH_COMPONENTS=false}
+: ${DOWNLOAD_CDH_COMPONENTS=$NO_THIRDPARTY}
 
 export IMPALA_TOOLCHAIN
 export SKIP_TOOLCHAIN_BOOTSTRAP
@@ -426,26 +431,27 @@ alias 
gerrit-verify-merge="${IMPALA_AUX_TEST_HOME}/jenkins/gerrit-verify-merge.s
 # A marker in the environment to prove that we really did source this file
 export IMPALA_CONFIG_SOURCED=1
 
-echo "IMPALA_HOME            = $IMPALA_HOME"
-echo "HADOOP_HOME            = $HADOOP_HOME"
-echo "HADOOP_CONF_DIR        = $HADOOP_CONF_DIR"
-echo "MINI_DFS_BASE_DATA_DIR = $MINI_DFS_BASE_DATA_DIR"
-echo "HIVE_HOME              = $HIVE_HOME"
-echo "HIVE_CONF_DIR          = $HIVE_CONF_DIR"
-echo "HBASE_HOME             = $HBASE_HOME"
-echo "HBASE_CONF_DIR         = $HBASE_CONF_DIR"
-echo "MINIKDC_HOME           = $MINIKDC_HOME"
-echo "THRIFT_HOME            = $THRIFT_HOME"
-echo "HADOOP_LZO             = $HADOOP_LZO"
-echo "IMPALA_LZO             = $IMPALA_LZO"
-echo "CLASSPATH              = $CLASSPATH"
-echo "LIBHDFS_OPTS           = $LIBHDFS_OPTS"
-echo "PYTHONPATH             = $PYTHONPATH"
-echo "JAVA_HOME              = $JAVA_HOME"
-echo "LD_LIBRARY_PATH        = $LD_LIBRARY_PATH"
-echo "LD_PRELOAD             = $LD_PRELOAD"
-echo "POSTGRES_JDBC_DRIVER   = $POSTGRES_JDBC_DRIVER"
-echo "IMPALA_TOOLCHAIN       = $IMPALA_TOOLCHAIN"
+echo "IMPALA_HOME             = $IMPALA_HOME"
+echo "HADOOP_HOME             = $HADOOP_HOME"
+echo "HADOOP_CONF_DIR         = $HADOOP_CONF_DIR"
+echo "MINI_DFS_BASE_DATA_DIR  = $MINI_DFS_BASE_DATA_DIR"
+echo "HIVE_HOME               = $HIVE_HOME"
+echo "HIVE_CONF_DIR           = $HIVE_CONF_DIR"
+echo "HBASE_HOME              = $HBASE_HOME"
+echo "HBASE_CONF_DIR          = $HBASE_CONF_DIR"
+echo "MINIKDC_HOME            = $MINIKDC_HOME"
+echo "THRIFT_HOME             = $THRIFT_HOME"
+echo "HADOOP_LZO              = $HADOOP_LZO"
+echo "IMPALA_LZO              = $IMPALA_LZO"
+echo "CLASSPATH               = $CLASSPATH"
+echo "LIBHDFS_OPTS            = $LIBHDFS_OPTS"
+echo "PYTHONPATH              = $PYTHONPATH"
+echo "JAVA_HOME               = $JAVA_HOME"
+echo "LD_LIBRARY_PATH         = $LD_LIBRARY_PATH"
+echo "LD_PRELOAD              = $LD_PRELOAD"
+echo "POSTGRES_JDBC_DRIVER    = $POSTGRES_JDBC_DRIVER"
+echo "IMPALA_TOOLCHAIN        = $IMPALA_TOOLCHAIN"
+echo "DOWNLOAD_CDH_COMPONENTS = $DOWNLOAD_CDH_COMPONENTS"
 
 # Kerberos things.  If the cluster exists and is kerberized, source
 # the required environment.  This is required for any hadoop tool to

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/baf8fe20/infra/python/deps/download_requirements
----------------------------------------------------------------------
diff --git a/infra/python/deps/download_requirements 
b/infra/python/deps/download_requirements
index eee7a1b..e054653 100755
--- a/infra/python/deps/download_requirements
+++ b/infra/python/deps/download_requirements
@@ -2,31 +2,15 @@
 
 set -euo pipefail
 
-# Prefer the virtualenv pip and python since this is what will actually be 
used during the
-# installation and it may be a different version than the system default.
-VIRTUAL_ENV_PIP="$IMPALA_HOME"/infra/python/env/bin/pip
-if [[ -e "$VIRTUAL_ENV_PIP" ]]; then
-  PIP="$VIRTUAL_ENV_PIP"
-  # Assume python is also available in the virtualenv.
-  PYTHON="$IMPALA_HOME"/infra/python/env/bin/python
-else
-  PIP=pip
-  PYTHON=python
-fi
-
 DIR=$(dirname "$0")
-# Download but don't install all packages listed in requirements.txt.
-# Don't use pip to download Impyla because its setup.py requires a newer 
version
-# of setuptools than is available on some systems we support (e.g. CentOS 6).
-"$PIP" -q install --download "$DIR" -r <(grep -v "impyla" 
"$DIR"/requirements.txt)
 
-# Directly download required packages not listed in requirements.txt.
+pushd $DIR
+PY26=$(./find_py26.py)
+# Directly download packages listed in requirements.txt, but don't install 
them.
+$PY26 pip_download.py
 # For virtualenv, other scripts rely on the .tar.gz package (not a .whl 
package).
+$PY26 pip_download.py virtualenv 13.1.0
 # kudu-python is downloaded separately because pip install attempts to execute 
a
 # setup.py subcommand for kudu-python that can fail even if the download 
succeeds.
-pushd $DIR
-$PYTHON ./pip_download.py virtualenv 13.1.0
-$PYTHON ./pip_download.py kudu-python 0.1.1
-IMPYLA_VERSION=$(grep impyla ./requirements.txt | grep -o '[0-9.]*')
-$PYTHON ./pip_download.py impyla ${IMPYLA_VERSION}
+$PY26 pip_download.py kudu-python 0.1.1
 popd

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/baf8fe20/infra/python/deps/find_py26.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/find_py26.py b/infra/python/deps/find_py26.py
new file mode 100755
index 0000000..2ba5d23
--- /dev/null
+++ b/infra/python/deps/find_py26.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python
+# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script finds Python 2.6 or higher on the system and outputs the
+# system command to stdout. The script exits with a nonzero exit code if
+# Python 2.6+ is not present.
+
+import os
+import subprocess
+import sys
+import textwrap
+
+def detect_python_cmd():
+  '''Returns the system command that provides python 2.6 or greater.'''
+  paths = os.getenv("PATH").split(os.path.pathsep)
+  for cmd in ("python", "python27", "python2.7", "python-27", "python-2.7", 
"python26",
+      "python2.6", "python-26", "python-2.6"):
+    for path in paths:
+      cmd_path = os.path.join(path, cmd)
+      if not os.path.exists(cmd_path) or not os.access(cmd_path, os.X_OK):
+        continue
+      exit = subprocess.call([cmd_path, "-c", textwrap.dedent("""
+          import sys
+          sys.exit(int(sys.version_info[:2] < (2, 6)))""")])
+      if exit == 0:
+        return cmd_path
+  raise Exception("Could not find minimum required python version 2.6")
+
+print detect_python_cmd()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/baf8fe20/infra/python/deps/pip_download.py
----------------------------------------------------------------------
diff --git a/infra/python/deps/pip_download.py 
b/infra/python/deps/pip_download.py
index b1e1fa7..a3268b6 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -1,42 +1,106 @@
 #!/usr/bin/python
+# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Implement the basic 'pip download' functionality in a way that gives us more 
control
 # over which archive type is downloaded and what post-download steps are 
executed.
-import hashlib
+# This script requires Python 2.6+.
+
 import json
 import os.path
-from urllib import urlopen, URLopener
 import sys
+from hashlib import md5
+from time import sleep
+from urllib import urlopen, URLopener
 
-pkg_name = sys.argv[1]
-pkg_version = sys.argv[2]
-pkg_type = 'sdist' # Don't download wheel archives for now
-pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % 
pkg_name).read())
+NUM_TRIES = 3
 
 def check_md5sum(filename, expected_md5):
-  expected_md5 = pkg['md5_digest']
-  actual_md5 = hashlib.md5(open(filename).read()).hexdigest()
+  actual_md5 = md5(open(filename).read()).hexdigest()
   return actual_md5 == expected_md5
 
-found = False
-downloader = URLopener()
-for pkg in pkg_info['releases'][pkg_version]:
-  if pkg['packagetype'] == pkg_type:
-    filename = pkg['filename']
-    expected_md5 = pkg['md5_digest']
-    print "Downloading %s from %s " % (filename, pkg['url'])
-    if os.path.isfile(filename) and check_md5sum(filename, expected_md5):
-      print "File with matching md5sum already exists, skipping download."
-      found = True
-      break
-    downloader.retrieve(pkg['url'], filename)
-    actual_md5 = hashlib.md5(open(filename).read()).hexdigest()
-    if not check_md5sum(filename, expected_md5):
-      print "MD5 mismatch in file %s." % filename
-      sys.exit(1)
-    found = True
-    break
-
-if not found:
-  print "Could not find archive to download for %s %s %s" % (pkg_name, 
pkg_version,
-      pkg_type)
+def retry(func):
+  '''Retry decorator.'''
+
+  def wrapper(*args, **kwargs):
+    for _ in xrange(NUM_TRIES):
+      try:
+        result = func(*args, **kwargs)
+        if result: return result
+      except Exception as e:
+        print e
+      sleep(5)
+    print "Download failed after several attempts."
+    sys.exit(1)
+
+  return wrapper
+
+@retry
+def download_package(pkg_name, pkg_version):
+  '''Download the required package. Sometimes the download can be flaky, so we 
use the
+  retry decorator.'''
+  pkg_type = 'sdist' # Don't download wheel archives for now
+  pkg_info = json.loads(urlopen('https://pypi.python.org/pypi/%s/json' % 
pkg_name).read())
+
+  downloader = URLopener()
+  for pkg in pkg_info['releases'][pkg_version]:
+    if pkg['packagetype'] == pkg_type:
+      filename = pkg['filename']
+      expected_md5 = pkg['md5_digest']
+      if os.path.isfile(filename) and check_md5sum(filename, expected_md5):
+        print "File with matching md5sum already exists, skipping download."
+        return True
+      print "Downloading %s from %s " % (filename, pkg['url'])
+      downloader.retrieve(pkg['url'], filename)
+      actual_md5 = md5(open(filename).read()).hexdigest()
+      if check_md5sum(filename, expected_md5):
+        return True
+      else:
+        print "MD5 mismatch in file %s." % filename
+        return False
+  print "Could not find archive to download for %s %s %s" % (
+      pkg_name, pkg_version, pkg_type)
   sys.exit(1)
+
+def main():
+  if len(sys.argv) > 1:
+    _, pkg_name, pkg_version = sys.argv
+    download_package(pkg_name, pkg_version)
+  else:
+    # If the package name and version are not specified in the command line 
arguments,
+    # download the packages that are in requirements.txt.
+    f = open("requirements.txt", 'r')
+    try:
+      # requirements.txt follows the standard pip grammar.
+      for line in f:
+        # A hash symbol ("#") represents a comment that should be ignored.
+        hash_index = line.find('#')
+        if hash_index != -1:
+          line = line[:hash_index]
+        # A semicolon (";") specifies some additional condition for when the 
package
+        # should be installed (for example a specific OS). We can ignore this 
and download
+        # the package anyway because the installation 
script (bootstrap_virtualenv.py) can
+        # take it into account.
+        semi_colon_index = line.find(';')
+        if semi_colon_index != -1:
+          line = line[:semi_colon_index]
+        l = line.strip()
+        if len(l) > 0:
+          pkg_name, pkg_version = l.split('==')
+          download_package(pkg_name.strip(), pkg_version.strip())
+    finally:
+      f.close()
+
+if __name__ == '__main__':
+  main()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/baf8fe20/infra/python/deps/requirements.txt
----------------------------------------------------------------------
diff --git a/infra/python/deps/requirements.txt 
b/infra/python/deps/requirements.txt
index b3ebfb1..77e4be9 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -19,6 +19,7 @@ cm-api == 10.0.0
   readline == 6.2.4.1; sys_platform == 'darwin'
 Fabric == 1.10.2
   paramiko == 1.15.2
+    ecdsa == 0.13
   pycrypto == 2.6.1
 Flask == 0.10.1
   Jinja2 == 2.8
@@ -73,3 +74,4 @@ texttable == 0.8.3
 
 # For dev purposes, not used in scripting. Version 1.2.1 is the latest that 
supports 2.6.
 ipython == 1.2.1
+  apipkg == 1.4

Reply via email to