Repository: incubator-impala
Updated Branches:
  refs/heads/master 9f4d9ff68 -> d5cefe07c

IMPALA-4407: Move Impala setup procedures to main repo

Before this change, Impala relied on a chef setup in the separate impala-setup repo
for setting up a development environment. This has a number of downsides:

1. It makes understanding what the script is doing difficult: there
are 40k or so lines in that repo last I checked.

2. It makes porting to new distributions difficult unless the
providers of various chef "recipes" have already ported their code.

3. It makes coordinated changes between the main repo and the
impala-setup repo more awkward.

This patch adds a shell script to replace that repo. It works on
Ubuntu 14.04 and 16.04, while the impala-setup repo only works on 14.04
and the now-unmaintained Ubuntu 15.04.

Change-Id: I728abfa806ecd9461dfb443278c2a464714d984c
Reviewed-by: Jim Apple <>
Tested-by: Impala Public Jenkins


Branch: refs/heads/master
Commit: d5cefe07c931a0d3bf02bca97bbba05400d91a48
Parents: 9f4d9ff
Author: Jim Apple <>
Authored: Wed Aug 2 20:36:17 2017 -0700
Committer: Impala Public Jenkins <>
Committed: Fri Aug 11 06:03:04 2017 +0000

 bin/ | 202 +++++++++++++++++++++++++++++++-------
 1 file changed, 164 insertions(+), 38 deletions(-)
diff --git a/bin/ b/bin/
index f04a0b1..4754b55 100755
--- a/bin/
+++ b/bin/
@@ -18,63 +18,189 @@
 # under the License.
 # This script bootstraps a development environment from almost nothing; it is known to
-# work on Ubuntu 14.04, and it definitely clobbers some local environment, so 
it's best to
-# run this in a sandbox first, like a VM or docker.
+# work on Ubuntu 14.04 and 16.04. It clobbers some local environment and system
+# configurations, so it is best to run this in a fresh install. It also sets up the
+# ~/.bashrc for the calling user and a local config file under ${IMPALA_HOME}/bin/ with
+# some variables to make Impala compile and run after this script is complete.
 # The intended user is a person who wants to start contributing code to Impala. This
-# script serves as an executable reference point for how to get started.
+# script serves as an executable reference point for how to get started. It takes about
+# two hours to complete.
-# At this time, it completes in about 6.5 hours. It generates and loads the 
test data and
-# metadata without using a snapshot (which takes about 3 hours) and it then 
runs the full
-# testsuite (frontend, backend, end-to-end, JDBC, and custom cluster) in "core"
-# exploration mode.
+# To run this in a Docker container:
+#   1. Run with --privileged
+#   2. Give the container a non-root sudoer with NOPASSWD:
+#      apt-get update
+#      apt-get install sudo
+#      adduser --disabled-password --gecos '' impdev
+#      echo 'impdev ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+#   3. Run this script as that user: su - impdev -c /
+set -eu -o pipefail
+if [[ -t 1 ]] # if on an interactive terminal
+  echo "This script will clobber some system settings. Are you sure you want 
+  echo -n "continue? "
+  while true
+  do
+    read -p "[yes/no] " ANSWER
+    ANSWER=$(echo "$ANSWER" | tr /a-z/ /A-Z/)
+    if [[ $ANSWER = YES ]]
+    then
+      break
+    elif [[ $ANSWER = NO ]]
+    then
+      echo "OK, Bye!"
+      exit 1
+    fi
+  done
+  export DEBIAN_FRONTEND=noninteractive
-set -eux -o pipefail
+set -x
+source /etc/lsb-release
-if [[ ! -d "${HOMEDIR}" ]]
+if ! [[ $DISTRIB_ID = Ubuntu ]]
-    echo "${HOMEDIR} is needed for installing Impala dependencies"
-    exit 1
+  echo "This script only supports Ubuntu" >&2
+  exit 1
-if [[ -z "${JAVA_HOME}" ]]
+if ! [[ $DISTRIB_RELEASE = 14.04 || $DISTRIB_RELEASE = 16.04 ]]
-    echo "JAVA_HOME must be set to install Impala dependencies"
-    exit 1
+  echo "This script only supports Ubuntu 14.04 and 16.04" >&2
+  exit 1
-if ! sudo true
+sudo apt-get update
+sudo apt-get --yes install apt-utils
+sudo apt-get --yes install git
+# If there is no Impala git repo, get one now
+if ! [[ -d ~/Impala ]]
-    echo "Passwordless sudo is needed for this script"
-    exit 1
+  time -p git clone ~/Impala
+cd ~/Impala
+echo "$SET_IMPALA_HOME" >> ~/.bashrc
+sudo apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
+     libsasl2-dev libssl-dev make maven ninja-build ntp ntpdate python-dev \
+     python-setuptools postgresql ssh wget vim-common
-# Place to download setup scripts
-TMPDIR=$(mktemp -d)
-function cleanup {
-    rm -rf "${TMPDIR}"
-trap cleanup EXIT
+if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
+  sudo service ssh start
-# Install build and test pre-reqs
-pushd "${TMPDIR}"
-git clone "${IMPALA_SETUP_REPO_URL}" impala-setup
-cd impala-setup
-chmod +x ./
-sudo ./
+# TODO: config ccache to give it plenty of space
+# TODO: check that there is enough space on disk to do a build and data load
+# TODO: make this work with non-bash shells
-# HDFS bug workaround
-echo " $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
+if [[ $DISTRIB_RELEASE = 14.04 ]]
+sudo apt-get --yes install openjdk-${JDK_VERSION}-jdk
+SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-${JDK_VERSION}-openjdk-amd64"
+echo "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/"
+eval "$SET_JAVA_HOME"
+sudo service ntp stop
+sudo ntpdate
+# If on EC2, use Amazon's ntp servers
+if which dmidecode && { sudo dmidecode -s bios-version | grep amazon; }
+  sudo sed -i 's/ubuntu\.pool/amazon\.pool/' /etc/ntp.conf
+  grep amazon /etc/ntp.conf
+  grep ubuntu /etc/ntp.conf
+# While it is nice to have ntpd running to keep the clock in sync, that does not work in a
+# --privileged docker container, and a non-privileged container cannot run ntpdate, which
+# is strictly needed by Kudu.
+# TODO: Make privileged docker start ntpd
+sudo service ntp start || grep docker /proc/1/cgroup
+# IMPALA-3932, IMPALA-3926
+if [[ $DISTRIB_RELEASE = 16.04 ]]
+elif [[ $DISTRIB_RELEASE = 14.04 ]]
+echo "$SET_LD_LIBRARY_PATH" >> "${IMPALA_HOME}/bin/"
+# TODO: What are the security implications of this?
+for PG_AUTH_FILE in /etc/postgresql/*/main/pg_hba.conf
+  sudo sed -ri 's/local +all +all +peer/local all all trust/g' $PG_AUTH_FILE
+sudo service postgresql restart
+sudo /etc/init.d/postgresql reload
+sudo service postgresql restart
+# Set up postgres for HMS
+if ! [[ 1 = $(sudo -u postgres psql -At -c "SELECT count(*) FROM pg_roles 
WHERE rolname = 'hiveuser';") ]]
+  sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
+sudo -u postgres psql -c "ALTER ROLE hiveuser WITH CREATEDB;"
+sudo -u postgres psql -c "SELECT * FROM pg_roles WHERE rolname = 'hiveuser';"
+# Setup ssh to ssh to localhost
+mkdir -p ~/.ssh
+chmod go-rwx ~/.ssh
+if ! [[ -f ~/.ssh/id_rsa ]]
+  ssh-keygen -t rsa -N '' -q -f ~/.ssh/id_rsa
+cat ~/.ssh/ >> ~/.ssh/authorized_keys
 echo "NoHostAuthenticationForLocalhost yes" >> ~/.ssh/config
+ssh localhost whoami
+# Workarounds for HDFS networking issues
+echo " $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
+# In Docker, one can change /etc/hosts as above but not with sed -i. The error message is
+# "sed: cannot rename /etc/sedc3gPj8: Device or resource busy". The following lines are
+# basically sed -i but with cp instead of mv for the -i part.
+sed 's/' /etc/hosts > "${NEW_HOSTS}"
+diff -u /etc/hosts "${NEW_HOSTS}"
+sudo cp "${NEW_HOSTS}" /etc/hosts
+rm "${NEW_HOSTS}"
+sudo mkdir -p /var/lib/hadoop-hdfs
+sudo chown $(whoami) /var/lib/hadoop-hdfs/
+# TODO: restrict this to only the users it is needed for
+echo "* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
+# LZO is not needed to compile or run Impala, but it is needed for the data load
+if ! [[ -d ~/Impala-lzo ]]
+  git clone ~/Impala-lzo
+if ! [[ -d ~/hadoop-lzo ]]
+  git clone ~/hadoop-lzo
+cd ~/hadoop-lzo/
+time -p ant package
-pushd "$(dirname $0)/.."
-export IMPALA_HOME="$(pwd)"
 source bin/
-./ -noclean -format -testdata -build_shared_libs
+export NUM_CONCURRENT_TESTS=$(nproc)
+time -p ./ -noclean -format -testdata -skiptests
+# To then run the tests:
+# time -p bin/

Reply via email to