Repository: incubator-impala
Updated Branches:
  refs/heads/master 9f4d9ff68 -> d5cefe07c


IMPALA-4407: Move Impala setup procedures to main repo

Before this change, Impala relied on a chef setup in
https://github.com/awleblang/impala-setup for setting up a development
environment. This has a number of downsides:

1. It makes understanding what the script is doing difficult: there
are 40k or so lines in that repo last I checked.

2. It makes porting to new distributions difficult unless the
providers of various chef "recipes" have already ported their code.

3. It makes coordinated changes between the main repo and the
impala-setup repo more awkward.

This patch adds a shell script to replace that repo. It works on
Ubuntu 14.04 and 16.04, while the impala-setup repo only works on 14.04
and the now-unmaintained Ubuntu 15.04.

Change-Id: I728abfa806ecd9461dfb443278c2a464714d984c
Reviewed-on: http://gerrit.cloudera.org:8080/7587
Reviewed-by: Jim Apple <jbapple-imp...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/d5cefe07
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/d5cefe07
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/d5cefe07

Branch: refs/heads/master
Commit: d5cefe07c931a0d3bf02bca97bbba05400d91a48
Parents: 9f4d9ff
Author: Jim Apple <jbapple-imp...@apache.org>
Authored: Wed Aug 2 20:36:17 2017 -0700
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Fri Aug 11 06:03:04 2017 +0000

----------------------------------------------------------------------
 bin/bootstrap_development.sh | 202 +++++++++++++++++++++++++++++++-------
 1 file changed, 164 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/d5cefe07/bin/bootstrap_development.sh
----------------------------------------------------------------------
diff --git a/bin/bootstrap_development.sh b/bin/bootstrap_development.sh
index f04a0b1..4754b55 100755
--- a/bin/bootstrap_development.sh
+++ b/bin/bootstrap_development.sh
@@ -18,63 +18,189 @@
 # under the License.
 
 # This script bootstraps a development environment from almost nothing; it is known to
-# work on Ubuntu 14.04, and it definitely clobbers some local environment, so it's best to
-# run this in a sandbox first, like a VM or docker.
+# work on Ubuntu 14.04 and 16.04. It clobbers some local environment and system
+# configurations, so it is best to run this in a fresh install. It also sets up the
+# ~/.bashrc for the calling user and impala-config-local.sh with some environment
+# variables to make Impala compile and run after this script is complete.
 #
 # The intended user is a person who wants to start contributing code to Impala. This
-# script serves as an executable reference point for how to get started.
+# script serves as an executable reference point for how to get started. It takes about
+# two hours to complete.
 #
-# At this time, it completes in about 6.5 hours. It generates and loads the test data and
-# metadata without using a snapshot (which takes about 3 hours) and it then runs the full
-# testsuite (frontend, backend, end-to-end, JDBC, and custom cluster) in "core"
-# exploration mode.
+# To run this in a Docker container:
+#
+#   1. Run with --privileged
+#   2. Give the container a non-root sudoer with NOPASSWD:
+#      apt-get update
+#      apt-get install sudo
+#      adduser --disabled-password --gecos '' impdev
+#      echo 'impdev ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+#   3. Run this script as that user: su - impdev -c /bootstrap_development.sh
+
+set -eu -o pipefail
+
+if [[ -t 1 ]] # if on an interactive terminal
+then
+  echo "This script will clobber some system settings. Are you sure you want to"
+  echo -n "continue? "
+  while true
+  do
+    read -p "[yes/no] " ANSWER
+    ANSWER=$(echo "$ANSWER" | tr /a-z/ /A-Z/)
+    if [[ $ANSWER = YES ]]
+    then
+      break
+    elif [[ $ANSWER = NO ]]
+    then
+      echo "OK, Bye!"
+      exit 1
+    fi
+  done
+else
+  export DEBIAN_FRONTEND=noninteractive
+fi
 
-set -eux -o pipefail
+set -x
 
-HOMEDIR="/home/$(whoami)/"
+source /etc/lsb-release
 
-if [[ ! -d "${HOMEDIR}" ]]
+if ! [[ $DISTRIB_ID = Ubuntu ]]
 then
-    echo "${HOMEDIR} is needed for installing Impala dependencies"
-    exit 1
+  echo "This script only supports Ubuntu" >&2
+  exit 1
 fi
 
-if [[ -z "${JAVA_HOME}" ]]
+if ! [[ $DISTRIB_RELEASE = 14.04 || $DISTRIB_RELEASE = 16.04 ]]
 then
-    echo "JAVA_HOME must be set to install Impala dependencies"
-    exit 1
+  echo "This script only supports Ubuntu 14.04 and 16.04" >&2
+  exit 1
 fi
 
-if ! sudo true
+sudo apt-get update
+sudo apt-get --yes install apt-utils
+sudo apt-get --yes install git
+
+# If there is no Impala git repo, get one now
+if ! [[ -d ~/Impala ]]
 then
-    echo "Passwordless sudo is needed for this script"
-    exit 1
+  time -p git clone https://git-wip-us.apache.org/repos/asf/incubator-impala.git ~/Impala
 fi
+cd ~/Impala
+SET_IMPALA_HOME="export IMPALA_HOME=$(pwd)"
+echo "$SET_IMPALA_HOME" >> ~/.bashrc
+eval "$SET_IMPALA_HOME"
 
-IMPALA_SETUP_REPO_URL="https://github.com/awleblang/impala-setup";
+sudo apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
+     libsasl2-dev libssl-dev make maven ninja-build ntp ntpdate python-dev \
+     python-setuptools postgresql ssh wget vim-common
 
-# Place to download setup scripts
-TMPDIR=$(mktemp -d)
-function cleanup {
-    rm -rf "${TMPDIR}"
-}
-trap cleanup EXIT
+if ! { service --status-all | grep -E '^ \[ \+ \]  ssh$'; }
+then
+  sudo service ssh start
+fi
 
-# Install build and test pre-reqs
-pushd "${TMPDIR}"
-git clone "${IMPALA_SETUP_REPO_URL}" impala-setup
-cd impala-setup
-chmod +x ./install.sh
-sudo ./install.sh
-popd
+# TODO: config ccache to give it plenty of space
+# TODO: check that there is enough space on disk to do a build and data load
+# TODO: make this work with non-bash shells
 
-# HDFS bug workaround
-echo "127.0.0.1 $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
+JDK_VERSION=8
+if [[ $DISTRIB_RELEASE = 14.04 ]]
+then
+  JDK_VERSION=7
+fi
+sudo apt-get --yes install openjdk-${JDK_VERSION}-jdk
+SET_JAVA_HOME="export JAVA_HOME=/usr/lib/jvm/java-${JDK_VERSION}-openjdk-amd64"
+echo "$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
+eval "$SET_JAVA_HOME"
+
+sudo service ntp stop
+sudo ntpdate us.pool.ntp.org
+# If on EC2, use Amazon's ntp servers
+if which dmidecode && { sudo dmidecode -s bios-version | grep amazon; }
+then
+  sudo sed -i 's/ubuntu\.pool/amazon\.pool/' /etc/ntp.conf
+  grep amazon /etc/ntp.conf
+  grep ubuntu /etc/ntp.conf
+fi
+# While it is nice to have ntpd running to keep the clock in sync, that does not work in a
+# --privileged docker container, and a non-privileged container cannot run ntpdate, which
+# is strictly needed by Kudu.
+# TODO: Make privileged docker start ntpd
+sudo service ntp start || grep docker /proc/1/cgroup
+
+# IMPALA-3932, IMPALA-3926
+if [[ $DISTRIB_RELEASE = 16.04 ]]
+then
+  SET_LD_LIBRARY_PATH='export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}'
+elif [[ $DISTRIB_RELEASE = 14.04 ]]
+then
+  SET_LD_LIBRARY_PATH="unset LD_LIBRARY_PATH"
+fi
+echo "$SET_LD_LIBRARY_PATH" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
+eval "$SET_LD_LIBRARY_PATH"
+
+# TODO: What are the security implications of this?
+for PG_AUTH_FILE in /etc/postgresql/*/main/pg_hba.conf
+do
+  sudo sed -ri 's/local +all +all +peer/local all all trust/g' $PG_AUTH_FILE
+done
+sudo service postgresql restart
+sudo /etc/init.d/postgresql reload
+sudo service postgresql restart
+
+# Set up postgres for HMS
+if ! [[ 1 = $(sudo -u postgres psql -At -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';") ]]
+then
+  sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
+fi
+sudo -u postgres psql -c "ALTER ROLE hiveuser WITH CREATEDB;"
+sudo -u postgres psql -c "SELECT * FROM pg_roles WHERE rolname = 'hiveuser';"
+
+# Set up ssh so that this user can ssh to localhost
+mkdir -p ~/.ssh
+chmod go-rwx ~/.ssh
+if ! [[ -f ~/.ssh/id_rsa ]]
+then
+  ssh-keygen -t rsa -N '' -q -f ~/.ssh/id_rsa
+fi
+cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
 echo "NoHostAuthenticationForLocalhost yes" >> ~/.ssh/config
+ssh localhost whoami
+
+# Workarounds for HDFS networking issues
+echo "127.0.0.1 $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
+# In Docker, one can change /etc/hosts as above but not with sed -i. The error message is
+# "sed: cannot rename /etc/sedc3gPj8: Device or resource busy". The following lines are
+# basically sed -i but with cp instead of mv for the -i part.
+NEW_HOSTS=$(mktemp)
+sed 's/127.0.1.1/127.0.0.1/g' /etc/hosts > "${NEW_HOSTS}"
+diff -u /etc/hosts "${NEW_HOSTS}"
+sudo cp "${NEW_HOSTS}" /etc/hosts
+rm "${NEW_HOSTS}"
+
+sudo mkdir -p /var/lib/hadoop-hdfs
+sudo chown $(whoami) /var/lib/hadoop-hdfs/
+
+# TODO: restrict this to only the users it is needed for
+echo "* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
+
+# LZO is not needed to compile or run Impala, but it is needed for the data load
+if ! [[ -d ~/Impala-lzo ]]
+then
+  git clone https://github.com/cloudera/impala-lzo.git ~/Impala-lzo
+fi
+if ! [[ -d ~/hadoop-lzo ]]
+then
+  git clone https://github.com/cloudera/hadoop-lzo.git ~/hadoop-lzo
+fi
+cd ~/hadoop-lzo/
+time -p ant package
+cd "$IMPALA_HOME"
 
-pushd "$(dirname $0)/.."
-export IMPALA_HOME="$(pwd)"
 export MAX_PYTEST_FAILURES=0
 source bin/impala-config.sh
-./buildall.sh -noclean -format -testdata -build_shared_libs
-popd
+export NUM_CONCURRENT_TESTS=$(nproc)
+time -p ./buildall.sh -noclean -format -testdata -skiptests
+
+# To then run the tests:
+# time -p bin/run-all-tests.sh

Reply via email to