Author: tomwhite
Date: Wed Feb 21 13:16:47 2007
New Revision: 510224

URL: http://svn.apache.org/viewvc?view=rev&rev=510224
Log:
HADOOP-952. Create a public (shared) Hadoop EC2 AMI.
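With a public AMI there is no longer any need to build an image before
using these scripts. As a sketch (assuming hadoop-ec2-env.sh has been
filled in as described in the template below), a typical session is:

  bin/hadoop-ec2 launch-cluster   # boot NO_INSTANCES instances from the public AMI
  bin/hadoop-ec2 start-hadoop     # format HDFS and start the Hadoop daemons
  bin/hadoop-ec2 login            # ssh to the master node

or 'bin/hadoop-ec2 run' to perform all three steps in sequence.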
Added:
    lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2
    lucene/hadoop/trunk/src/contrib/ec2/bin/launch-hadoop-cluster
    lucene/hadoop/trunk/src/contrib/ec2/bin/login-hadoop-cluster
    lucene/hadoop/trunk/src/contrib/ec2/bin/start-hadoop
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/ec2/README.txt
    lucene/hadoop/trunk/src/contrib/ec2/bin/create-hadoop-image
    lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
    lucene/hadoop/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote
    lucene/hadoop/trunk/src/contrib/ec2/bin/image/hadoop-init
    lucene/hadoop/trunk/src/contrib/ec2/bin/run-hadoop-cluster

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Feb 21 13:16:47 2007
@@ -86,9 +86,13 @@
     directly, so that user code is no longer required in the
     JobTracker.  (omalley via cutting)

-26. HAOOP-1006. Remove obsolete '-local' option from test code.
+26. HADOOP-1006. Remove obsolete '-local' option from test code.
     (Gautam Kowshik via cutting)

+27. HADOOP-952. Create a public (shared) Hadoop EC2 AMI.
+    The EC2 scripts now support launch of public AMIs.
+    (tomwhite)
+

 Release 0.11.2 - 2007-02-16

Modified: lucene/hadoop/trunk/src/contrib/ec2/README.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/README.txt?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/README.txt (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/README.txt Wed Feb 21 13:16:47 2007
@@ -3,6 +3,10 @@
 This collection of scripts allows you to run Hadoop clusters on Amazon.com's
 Elastic Compute Cloud (EC2) service described at:

   http://aws.amazon.com/ec2
+
+To get help, type the following in a shell:
+
+  bin/hadoop-ec2

 For full instructions, please visit the Hadoop wiki at:

Modified: lucene/hadoop/trunk/src/contrib/ec2/bin/create-hadoop-image
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/create-hadoop-image?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/create-hadoop-image (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/create-hadoop-image Wed Feb 21 13:16:47 2007
@@ -35,17 +35,17 @@

 # Copy setup scripts
 scp $SSH_OPTS "$bin"/hadoop-ec2-env.sh "root@$HOSTNAME:"
-scp $SSH_OPTS "$bin"/image/hadoop-init "root@$HOSTNAME:/etc/init.d/hadoop-init"
+scp $SSH_OPTS "$bin"/image/hadoop-init "root@$HOSTNAME:"
 scp $SSH_OPTS "$bin"/image/create-hadoop-image-remote "root@$HOSTNAME:"

 # Copy private key and certificate (for bundling image)
-scp $SSH_OPTS $EC2_KEYDIR/pk-*.pem "root@$HOSTNAME:"
-scp $SSH_OPTS $EC2_KEYDIR/cert-*.pem "root@$HOSTNAME:"
+scp $SSH_OPTS $EC2_KEYDIR/pk-*.pem "root@$HOSTNAME:/mnt"
+scp $SSH_OPTS $EC2_KEYDIR/cert-*.pem "root@$HOSTNAME:/mnt"

 # Connect to it
 ssh $SSH_OPTS "root@$HOSTNAME" './create-hadoop-image-remote'

 # Register image
-ec2-register $S3_BUCKET/image.manifest.xml
+ec2-register $S3_BUCKET/hadoop-$HADOOP_VERSION.manifest.xml

 echo "Terminate with: ec2-terminate-instances $BOOTING_INSTANCE"
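For illustration: with the new defaults introduced in
hadoop-ec2-env.sh.template below (S3_BUCKET=hadoop-ec2-images,
HADOOP_VERSION=0.11.2), the register step above expands to

  ec2-register hadoop-ec2-images/hadoop-0.11.2.manifest.xml

so each Hadoop version gets its own manifest in the bucket, rather than a
single unversioned image.manifest.xml.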
Added: lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2?view=auto&rev=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2 (added)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2 Wed Feb 21 13:16:47 2007
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Usage: hadoop-ec2 COMMAND"
+  echo "where COMMAND is one of:"
+  echo "  create-image         create a Hadoop AMI"
+  echo "  launch-cluster       launch a cluster of Hadoop EC2 instances"
+  echo "  start-hadoop         start Hadoop daemons on a cluster"
+  echo "  login                login to the master node of the Hadoop EC2 cluster"
+  echo "  run                  'launch-cluster', 'start-hadoop', 'login'"
+  echo "  terminate-cluster    terminate a cluster of Hadoop EC2 instances"
+  exit 1
+fi
+
+# get arguments
+COMMAND=$1
+shift
+
+if [ "$COMMAND" = "create-image" ] ; then
+  . "$bin"/create-hadoop-image
+elif [ "$COMMAND" = "launch-cluster" ] ; then
+  . "$bin"/launch-hadoop-cluster
+elif [ "$COMMAND" = "start-hadoop" ] ; then
+  . "$bin"/start-hadoop
+elif [ "$COMMAND" = "run" ] ; then
+  . "$bin"/run-hadoop-cluster
+elif [ "$COMMAND" = "login" ] ; then
+  . "$bin"/login-hadoop-cluster
+elif [ "$COMMAND" = "terminate-cluster" ] ; then
+  . "$bin"/terminate-hadoop-cluster
+fi
+
\ No newline at end of file
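Note that the wrapper sources ('.') the per-command scripts rather than
executing them, so each subcommand runs in the current shell. For example:

  bin/hadoop-ec2                  # no arguments: prints the usage summary
  bin/hadoop-ec2 launch-cluster   # sources bin/launch-hadoop-cluster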
Modified: lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/hadoop-ec2-env.sh.template Wed Feb 21 13:16:47 2007
@@ -9,32 +9,46 @@
 # Your Amazon AWS secret access key.
 AWS_SECRET_ACCESS_KEY=

-# The Amazon S3 bucket where the Hadoop AMI you create will be stored.
-S3_BUCKET=
-
-# SSH options used when connecting to EC2 instances.
-# Change the -i option to be the absolute path to your keypair that you set up in the Amazon Getting Started guide.
-SSH_OPTS='-i /home/<yourname>/id_rsa-gsg-keypair -o StrictHostKeyChecking=no'
-
 # Location of EC2 keys.
 # The default setting is probably OK if you set up EC2 following the Amazon Getting Started guide.
-EC2_KEYDIR=~/.ec2
+EC2_KEYDIR=`dirname "$EC2_PRIVATE_KEY"`

-# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index_jdk5.jsp and get the URL for the "Linux self-extracting file".
-JAVA_BINARY_URL=''
+# The EC2 key name used to launch instances.
+# The default is the value used in the Amazon Getting Started guide.
+KEY_NAME=gsg-keypair
+
+# Where your EC2 private key is stored (created when following the Amazon Getting Started guide).
+# You need to change this if you don't store this with your other EC2 keys.
+PRIVATE_KEY_PATH=`echo "$EC2_KEYDIR"/"id_rsa-$KEY_NAME"`

-# The version number of the installed JDK.
-JAVA_VERSION=1.5.0_11
+# SSH options used when connecting to EC2 instances.
+SSH_OPTS=`echo -i "$PRIVATE_KEY_PATH" -o StrictHostKeyChecking=no`
+
+# The version of Hadoop to use.
+HADOOP_VERSION=0.11.2
+
+# The Amazon S3 bucket where the Hadoop AMI is stored.
+# The default value is for public images, so it can be left as-is if you are running a public image.
+# Change this value only if you are creating your own (private) AMI,
+# so you can store it in a bucket you own.
+S3_BUCKET=hadoop-ec2-images

 # The EC2 group to run your cluster in.
 GROUP=hadoop-cluster-group

-# The version of Hadoop to install.
-HADOOP_VERSION=0.10.1
-
 # The hostname of the master node in the cluster. You need to be able to set the DNS for this host to point to the master's IP address.
 # See http://www.dyndns.com/services/dns/dyndns/, for example.
 MASTER_HOST=

 # The number of nodes in your cluster.
 NO_INSTANCES=2
+
+#
+# The following variables are only used when creating an AMI.
+#
+
+# The download URL for the Sun JDK. Visit http://java.sun.com/javase/downloads/index_jdk5.jsp and get the URL for the "Linux self-extracting file".
+JAVA_BINARY_URL=''
+
+# The version number of the installed JDK.
+JAVA_VERSION=1.5.0_11
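As a sketch, a filled-in hadoop-ec2-env.sh for running the public image
might look like the following (the account ID and master hostname are
hypothetical placeholders):

  AWS_ACCOUNT_ID=123456789012             # hypothetical
  AWS_ACCESS_KEY_ID=<your access key>
  AWS_SECRET_ACCESS_KEY=<your secret key>
  KEY_NAME=gsg-keypair
  HADOOP_VERSION=0.11.2
  S3_BUCKET=hadoop-ec2-images             # default; change only for a private AMI
  MASTER_HOST=hadoop-master.example.com   # hypothetical; DNS name you control
  NO_INSTANCES=2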
Modified: lucene/hadoop/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/image/create-hadoop-image-remote Wed Feb 21 13:16:47 2007
@@ -6,6 +6,9 @@
 bin=`cd "$bin"; pwd`
 . "$bin"/hadoop-ec2-env.sh

+# Remove environment script since it contains sensitive information
+rm -f "$bin"/hadoop-ec2-env.sh
+
 # Install Java
 cd /usr/local
 wget -nv -O java.bin $JAVA_BINARY_URL
@@ -28,19 +31,20 @@
   /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
 mkdir -p /mnt/hadoop/logs

-# Do Hadoop configuration for master hostname and cluster size on instance startup for runlevels 3 and 4.
-# Runlevel 4 is used by Xen. See http://developer.amazonwebservices.com/connect/message.jspa?messageID=45948#45948
-ln -s /etc/init.d/hadoop-init /etc/rc3.d/S99hadoop-init
-ln -s /etc/init.d/hadoop-init /etc/rc4.d/S99hadoop-init
-
-# Configure networking
-ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
-cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
+# Do configuration on instance startup
+echo "/root/hadoop-init" >> /etc/rc.d/rc.local
+
+# Configure networking.
+# Delete SSH authorized_keys since it includes the key it was launched with. (Note that it is re-populated when an instance starts.)
+rm -f /root/.ssh/authorized_keys
+# Ensure logging in to new hosts is seamless.
 echo '  StrictHostKeyChecking no' >> /etc/ssh/ssh_config

 # Bundle and upload image
 cd ~root
-ec2-bundle-vol -d /mnt -k ~root/pk-*.pem -c ~root/cert-*.pem -u $AWS_ACCOUNT_ID -s 1536 -p hadoop-$HADOOP_VERSION
+# Don't need to delete .bash_history since it isn't written until exit.
+ec2-bundle-vol -d /mnt -k /mnt/pk-*.pem -c /mnt/cert-*.pem -u $AWS_ACCOUNT_ID -s 1536 -p hadoop-$HADOOP_VERSION
+rm /mnt/pk-*.pem /mnt/cert-*.pem
 ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY

 # End

Modified: lucene/hadoop/trunk/src/contrib/ec2/bin/image/hadoop-init
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/image/hadoop-init?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/image/hadoop-init (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/image/hadoop-init Wed Feb 21 13:16:47 2007
@@ -1,16 +1,14 @@
-#!/bin/sh
-
-start() {
-  USER_DATA=`wget -q -O - http://169.254.169.254/1.0/user-data`
-  NO_INSTANCES=`python -c "print '$USER_DATA'.split(',')[0]"`
-  MASTER_HOST=`python -c "print '$USER_DATA'.split(',')[1]"`
-  HADOOP_HOME=`ls -d /usr/local/hadoop-*`
-  echo $NO_INSTANCES, $MASTER_HOST, $HADOOP_HOME
-
-  sed -i -e "s|# export HADOOP_MASTER=.*|export HADOOP_MASTER=$MASTER_HOST:$HADOOP_HOME|" \
-    $HADOOP_HOME/conf/hadoop-env.sh
+# Use parameters passed in during launch to configure Hadoop
+USER_DATA=`wget -q -O - http://169.254.169.254/1.0/user-data`
+NO_INSTANCES=`python -c "print '$USER_DATA'.split(',')[0]"`
+MASTER_HOST=`python -c "print '$USER_DATA'.split(',')[1]"`
+HADOOP_HOME=`ls -d /usr/local/hadoop-*`
+echo $NO_INSTANCES, $MASTER_HOST, $HADOOP_HOME
+
+sed -i -e "s|# export HADOOP_MASTER=.*|export HADOOP_MASTER=$MASTER_HOST:$HADOOP_HOME|" \
+  $HADOOP_HOME/conf/hadoop-env.sh

-  cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
+cat > $HADOOP_HOME/conf/hadoop-site.xml <<EOF
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

@@ -34,7 +32,7 @@
 </configuration>
 EOF

-  cat > $HADOOP_HOME/conf/mapred-default.xml <<EOF
+cat > $HADOOP_HOME/conf/mapred-default.xml <<EOF
 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

@@ -52,22 +50,3 @@

 </configuration>
 EOF
-
-}
-
-case "$1" in
-  start)
-    start
-    ;;
-  stop)
-    ;;
-  status)
-    ;;
-  restart|reload|condrestart)
-    ;;
-  *)
-    echo $"Usage: $0 {start|stop|restart|reload|status}"
-    exit 1
-esac
-
-exit 0
\ No newline at end of file
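To illustrate the user-data mechanism: each instance fetches the launch
parameters from the EC2 metadata service at boot, so if a cluster were
launched with -d "2,hadoop-master.example.com" (a hypothetical master
hostname), the script above would compute:

  USER_DATA=2,hadoop-master.example.com
  NO_INSTANCES=2
  MASTER_HOST=hadoop-master.example.com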
Added: lucene/hadoop/trunk/src/contrib/ec2/bin/launch-hadoop-cluster
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/launch-hadoop-cluster?view=auto&rev=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/launch-hadoop-cluster (added)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/launch-hadoop-cluster Wed Feb 21 13:16:47 2007
@@ -0,0 +1,41 @@
+#!/bin/sh
+# Launch an EC2 cluster of Hadoop instances.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+ec2-describe-group | grep $GROUP > /dev/null
+if [ ! $? -eq 0 ]; then
+  echo "Creating group $GROUP"
+  ec2-add-group $GROUP -d "Group for Hadoop clusters."
+  ec2-authorize $GROUP -p 22    # ssh
+  ec2-authorize $GROUP -p 50030 # JobTracker web interface
+  ec2-authorize $GROUP -p 50060 # TaskTracker web interface
+  ec2-authorize $GROUP -o $GROUP -u $AWS_ACCOUNT_ID
+fi
+
+# Finding Hadoop image
+AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep $HADOOP_VERSION | grep available | awk '{print $2}'`
+
+# Start a cluster
+echo "Starting cluster with AMI $AMI_IMAGE"
+RUN_INSTANCES_OUTPUT=`ec2-run-instances $AMI_IMAGE -n $NO_INSTANCES -g $GROUP -k gsg-keypair -d "$NO_INSTANCES,$MASTER_HOST" | grep INSTANCE | awk '{print $2}'`
+for instance in $RUN_INSTANCES_OUTPUT; do
+  echo "Waiting for instance $instance to start"
+  while true; do
+    printf "."
+    HOSTNAME=`ec2-describe-instances $instance | grep running | awk '{print $4}'`
+    if [ ! -z $HOSTNAME ]; then
+      echo "started as $HOSTNAME"
+      break;
+    fi
+    sleep 1
+  done
+done
+
+echo "Appointing master"
+MASTER_EC2_HOST=`ec2-describe-instances | grep INSTANCE | grep running | awk '{if ($7 == 0) print $4}'`
+MASTER_IP=`dig +short $MASTER_EC2_HOST`
+echo "Master is $MASTER_EC2_HOST. Please set up DNS so $MASTER_HOST points to $MASTER_IP."

Added: lucene/hadoop/trunk/src/contrib/ec2/bin/login-hadoop-cluster
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/login-hadoop-cluster?view=auto&rev=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/login-hadoop-cluster (added)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/login-hadoop-cluster Wed Feb 21 13:16:47 2007
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Login to the master node of a running Hadoop EC2 cluster.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+echo "Logging in to master $MASTER_HOST."
+ssh $SSH_OPTS "root@$MASTER_HOST"
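As a usage sketch, with the template defaults (KEY_NAME=gsg-keypair) and
EC2 keys kept in ~/.ec2, the ssh line above expands roughly to:

  ssh -i ~/.ec2/id_rsa-gsg-keypair -o StrictHostKeyChecking=no root@<master hostname>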
Modified: lucene/hadoop/trunk/src/contrib/ec2/bin/run-hadoop-cluster
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/run-hadoop-cluster?view=diff&rev=510224&r1=510223&r2=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/run-hadoop-cluster (original)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/run-hadoop-cluster Wed Feb 21 13:16:47 2007
@@ -1,60 +1,18 @@
 #!/bin/sh
-# Launch an EC2 cluster of Hadoop instances and connect to the master.
+# Launch an EC2 cluster of Hadoop instances, start Hadoop, and connect to the master.

 # Import variables
 bin=`dirname "$0"`
 bin=`cd "$bin"; pwd`
-. "$bin"/hadoop-ec2-env.sh

-ec2-describe-group | grep $GROUP > /dev/null
-if [ ! $? -eq 0 ]; then
-  echo "Creating group $GROUP"
-  ec2-add-group $GROUP -d "Group for Hadoop clusters."
-  ec2-authorize $GROUP -p 22    # ssh
-  ec2-authorize $GROUP -p 50030 # JobTracker web interface
-  ec2-authorize $GROUP -p 50060 # TaskTracker web interface
-  ec2-authorize $GROUP -o $GROUP -u $AWS_ACCOUNT_ID
+if ! "$bin"/launch-hadoop-cluster ; then
+  exit $?
 fi

-# Finding Hadoop image
-AMI_IMAGE=`ec2-describe-images -a | grep $S3_BUCKET | grep available | awk '{print $2}'`
-
-# Start a cluster
-echo "Starting cluster with AMI $AMI_IMAGE"
-RUN_INSTANCES_OUTPUT=`ec2-run-instances $AMI_IMAGE -n $NO_INSTANCES -g $GROUP -k gsg-keypair -d "$NO_INSTANCES,$MASTER_HOST" | grep INSTANCE | awk '{print $2}'`
-for instance in $RUN_INSTANCES_OUTPUT; do
-  echo "Waiting for instance $instance to start"
-  while true; do
-    printf "."
-    HOSTNAME=`ec2-describe-instances $instance | grep running | awk '{print $4}'`
-    if [ ! -z $HOSTNAME ]; then
-      echo "started as $HOSTNAME"
-      break;
-    fi
-    sleep 1
-  done
-done
-
-echo "Appointing master"
-MASTER_EC2_HOST=`ec2-describe-instances | grep INSTANCE | grep running | awk '{if ($7 == 0) print $4}'`
-MASTER_IP=`dig +short $MASTER_EC2_HOST`
-echo "Master is $MASTER_EC2_HOST. Please set up DNS so $MASTER_HOST points to $MASTER_IP then press return to continue."
+echo "Press return to continue."
 read dummy

 echo "Waiting before trying to connect..."
 sleep 30

-echo "Creating slaves file and copying to master"
-ec2-describe-instances | grep INSTANCE | grep running | awk '{if ($7 != 0) print $4}' > slaves
-scp $SSH_OPTS slaves "root@$MASTER_HOST:/usr/local/hadoop-$HADOOP_VERSION/conf/slaves"
-
-echo "Formatting new cluster's filesystem"
-ssh $SSH_OPTS "root@$MASTER_HOST" "/usr/local/hadoop-$HADOOP_VERSION/bin/hadoop namenode -format"
-
-echo "Starting cluster"
-ssh $SSH_OPTS "root@$MASTER_HOST" "/usr/local/hadoop-$HADOOP_VERSION/bin/start-all.sh"
-
-echo "Finished - check progress at http://$MASTER_HOST:50030/"
-
-echo "Logging in to master $MASTER_HOST."
-ssh $SSH_OPTS "root@$MASTER_HOST"
+"$bin"/start-hadoop
\ No newline at end of file

Added: lucene/hadoop/trunk/src/contrib/ec2/bin/start-hadoop
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/ec2/bin/start-hadoop?view=auto&rev=510224
==============================================================================
--- lucene/hadoop/trunk/src/contrib/ec2/bin/start-hadoop (added)
+++ lucene/hadoop/trunk/src/contrib/ec2/bin/start-hadoop Wed Feb 21 13:16:47 2007
@@ -0,0 +1,39 @@
+#!/bin/sh
+# Start Hadoop on a cluster.
+
+# Import variables
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+. "$bin"/hadoop-ec2-env.sh
+
+echo "Asking master to say hello"
+if ! ssh $SSH_OPTS "root@$MASTER_HOST" echo "hello" ; then
+  echo "SSH failed for root@$MASTER_HOST"
+  exit 1
+fi
+
+echo "Creating slaves file and copying to master"
+ec2-describe-instances | grep INSTANCE | grep running | awk '{if ($7 != 0) print $4}' > slaves
+scp $SSH_OPTS slaves "root@$MASTER_HOST:/usr/local/hadoop-$HADOOP_VERSION/conf/slaves"
+
+echo "Copying private key to master"
+scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$MASTER_HOST:/root/.ssh/id_rsa"
+ssh $SSH_OPTS "root@$MASTER_HOST" "chmod 600 /root/.ssh/id_rsa"
+
+echo "Copying private key to slaves"
+for slave in `cat slaves`; do
+  scp $SSH_OPTS $PRIVATE_KEY_PATH "root@$slave:/root/.ssh/id_rsa"
+  ssh $SSH_OPTS "root@$slave" "chmod 600 /root/.ssh/id_rsa"
+  sleep 1
+done
+
+echo "Formatting new cluster's filesystem"
+ssh $SSH_OPTS "root@$MASTER_HOST" "/usr/local/hadoop-$HADOOP_VERSION/bin/hadoop namenode -format"
+
+echo "Starting cluster"
+ssh $SSH_OPTS "root@$MASTER_HOST" "/usr/local/hadoop-$HADOOP_VERSION/bin/start-all.sh"
+
+echo "Finished - check progress at http://$MASTER_HOST:50030/"
+
+echo "Logging in to master $MASTER_HOST."
+ssh $SSH_OPTS "root@$MASTER_HOST"
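Once start-hadoop finishes, a quick smoke test (a sketch; the path assumes
the default install location baked into the AMI) is to log in and list the
new filesystem root:

  bin/hadoop-ec2 login
  /usr/local/hadoop-0.11.2/bin/hadoop dfs -ls /

then browse to http://<master hostname>:50030/ to confirm the TaskTrackers
have registered with the JobTracker.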