Author: gates Date: Mon Mar 17 10:40:59 2008 New Revision: 637987 URL: http://svn.apache.org/viewvc?rev=637987&view=rev Log: Removed Yahoo specific scripts/pig.pl, replaced with generic bash script bin/pig. Moved startHOD.expect to bin.
Added: incubator/pig/trunk/bin/ incubator/pig/trunk/bin/pig incubator/pig/trunk/bin/startHOD.expect Removed: incubator/pig/trunk/scripts/pig.pl incubator/pig/trunk/scripts/startHOD.expect Modified: incubator/pig/trunk/CHANGES.txt Modified: incubator/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/incubator/pig/trunk/CHANGES.txt?rev=637987&r1=637986&r2=637987&view=diff ============================================================================== --- incubator/pig/trunk/CHANGES.txt (original) +++ incubator/pig/trunk/CHANGES.txt Mon Mar 17 10:40:59 2008 @@ -163,3 +163,6 @@ PIG-129: making sure that temp files are stored in task's home dir and cleaned up + + PIG-115: Removed Yahoo specific scripts/pig.pl, replaced with generic + bash script bin/pig. Moved startHOD.expect to bin (joa23 via gates). Added: incubator/pig/trunk/bin/pig URL: http://svn.apache.org/viewvc/incubator/pig/trunk/bin/pig?rev=637987&view=auto ============================================================================== --- incubator/pig/trunk/bin/pig (added) +++ incubator/pig/trunk/bin/pig Mon Mar 17 10:40:59 2008 @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +# +# The Pig command script +# +# Environment Variables +# +# JAVA_HOME The java implementation to use. Required. +# +# PIG_CLASSPATH Extra Java CLASSPATH entries. +# +# PIG_HEAPSIZE The maximum amount of heap to use, in MB. +# Default is 1000. +# +# PIG_OPTS Extra Java runtime options. +# +# PIG_CONF_DIR Alternate conf dir. Default is ${PIG_HOME}/conf. +# +# PIG_ROOT_LOGGER The root appender. Default is INFO,console,DRFA +# +# PIG_HADOOP_VERSION Version of hadoop to run with. Default is 15 (0.15). 
+ +cygwin=false +case "`uname`" in +CYGWIN*) cygwin=true;; +esac +debug=false + +# filter command line parameter +for f in $@; do + if [[ $f = "-secretDebugCmd" ]]; then + debug=true + else + remaining="${remaining} $f" + fi +done + +# resolve links - $0 may be a softlink +this="$0" +while [ -h "$this" ]; do + ls=`ls -ld "$this"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '.*/.*' > /dev/null; then + this="$link" + else + this=`dirname "$this"`/"$link" + fi +done + +# convert relative path to absolute path +bin=`dirname "$this"` +script=`basename "$this"` +bin=`unset CDPATH; cd "$bin"; pwd` +this="$bin/$script" + +# the root of the Pig installation +export PIG_HOME=`dirname "$this"`/.. + +#check to see if the conf dir is given as an optional argument +if [ $# -gt 1 ] +then + if [ "--config" = "$1" ] + then + shift + confdir=$1 + shift + PIG_CONF_DIR=$confdir + fi +fi + +# Allow alternate conf dir location. +PIG_CONF_DIR="${PIG_CONF_DIR:-$PIG_HOME/conf}" + +if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then + . "${PIG_CONF_DIR}/pig-env.sh" +fi + +# some Java parameters +if [ "$JAVA_HOME" != "" ]; then + #echo "run java in $JAVA_HOME" + JAVA_HOME=$JAVA_HOME +fi + +if [ "$JAVA_HOME" = "" ]; then + echo "Error: JAVA_HOME is not set." 
+ exit 1 +fi + +JAVA=$JAVA_HOME/bin/java +JAVA_HEAP_MAX=-Xmx1000m + +# check envvars which might override default args +if [ "$PIG_HEAPSIZE" != "" ]; then + JAVA_HEAP_MAX="-Xmx""$PIG_HEAPSIZE""m" +fi + +# CLASSPATH initially contains $PIG_CONF_DIR +CLASSPATH="${PIG_CONF_DIR}" +CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar + +# for developers, add Pig classes to CLASSPATH +if [ -d "$PIG_HOME/build/classes" ]; then + CLASSPATH=${CLASSPATH}:$PIG_HOME/build/classes +fi +if [ -d "$PIG_HOME/build/test/classes" ]; then + CLASSPATH=${CLASSPATH}:$PIG_HOME/build/test/classes +fi + +# so that filenames w/ spaces are handled correctly in loops below +IFS= + +# for releases, add core pig to CLASSPATH +for f in $PIG_HOME/pig-*-core.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +# during development pig jar might be in build +for f in $PIG_HOME/build/pig-*-core.jar; do + CLASSPATH=${CLASSPATH}:$f; +done + +# Set the version for Hadoop, default to 15 +PIG_HADOOP_VERSION="${PIG_HADOOP_VERSION:-15}" +# add libs to CLASSPATH. There can be more than one version of the hadoop +# libraries in the lib dir, so don't blindly add them all. Only add the one +# that matches PIG_HADOOP_VERSION. 
+for f in $PIG_HOME/lib/*.jar; do + IS_HADOOP=`echo $f | grep hadoop` + if [ "${IS_HADOOP}x" == "x" ]; then + CLASSPATH=${CLASSPATH}:$f; + else + IS_RIGHT_VER=`echo $f | grep hadoop${PIG_HADOOP_VERSION}.jar` + if [ "${IS_RIGHT_VER}x" != "x" ]; then + CLASSPATH=${CLASSPATH}:$f; + fi + fi +done + +# add user-specified CLASSPATH last +if [ "$PIG_CLASSPATH" != "" ]; then + CLASSPATH=${CLASSPATH}:${PIG_CLASSPATH} +fi + +# default log directory & file +if [ "$PIG_LOG_DIR" = "" ]; then + PIG_LOG_DIR="$PIG_HOME/logs" +fi +if [ "$PIG_LOGFILE" = "" ]; then + PIG_LOGFILE='pig.log' +fi + +# cygwin path translation +if $cygwin; then + CLASSPATH=`cygpath -p -w "$CLASSPATH"` + PIG_HOME=`cygpath -d "$PIG_HOME"` + PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"` +fi + +# restore ordinary behaviour +unset IFS + +CLASS=org.apache.pig.Main + +PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR" +PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE" +PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME" +PIG_OPTS="$PIG_OPTS -Dpig.root.logger=${PIG_ROOT_LOGGER:-INFO,console,DRFA}" + +# run it +if [ "$debug" == "true" ]; then + echo "dry run:" + echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining} + echo +else + exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS ${remaining} +fi Added: incubator/pig/trunk/bin/startHOD.expect URL: http://svn.apache.org/viewvc/incubator/pig/trunk/bin/startHOD.expect?rev=637987&view=auto ============================================================================== --- incubator/pig/trunk/bin/startHOD.expect (added) +++ incubator/pig/trunk/bin/startHOD.expect Mon Mar 17 10:40:59 2008 @@ -0,0 +1,79 @@ +#!/usr/bin/expect +# +# This is a wretched expect script to start up HOD and scrape the necessary +# information we need to run Pig. Tragically, we can't just pipe HOD's output +# into a script, so we have to use expect. 
Also the real information we need +# is not given to us on stdout; rather, we get the name of the configuration +# file with the information we need on stdout. We have to write actual TCL to +# parse the file. +# + +# +# Quick and dirty parser to extract the value of mapred.job.tracker +# + +trap handleExit {SIGINT SIGTERM SIGHUP SIGABRT SIGPIPE} + +proc handleExit {} { + send "exit\n" + set timeout 20 + expect "do not CTL-C" + puts "Exiting" + exit +} + +proc extractMapRedHostPort {file} { + set fh [open $file r] + set line [read $fh] + close $fh + regexp {>mapred.job.tracker</name>[^<]*<value>([^<]*)</value>} $line match sub + return $sub +} + +# +# Quick and dirty parser to extract the value of fs.default.name +# +proc extractDFSHostPort {file} { + set fh [open $file r] + set line [read $fh] + close $fh + regexp {>fs.default.name</name>[^<]*<value>([^<]*)</value>} $line match sub + return $sub +} + +set mOpt {"-m" "15"} +foreach i $argv { + if {$i == "-m"} { + set mOpt {}; + } +} + +log_user 0 +set timeout -1 +#spawn /export/crawlspace/kryptonite/hod/current/bin/hod -n [join [concat $argv $mOpt]] +#set args [split [join [concat $argv $mOpt]]] +set args [concat $argv $mOpt] +spawn -ignore {SIGHUP} /export/crawlspace/kryptonite/hod/current/bin/hod -n [lindex $args 0 ] [lindex $args 1] [lindex $args 2] [lindex $args 3] [lindex $args 4] [lindex $args 5] [lindex $args 6 ] [lindex $args 7] [lindex $args 8] [lindex $args 9] [lindex $args 10] + +expect "HDFS UI on " +expect "\n" +puts -nonewline "hdfsUI: $expect_out(buffer)" + +expect "Mapred UI on " +expect "\n" +puts -nonewline "mapredUI: $expect_out(buffer)" + +expect "Hadoop config file in: " +expect "\n" +puts -nonewline "hadoopConf: $expect_out(buffer)" + +puts "hdfs: [extractDFSHostPort [string trim $expect_out(buffer)]]\r" +puts "mapred: [extractMapRedHostPort [string trim $expect_out(buffer)]]\r" + +# +# Now just wait forever. Eventually we will be ruthlessly killed. 
+# +expect_user { + eof { handleExit } + timeout {exp_continue} +}