jhsenjaliya commented on a change in pull request #2578: [GOBBLIN-707]-rewrite 
gobblin script combining all gobblin modes and …
URL: https://github.com/apache/incubator-gobblin/pull/2578#discussion_r268408431
 
 

 ##########
 File path: bin/gobblin.sh
 ##########
 @@ -17,50 +17,410 @@
 # limitations under the License.
 #
 
-calling_dir() {
-  echo "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-}
-classpath() {
-  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-
-  for i in `ls $DIR/../lib`
-  do
-      if [[ $i != hadoop* ]]
-      then
-        CLASSPATH=${CLASSPATH:+${CLASSPATH}:}$DIR/../lib/$i
-      else
-        
HADOOP_CLASSPATH=${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}$DIR/../lib/$i
-      fi
-  done
-
-  if [ ! -z "$HADOOP_HOME" ] && [ -f $HADOOP_HOME/bin/hadoop ]
-  then
-    HADOOP_CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)
-  fi
-
-  CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
-
-  if [ ! -z "$GOBBLIN_ADDITIONAL_JARS" ]
-  then
-     CLASSPATH=$GOBBLIN_ADDITIONAL_JARS:$CLASSPATH
-  fi
-
-  echo $CLASSPATH
+# JAVA_HOME is required.
+if [[ -z "$JAVA_HOME" ]]; then
+    echo -e "\nError: Environment variable JAVA_HOME not set!\n"
+    exit 1
+fi
+
+# gobblin global vars, All these can be overridden by specified values in 
gobblin-env.sh
+GOBBLIN_VERSION="0.15.0"
+
+GOBBLIN_HOME="$(cd `dirname $0`/..; pwd)"
+GOBBLIN_LIB=${GOBBLIN_HOME}/lib
+GOBBLIN_BIN=${GOBBLIN_HOME}/bin
+GOBBLIN_LOGS=${GOBBLIN_HOME}/logs
+GOBBLIN_CONF=''
+
+LOG4J_FILE_PATH=''
+CLUSTER_NAME="gobblin_cluster"
+JVM_OPTS="-Xmx1g -Xms512m"
+GOBBLIN_MODE=''
+ACTION=''
+JVM_FLAGS=''
+EXTRA_JARS=''
+VERBOSE=0
+ENABLE_GC_LOGS=0
+CMD_PARAMS=''
+GOBBLIN_MODE_TYPE_COMMAND="GOBBLIN_COMMAND"
+GOBBLIN_MODE_TYPE_SERVICE="GOBBLIN_SERVICE"
+
+# Gobblin Commands, Modes & respective Classes
+# Commands
+ADMIN_MODE='admin'
+CLI_MODE='cli'
+STATESTORE_CHECK_MODE='statestore-check'
+STATESTORE_CLEAN_MODE='statestore-clean'
+HISTORYSTORE_MANAGER_MODE='historystore-manager'
+
+# Services
+STANDALONE_MODE='standalone'
+CLUSTER_MASTER_MODE='cluster-master'
+CLUSTER_WORKER_MODE='cluster-worker'
+AWS_MODE='aws'
+YARN_MODE='yarn'
+MR_MODE='mr'
+SERVICE_MODE='service'
+
+# Command class
+ADMIN_CLASS="org.apache.gobblin.cli.Cli"
+CLI_CLASS='org.apache.gobblin.runtime.cli.GobblinCli'
+STATESTORE_CHECK_CLASS='org.apache.gobblin.runtime.util.JobStateToJsonConverter'
+STATESTORE_CLEAN_CLASS='org.apache.gobblin.metastore.util.StateStoreCleaner'
+HISTORYSTORE_MANAGER_CLASS='org.apache.gobblin.metastore.util.DatabaseJobHistoryStoreSchemaManager'
+
+# Service Class
+STANDALONE_CLASS='org.apache.gobblin.scheduler.SchedulerDaemon'
+CLUSTER_MASTER_CLASS='org.apache.gobblin.cluster.GobblinClusterManager'
+CLUSTER_WORKER_CLASS='org.apache.gobblin.cluster.GobblinTaskRunner'
+AWS_CLASS='org.apache.gobblin.aws.GobblinAWSClusterLauncher'
+YARN_CLASS='org.apache.gobblin.yarn.GobblinYarnAppLauncher'
+MR_CLASS='org.apache.gobblin.runtime.mapreduce.CliMRJobLauncher'
+SERVICE_CLASS='org.apache.gobblin.service.modules.core.GobblinServiceManager'
+
+
+function print_usage() {
+    echo "gobblin.sh  <command> <params>"
+    echo "gobblin.sh  <service-name> <start|stop|status>"
+
+    echo "Argument Options:"
+    echo "    <commands>                                    values: 
$ADMIN_MODE, $CLI_MODE, $STATESTORE_CHECK_MODE, $STATESTORE_CLEAN_MODE, 
$HISTORYSTORE_MANAGER_MODE"
+    echo "    <service>                                     values: 
$STANDALONE_MODE, $CLUSTER_MASTER_MODE, $CLUSTER_WORKER_MODE, $AWS_MODE, 
$YARN_MODE, $MR_MODE, $SERVICE_MODE."
+    echo "    --cluster-name                                assign cluster 
name ( default: $CLUSTER_NAME)."
+    echo "    --conf-dir <path-to-conf-dir>                 default is 
'$GOBBLIN_HOME/conf/<mode-name>'."
+    echo "    --log4j-conf <path-to-conf-file>              default is 
'$GOBBLIN_HOME/conf/<mode-name>/log4j.properties'."
+    echo "    --jt <resource manager URL>                   Only for MR mode: 
Job submission URL, if not set, taken from \${HADOOP_HOME}/conf."
+    echo "    --fs <file system URL>                        Only for MR mode: 
Target file system, if not set, taken from \${HADOOP_HOME}/conf."
+    echo "    --jvmopts <jvm or gc options>                 String containing 
JVM flags to include, in addition to \"$JVM_OPTS\"."
+    echo "    --jars <column-separated list of extra jars>  Column-separated 
list of extra jars to put on the CLASSPATH."
+    echo "    --enable-gc-logs                              enables gc logs & 
dumps."
+    echo "    --help                                        Display this help."
+    echo "    --verbose                                     Display full 
command used to start the process."
 }
 
+# TODO: use getopts
+shopt -s nocasematch
 for i in "$@"
 do
-  case "$1" in
-    "classpath")
-      classpath
-      exit
-  esac
+    case "$1" in
+        "$ADMIN_MODE" | "$CLI_MODE" | "$STATESTORE_CHECK_MODE" | 
"$STATESTORE_CLEAN_MODE" | "$HISTORYSTORE_MANAGER_CLASS" )
+            GOBBLIN_MODE_TYPE=$GOBBLIN_MODE_TYPE_COMMAND
+            GOBBLIN_MODE="$1"
+        ;;
+        "$STANDALONE_MODE" | "$CLUSTER_MASTER_MODE" | "$CLUSTER_WORKER_MODE" | 
"$AWS_MODE" | "$YARN_MODE" | "$MR_MODE")
+            GOBBLIN_MODE_TYPE=$GOBBLIN_MODE_TYPE_SERVICE
+            GOBBLIN_MODE="$1"
+        ;;
+        start | stop | status)
+            ACTION="$1"
+        ;;
+        --jvmflags)
+            JVM_FLAGS="$2"
+            shift
+        ;;
+        --conf-dir)
+            USER_CONF_DIR="$2"
+            shift
+        ;;
+        --log4j-conf)
+            USER_LOG4J_FILE="$2"
+            shift
+        ;;
+        --jars)
+            EXTRA_JARS="$2"
+            shift
+        ;;
+        --enable-gc-logs)
+            ENABLE_GC_LOGS=1
+        ;;
+        --cluster-name)
+            CLUSTER_NAME="$2"
+            shift
+        ;;
+        --help)
+            print_usage
+            exit 0
+        ;;
+        --verbose)
+            VERBOSE=1
+        ;;
+        --jt)
+            JOB_TRACKER_URL="$2"
+            shift
+        ;;
+        --fs)
+            FS_URL="$2"
+            shift
+        ;;
+        *)
+        CMD_PARAMS="$CMD_PARAMS $1"
+        ;;
+    esac
+    shift
 done
 
-CLASSPATH=$(classpath)
-if [ -z "$GOBBLIN_LOG4J_CONFIGURATION" ]
-then
-  GOBBLIN_LOG4J_CONFIGURATION=$(calling_dir)/../conf/log4j.properties
+
+PID_FILE_NAME=".gobblin-$GOBBLIN_MODE.pid"
+PID_FILE="$GOBBLIN_HOME/$PID_FILE_NAME"
+
+#sourcing basic gobblin env vars like GOBBLIN_HOME and GOBBLIN_LIB
+. ${GOBBLIN_BIN}/gobblin-env.sh
+
+# for gobblin commands, the action is always 'start'
+if [[ "$GOBBLIN_MODE_TYPE" == "$GOBBLIN_MODE_TYPE_COMMAND" ]]; then
+    ACTION='start'
+fi
+
+# JVM Flags
+if [[ -n "$JVM_FLAGS" ]]; then
+    JVM_OPTS="$JVM_OPTS $JVM_FLAGS"
+fi
+
+# gobblin config
+if [[ -n "$USER_CONF_DIR" ]]; then
+    GOBBLIN_CONF=$USER_CONF_DIR
+else
+    GOBBLIN_CONF=${GOBBLIN_HOME}/conf/${GOBBLIN_MODE}
+fi
+
+#log4j config file
+if [[ -n "$USER_LOG4J_FILE" ]]; then
+    LOG4J_FILE_PATH=$USER_LOG4J_FILE
+else
+    LOG4J_FILE_PATH=file://${GOBBLIN_CONF}/log4j.properties
 fi
 
-java -Dlog4j.configuration=file:$GOBBLIN_LOG4J_CONFIGURATION -cp "$CLASSPATH" 
$GOBBLIN_OPTS org.apache.gobblin.runtime.cli.GobblinCli $@
+
+GC_OPTS=''
+if [[ ${ENABLE_GC_LOGS} -eq 1 ]]; then
+    GC_OPTS+="-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+UseCompressedOops "
+    GC_OPTS+="-XX:+PrintGCDetails -XX:+PrintGCDateStamps 
-XX:+PrintTenuringDistribution "
+    GC_OPTS+="-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=$GOBBLIN_LOGS/ "
+    GC_OPTS+="-Xloggc:$GOBBLIN_LOGS/gobblin-$GOBBLIN_MODE-gc.log "
+fi
+
+export HADOOP_USER_CLASSPATH_FIRST=true
+
+
+function start() {
+    # Build classpth
+    GOBBLIN_JARS=''
+    for jarFile in `ls ${GOBBLIN_LIB}/*`
+    do
+        if [[ -z "$GOBBLIN_JARS" ]]; then
+            GOBBLIN_JARS=${jarFile}
+        else
+            GOBBLIN_JARS=${GOBBLIN_JARS}:${jarFile}
+        fi
+    done
+
+    GOBBLIN_CLASSPATH=${GOBBLIN_JARS}
+    if [[ -n "$HADOOP_HOME" ]]; then
+        GOBBLIN_CLASSPATH=${GOBBLIN_CLASSPATH}:${HADOOP_HOME}/lib
+    else
+        echo "WARN: HADOOP_HOME is not defined. Hadoop libs are not added to 
classpath."
+    fi
+
+    if [[ -n "$EXTRA_JARS" ]]; then
+        GOBBLIN_CLASSPATH=${GOBBLIN_CLASSPATH}:"$EXTRA_JARS"
+    fi
+
+    GOBBLIN_CLASSPATH=${GOBBLIN_CONF}:${GOBBLIN_CLASSPATH}
+
+    LOG_OUT_FILE="${GOBBLIN_LOGS}/${GOBBLIN_MODE}.out"
+    LOG_ERR_FILE="${GOBBLIN_LOGS}/${GOBBLIN_MODE}.err"
+
+    # for all gobblin commands
+    if [[ "$GOBBLIN_MODE_TYPE" == "$GOBBLIN_MODE_TYPE_COMMAND" ]]; then
+        if [[ "$GOBBLIN_MODE" = "$ADMIN_MODE" ]]; then
+            CLASS_N_ARGS="$ADMIN_CLASS"
+        elif [[ "$GOBBLIN_MODE" = "$CLI_MODE" ]]; then
+            CLASS_N_ARGS="$CLI_CLASS"
+        elif [[ "$GOBBLIN_MODE" = "$STATESTORE_CHECK_MODE" ]]; then
+            CLASS_N_ARGS="$STATESTORE_CHECK_CLASS"
+        elif [[ "$GOBBLIN_MODE" = "$STATESTORE_CLEAN_MODE" ]]; then
+            CLASS_N_ARGS="$STATESTORE_CLEAN_CLASS"
+        elif [[ "$GOBBLIN_MODE" = "$HISTORYSTORE_MANAGER_MODE" ]]; then
+            CLASS_N_ARGS="$HISTORYSTORE_MANAGER_CLASS"
+        fi
+
+        if [[ $VERBOSE -eq 1 ]]; then
+            echo "Running command: $JAVA_HOME/bin/java $GC_OPTS $JVM_OPTS -cp 
$GOBBLIN_CLASSPATH  $CLASS_N_ARGS $CMD_PARAMS";
+        fi
+
+        $JAVA_HOME/bin/java $GC_OPTS $JVM_OPTS -cp $GOBBLIN_CLASSPATH  
$CLASS_N_ARGS $CMD_PARAMS
+    # for all gobblin services
+    else
+        if [[ "$GOBBLIN_MODE" = "$MR_MODE" ]]; then
+            
MR_MODE_LIB_JARS="gobblin-api-$GOBBLIN_VERSION.jar,gobblin-avro-json-$GOBBLIN_VERSION.jar,
+                    
gobblin-codecs-$GOBBLIN_VERSION.jar,gobblin-core-$GOBBLIN_VERSION.jar,gobblin-core-base-$GOBBLIN_VERSION.jar,
+                    
gobblin-crypto-$GOBBLIN_VERSION.jar,gobblin-crypto-provider-$GOBBLIN_VERSION.jar,gobblin-data-management-$GOBBLIN_VERSION.jar,
+                    
gobblin-metastore-$GOBBLIN_VERSION.jar,gobblin-metrics-$GOBBLIN_VERSION.jar,gobblin-metrics-base-$GOBBLIN_VERSION.jar,
+                    
gobblin-metadata-$GOBBLIN_VERSION.jar,gobblin-utility-$GOBBLIN_VERSION.jar,avro-1.8.1.jar,avro-mapred-1.8.1.jar,
+                    
commons-lang3-3.4.jar,config-1.2.1.jar,data-11.0.0.jar,gson-2.6.2.jar,guava-15.0.jar,guava-retrying-2.0.0.jar,
+                    
joda-time-2.9.3.jar,javassist-3.18.2-GA.jar,kafka_2.11-0.8.2.2.jar,kafka-clients-0.8.2.2.jar,metrics-core-2.2.0.jar,
+                    
metrics-core-3.2.3.jar,metrics-graphite-3.2.3.jar,scala-library-2.11.8.jar,influxdb-java-2.1.jar,okhttp-2.4.0.jar,
+                    
okio-1.4.0.jar,reactive-streams-1.0.0.jar,retrofit-1.9.0.jar,reflections-0.9.10.jar"
+
+            MR_MODE_LIB_JARS="${MR_MODE_LIB_JARS},$EXTRA_JARS"
+            JT_COMMAND=$([ -z $JOB_TRACKER_URL ] && echo "" || echo "-jt 
$JOB_TRACKER_URL")
+            FS_COMMAND=$([ -z $FS_URL ] && echo "" || echo "-fs $FS_URL")
+            GOBBLIN_COMMAND="hadoop jar 
$GOBBLIN_LIB/gobblin-runtime-$GOBBLIN_VERSION.jar $MR_CLASS \
+                                        -D mapreduce.user.classpath.first=true 
-D mapreduce.job.user.classpath.first=true \
+                                        $JT_COMMAND $FS_COMMAND \
+                                        -libjars $MR_MODE_LIB_JARS \
+                                        -sysconfig 
$GOBBLIN_CONF/application.properties \
+                                        -jobconfig 
$GOBBLIN_CONF/application.properties"
+        else
+            CLASS_N_ARGS=''
+            if [[ "$GOBBLIN_MODE" = "$STANDALONE_MODE" ]]; then
+                CLASS_N_ARGS="$STANDALONE_CLASS $GOBBLIN_CONF/application.conf"
+
+            elif [[ "$GOBBLIN_MODE" = "$AWS_MODE" ]]; then
+                CLASS_N_ARGS="$AWS_CLASS"
+
+            elif [[ "$GOBBLIN_MODE" = "$YARN_MODE" ]]; then
+                
GOBBLIN_CLASSPATH="${GOBBLIN_CLASSPATH}:${HADOOP_YARN_HOME}/lib"
+                CLASS_N_ARGS="$YARN_CLASS"
+
+            elif [[ "$GOBBLIN_MODE" = "$CLUSTER_MASTER_MODE" ]]; then
+                CLASS_N_ARGS="$CLUSTER_MASTER_CLASS --standalone_cluster true 
--app_name $CLUSTER_NAME"
+
+            elif [[ "$GOBBLIN_MODE" = "$SERVICE_MODE" ]]; then
+                CLASS_N_ARGS="$SERVICE_CLASS --service_name 
Gobblin-$SERVICE_MODE"
+
+            elif [[ "$GOBBLIN_MODE" = "$CLUSTER_WORKER_MODE" ]]; then
+                #Find largest worker id and use next one to start worker in 
incremental order
+                LAST_WORKER_ID=$(ps aux | grep -v grep | grep -Po 
"($CLUSTER_WORKER_CLASS)(.*)(cluster-worker.\K([0-9]+))" | sort --version-sort 
| tail -1)
+                WORKER_ID=$((LAST_WORKER_ID+1))
+                LOG_OUT_FILE="${GOBBLIN_LOGS}/${GOBBLIN_MODE}.$WORKER_ID.out"
+                LOG_ERR_FILE="${GOBBLIN_LOGS}/${GOBBLIN_MODE}.$WORKER_ID.err"
+                CLASS_N_ARGS="$CLUSTER_WORKER_CLASS --app_name $CLUSTER_NAME 
--helix_instance_name ${GOBBLIN_MODE}.$WORKER_ID"
+            else
+                echo "Invalid gobblin command or service... [EXITING]"
+                exit 1
+            fi
+            GOBBLIN_COMMAND="$JAVA_HOME/bin/java 
-Dlog4j.configuration=$LOG4J_FILE_PATH -cp $GOBBLIN_CLASSPATH $GC_OPTS 
$JVM_OPTS $CLASS_N_ARGS"
+        fi
+
+        # execute the command
+        if [[ $VERBOSE -eq 1 ]]; then
+            echo "Running command: $GOBBLIN_COMMAND";
+        fi
+
+        nohup $GOBBLIN_COMMAND 1>> ${LOG_OUT_FILE} 2>> ${LOG_ERR_FILE} &
+        PID=$!
+        echo $PID >> $PID_FILE
+        if [[ $? != 0 ]]; then
+            echo "Starting the Gobblin $GOBBLIN_MODE process... [FAILED]"
+        else
+            echo "Started the Gobblin $GOBBLIN_MODE process [pid: $PID] ... 
[DONE]"
+        fi
+
+    fi
+}
+
+function stop() {
+    #    echo "Stopping the Gobblin $MODE_TYPE process..."
+    PID=''
+    if [[ ! -f $PID_FILE ]]; then
+        echo "Gobblin process id file not found at $PID_FILE"
+        while true; do
+            read -p "Do you want to search gobblin $GOBBLIN_MODE process and 
stop it? (y/n): " search_and_kill
+            case ${search_and_kill} in
+                [Yy]*)
+                    class_to_search=''
+                    if [[ "$GOBBLIN_MODE" = "$MR_MODE" ]]; then
+                        class_to_search="$MR_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$CLI_MODE" ]]; then
+                        class_to_search="$CLI_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$STANDALONE_MODE" ]]; then
+                        class_to_search="$STANDALONE_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$AWS_MODE" ]]; then
+                        class_to_search="$AWS_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$YARN_MODE" ]]; then
+                        class_to_search="$YARN_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$CLUSTER_MASTER_MODE" ]]; then
+                        class_to_search="$CLUSTER_MASTER_CLASS"
+                    elif [[ "$GOBBLIN_MODE" = "$CLUSTER_WORKER_MODE" ]]; then
+                        class_to_search=$CLUSTER_WORKER_CLASS
+                    fi
+
+                    if [[ -z "$class_to_search" ]]; then
+                        echo "Could not figure out process to search for MODE: 
$GOBBLIN_MODE...[ABORTED]"
+                        exit
+                    fi
+
+                    PID=$(ps aux | grep "$class_to_search" | grep -v grep | 
awk '{print $2}')
+                    break
+                    ;;
+                [Nn]*)
+                    echo "Stopping the Gobblin $GOBBLIN_MODE process... 
[ABORTED]"
+                    exit
+                    ;;
+                *)
+                    echo "Please answer yes or no."
+                    ;;
+            esac
+        done
+    else
+        PID=`tail -1 $PID_FILE`
+    fi
+
+    if [[ -z "$PID" ]]; then
+        echo "Can not find any running Gobblin $GOBBLIN_MODE process..."
+    else
+        if kill -0 $PID > /dev/null 2>&1; then
+            kill $PID
+            printf "Stopping the Gobblin $GOBBLIN_MODE process (pid: $PID)... 
"; sleep 1; printf "[DONE]\n"
+        else
+            echo "Gobblin $GOBBLIN_MODE process (pid: $PID) is not running."
+        fi
+        # remove the pid from pid_file, and remove the file if no more pid's 
left.
+        sed -i '' '$ d' $PID_FILE
+        if [[ -s $PID_FILE ]]; then
+            rm $PID_FILE;
+        fi
+    fi
+}
+
+function status() {
+    #    echo "Checking for Gobblin $MODE_TYPE service status ..."
+    if [[ -e ${PID_FILE} ]]; then
+        PID=`cat $PID_FILE`
+    fi
+    if [[ -z ${PID} ]]; then
+        echo "Gobblin $GOBBLIN_MODE process id not found, probably it is not 
running."
+    elif ps -p ${PID} > /dev/null; then
+        echo "Gobblin $GOBBLIN_MODE process is running... [ pid: $PID ]."
+    else
+        echo "Gobblin $GOBBLIN_MODE process is not running. [ last known pid: 
${PID}]."
+    fi
+}
+
+
+case "$ACTION" in
+    "start")
+        start
+    ;;
+    "stop")
+        stop
+    ;;
+    "status")
+        status
+    ;;
+    "restart")
+        stop
+        sleep 2
+        start
+    ;;
+    *)
+        print_usage
+#        if [[ "$GOBBLIN_MODE" == "$CLI_MODE" ]] || [[ "$GOBBLIN_MODE" == 
"$STATESTORE_CHECK_MODE" ]] || [[ "$GOBBLIN_MODE" == "$STATESTORE_CLEAN_MODE" 
]]; then
 
 Review comment:
   I will remove this commented code.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to