Repository: incubator-singa
Updated Branches:
  refs/heads/master 5bf1c9280 -> 29de86337


SINGA-38 Support concurrent jobs

clean bash scripts in /bin
  * scripts change input relative path to absolute path
  * scripts catch errors when running external scripts/binaries
  * add singa-env.sh to generate environment variables


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f746b995
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f746b995
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f746b995

Branch: refs/heads/master
Commit: f746b995ecffe4abe2f0667dbf0c10624ce00780
Parents: 5bf1c92
Author: wang sheng <[email protected]>
Authored: Sun Jul 19 05:31:32 2015 +0800
Committer: wang sheng <[email protected]>
Committed: Tue Jul 21 22:16:35 2015 +0800

----------------------------------------------------------------------
 bin/singa-cleanup.sh    |  35 +++++++++++++
 bin/singa-env.sh        |  57 +++++++++++++++++++++
 bin/singa-run.sh        | 119 ++++++++++++++++---------------------------
 bin/singa-stop.sh       |  70 +++++++++++--------------
 bin/zk-service.sh       |  72 +++++++++++++-------------
 src/trainer/worker.cc   |   2 +-
 src/utils/cluster_rt.cc |   2 +-
 tool/gen_hosts.py       |  23 +++++----
 8 files changed, 214 insertions(+), 166 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/bin/singa-cleanup.sh
----------------------------------------------------------------------
diff --git a/bin/singa-cleanup.sh b/bin/singa-cleanup.sh
new file mode 100755
index 0000000..9b542c6
--- /dev/null
+++ b/bin/singa-cleanup.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+#
+#/**
+# * Copyright 2015 The Apache Software Foundation
+# *
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements.  See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership.  The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# 
+# clean up singa processes and zookeeper metadata
+#
+
+# get environment variables
+. `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
+
+# clean singa jobs and data
+$SINGA_BIN/singa-stop.sh || exit 1
+
+# close zookeeper
+if [ $SINGA_MANAGES_ZK = true ]; then
+  $SINGA_BIN/zk-service.sh stop
+fi

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/bin/singa-env.sh
----------------------------------------------------------------------
diff --git a/bin/singa-env.sh b/bin/singa-env.sh
new file mode 100755
index 0000000..10578b8
--- /dev/null
+++ b/bin/singa-env.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+#/**
+# * Copyright 2015 The Apache Software Foundation
+# *
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements.  See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership.  The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+#
+# set Singa environment variables, includes:
+#   * SINGA_HOME
+#   * SINGA_BIN
+#   * SINGA_CONF
+#   * ZK_HOME
+#   * SINGA_MANAGES_ZK
+#
+
+# set SINGA_BIN
+if [ -z $SINGA_BIN ]; then
+  SINGA_BIN=`dirname "${BASH_SOURCE-$0}"`
+  SINGA_BIN=`cd "$SINGA_BIN">/dev/null; pwd`
+fi
+
+# set SINGA_HOME
+if [ -z $SINGA_HOME ]; then
+  SINGA_HOME=`cd "$SINGA_BIN/..">/dev/null; pwd`
+fi
+
+# set SINGA_CONF
+if [ -z $SINGA_CONF ]; then
+  SINGA_CONF=$SINGA_HOME/conf
+fi
+
+# set ZK_HOME
+if [ -z $ZK_HOME ]; then
+  ZK_HOME=$SINGA_HOME/thirdparty/zookeeper-3.4.6
+  SINGA_MANAGES_ZK=true
+fi
+
+# set SINGA_MANAGES_ZK
+if [ -z $SINGA_MANAGES_ZK ]; then
+  SINGA_MANAGES_ZK=false
+fi
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/bin/singa-run.sh
----------------------------------------------------------------------
diff --git a/bin/singa-run.sh b/bin/singa-run.sh
index 0a8c9f6..2c282c3 100755
--- a/bin/singa-run.sh
+++ b/bin/singa-run.sh
@@ -20,88 +20,59 @@
 # * limitations under the License.
 # */
 #
-# Run a Singa job
+# run a Singa job
 #
 
-usage="Usage: \n \
-  (single node): singa-run.sh -cluster=YOUR_CONF_FILE -model=YOUR_CONF_FILE \n \
-  (distributed): singa-run.sh -conf=YOUR_CONF_DIR \
-  (the directory should contain cluster.conf/model.conf/hostfile)"
+usage="Usage: singa-run.sh -conf=CONF_DIR 
+      (CONF_DIR should contain cluster.conf && model.conf)"
+# usage="Usage: \n
+#       (single process): singa-run.sh -cluster=YOUR_CONF_FILE -model=YOUR_CONF_FILE \n
+#       (multi-process): singa-run.sh -conf=YOUR_CONF_DIR 
+#       (the directory should contain cluster.conf && model.conf)"
 
-#if [ $# -le 0 ] || [ $# -ge 3 ] ; then
-#  echo -e $usage
-#  exit 1
-#fi
-
-valid_args=false
-
-if [ $# = 1 ] ; then
-  if [[ $1 = "-conf="* ]] ; then
-    valid_args=true
-    conf_path=${1:6}
-    host_path=$conf_path/job.hosts
-  fi
-elif [ $# = 2 ] ; then
-  if [[ $1 = "-cluster="* ]] && [[ $2 = "-model="*  ]] ; then
-    valid_args=true
-  elif [[ $2 = "-cluster="* ]] && [[ $1 = "-model="*  ]] ; then
-    valid_args=true
-  fi
-fi
-
-if [ $valid_args = false ] ; then
-  echo -e $usage
+# check arguments
+if [ $# != 1 ] || [[ $1 != "-conf="* ]]; then
+  echo $usage
   exit 1
 fi
 
-# get singa-base
-BIN=`dirname "${BASH_SOURCE-$0}"`
-BIN=`cd "$BIN">/dev/null; pwd`
-BASE=`cd "$BIN/..">/dev/null; pwd`
-
-cd $BASE
+# get environment variables
+. `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
+# get workspace path
+workspace=`cd "${1:6}">/dev/null; pwd`
 
 # start zookeeper
-$BIN/zk-service.sh start 2>/dev/null
+if [ $SINGA_MANAGES_ZK = true ]; then
+  $SINGA_BIN/zk-service.sh start || exit 1
+fi
 
-# wait for zk service to be up
-sleep 3
+# cleanup old processes and data
+$SINGA_BIN/singa-stop.sh || exit 1
 
-# clenup singa data
-$BIN/singa-stop.sh conf/hostfile
+# generate host file
+host_file=$workspace/job.hosts
+python $SINGA_HOME/tool/gen_hosts.py -conf=$workspace/cluster.conf \
+                                     -hosts=$SINGA_CONF/hostfile \
+                                     -output=$host_file \
+                                     || exit 1
 
-# check mode
-if [ $# = 2 ] ; then
-  # start single singa process
-  cmd="./singa "$@
-  echo starting singa ...
-  echo executing : $cmd
-  $cmd
-elif [ $# = 1 ] ; then
-  # start multiple singa processes
-  # generate host file
-  cmd=" python tool/gen_hosts.py -conf=$conf_path/cluster.conf \
-    -src=conf/hostfile -dst=$host_path"
-  echo $cmd
-  $cmd
-  # ssh and start singa processes
-  ssh_options="-oStrictHostKeyChecking=no \
-  -oUserKnownHostsFile=/dev/null \
-  -oLogLevel=quiet"
-  hosts=(`cat $host_path |cut -d ' ' -f 1`)
-  cmd="./singa -cluster=$conf_path/cluster.conf -model=$conf_path/model.conf"
-  ssh_cmd="cd $BASE; "$cmd
-  for i in ${hosts[@]} ; do
-    if [ $i = localhost ] ; then
-      echo executing : $cmd
-      $cmd &
-    else
-      echo executing @ $i : $ssh_cmd
-      ssh $ssh_options $i $ssh_cmd &
-    fi
-  done
-  wait
-fi
-
-# cleanup singa data
-#$BIN/singa-stop.sh conf/hostfile
+# ssh and start singa processes
+ssh_options="-oStrictHostKeyChecking=no \
+-oUserKnownHostsFile=/dev/null \
+-oLogLevel=quiet"
+hosts=`cat $host_file |cut -d ' ' -f 1`
+# cd to SINGA_HOME as it need conf/singa.conf
+cd $SINGA_HOME
+singa_run="./singa -cluster=$workspace/cluster.conf -model=$workspace/model.conf"
+singa_sshrun="cd $SINGA_HOME; ./singa -cluster=$workspace/cluster.conf \
+              -model=$workspace/model.conf"
+for i in ${hosts[@]} ; do
+  if [ $i = localhost ] ; then
+    echo executing : $singa_run
+    $singa_run &
+  else
+    echo executing @ $i : $singa_sshrun
+    ssh $ssh_options $i $singa_sshrun &
+  fi
+done
+wait

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/bin/singa-stop.sh
----------------------------------------------------------------------
diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh
index 0e7170b..e3f24dc 100755
--- a/bin/singa-stop.sh
+++ b/bin/singa-stop.sh
@@ -20,53 +20,41 @@
 # * limitations under the License.
 # */
 # 
-# Clean up singa processes and zookeeper metadata
+# clean up singa processes and zookeeper metadata
 #
 
-usage="Usage: \n \
-      (local process): singa-stop.sh \n \
-      (distributed): singa-stop.sh HOST_FILE"
-
-if [ $# -gt 1 ]; then
-  echo -e $usage
-  exit 1
-fi
-
-BIN=`dirname "${BASH_SOURCE-$0}"`
-BIN=`cd "$BIN">/dev/null; pwd`
-BASE=`cd "$BIN/..">/dev/null; pwd`
-ZKDATA_DIR="/tmp/zookeeper"
-
-PROC_NAME="singa"
-HOST_FILE=$1
+# usage="Usage: \n \
+#       (local process): singa-stop.sh \n \
+#       (distributed): singa-stop.sh HOST_FILE"
+# 
+# if [ $# -gt 1 ]; then
+#   echo -e $usage
+#   exit 1
+# fi
 
-cd $BASE
+# get environment variables
+. `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
 
 # kill singa processes
-if [ $# = 0 ] ; then
-  echo kill singa @ localhost ...
-  cmd="killall -s SIGKILL "$PROC_NAME
-  $cmd
-elif [ $# = 1 ] ; then
-  ssh_options="-oStrictHostKeyChecking=no \
-  -oUserKnownHostsFile=/dev/null \
-  -oLogLevel=quiet"
-  hosts=(`cat $HOST_FILE |cut -d ' ' -f 1`)
-  for i in ${hosts[@]} ; do
-    cmd="killall -s SIGKILL -r "$PROC_NAME
-    echo kill singa @ $i ...
-    if [ $i == localhost ] ; then
-      $cmd
-    else
-      ssh $ssh_options $i $cmd
-    fi
-  done
-fi
-
+host_file=$SINGA_CONF/hostfile
+ssh_options="-oStrictHostKeyChecking=no \
+             -oUserKnownHostsFile=/dev/null \
+             -oLogLevel=quiet"
+hosts=`cat $host_file |cut -d ' ' -f 1`
+singa_kill="killall -s SIGKILL -r singa"
+for i in ${hosts[@]}; do
+  echo kill singa @ $i ...
+  if [ $i == localhost ]; then
+    $singa_kill
+  else
+    ssh $ssh_options $i $singa_kill
+  fi
+done
 # wait for killall command
 sleep 2
 
-echo cleanning metadata in zookeeper ...
 # remove zk data
-./singatool
-
+# singatool need global conf under SINGA_HOME
+echo cleanning metadata in zookeeper ...
+cd $SINGA_HOME
+./singatool || exit 1

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/bin/zk-service.sh
----------------------------------------------------------------------
diff --git a/bin/zk-service.sh b/bin/zk-service.sh
index 1b67c7c..f9d3823 100755
--- a/bin/zk-service.sh
+++ b/bin/zk-service.sh
@@ -20,59 +20,55 @@
 # * limitations under the License.
 # */
 # 
-# Manage a zookeeper service
+# manage ZooKeeper service
 #
 
 usage="Usage: zk-service.sh [start|stop]"
 
-if [ $# -le 0 ]; then
+if [ $# != 1 ]; then
   echo $usage
   exit 1
 fi
 
-BIN=`dirname "${BASH_SOURCE-$0}"`
-BIN=`cd "$BIN">/dev/null; pwd`
-BASE=`cd "$BIN/..">/dev/null; pwd`
-ZKBASE=$BASE/thirdparty/zookeeper-3.4.6
+# get environment variables
+. `dirname "${BASH_SOURCE-$0}"`/singa-env.sh
 
-if [ -z $SINGA_MANAGES_ZK ]; then
-  SINGA_MANAGES_ZK=true
+# check if singa manages zookeeper service
+if [ $SINGA_MANAGES_ZK != true ]; then
+  echo "Singa does not manage a valid zookeeper service (SINGA_MANAGES_ZK != true)"
+  exit 1
 fi
 
-if [ $SINGA_MANAGES_ZK = true ]; then
-  # check zookeeper installation
-  if [ ! -d $ZKBASE ]; then
-    echo "zookeeper not found, please install zookeeper first:"
-    echo "$./SINGA_BASE/thirdparty/install.sh zookeeper"
-    exit 1
-  fi
+# check zookeeper installation
+if [ ! -d $ZK_HOME ]; then
+  echo "zookeeper not found at $ZK_HOME"
+  echo "if you do not have zookeeper service, please install:"
+  echo "    $SINGA_HOME/thirdparty/install.sh zookeeper"
+  echo "otherwise, please set ZK_HOME correctly"
+  exit 1
 fi
 
-# get argument
-cmd=$1
-
-case $cmd in
-
-(start)
-  # start zk service
-  if [ $SINGA_MANAGES_ZK = true ]; then
-    # check zoo,cfg
-    if [ ! -f $ZKBASE/conf/zoo.cfg ]; then
+# get command
+case $1 in
+  start)
+    # start zk service
+    # check zoo.cfg
+    if [ ! -f $ZK_HOME/conf/zoo.cfg ]; then
       echo "zoo.cfg not found, create from sample.cfg"
-      cp $ZKBASE/conf/zoo_sample.cfg $ZKBASE/conf/zoo.cfg
+      cp $ZK_HOME/conf/zoo_sample.cfg $ZK_HOME/conf/zoo.cfg
     fi
-    # echo 'starting zookeeper service...'
-    $ZKBASE/bin/zkServer.sh start
-  fi
-  ;;
-
-(stop)
-  # stop zk service
-  if [ $SINGA_MANAGES_ZK = true ]; then
-    # echo 'stopping zookeeper service...'
-    $ZKBASE/bin/zkServer.sh stop
-  fi
-  ;;
+    # cd to SINGA_HOME as zookeeper.out will be here
+    cd $SINGA_HOME
+    $ZK_HOME/bin/zkServer.sh start 2>/dev/null
+    ;;
 
+  stop)
+    # stop zk service
+    $ZK_HOME/bin/zkServer.sh stop 2>/dev/null
+    ;;
+  
+  *)
+    echo $usage
+    exit 1
 esac
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/src/trainer/worker.cc
----------------------------------------------------------------------
diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc
index b6ce0d5..7d779ad 100644
--- a/src/trainer/worker.cc
+++ b/src/trainer/worker.cc
@@ -183,7 +183,7 @@ void Worker::Run() {
   msg->set_type(kStop);
   dealer_->Send(&msg);  // use param dealer to send the stop msg
 
-  LOG(ERROR) << "Worker (group = " <<grp_id_ << ", id = " << id_ << ") stop";
+  LOG(ERROR) << "Worker (group = " <<grp_id_ << ", id = " << id_ << ") stops";
 }
 
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/src/utils/cluster_rt.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster_rt.cc b/src/utils/cluster_rt.cc
index 408adde..7e5a86b 100644
--- a/src/utils/cluster_rt.cc
+++ b/src/utils/cluster_rt.cc
@@ -106,7 +106,7 @@ bool ZKService::Exist(const char* path) {
   int ret = zoo_exists(zkhandle_, path, 0, &stat);
   if (ret == ZOK) return true;
   else if (ret == ZNONODE) return false;
-  LOG(ERROR) << "Unhandled ZK error code: " << ret << " (zoo_exists)";
+  //LOG(ERROR) << "Unhandled ZK error code: " << ret << " (zoo_exists)";
   return false;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f746b995/tool/gen_hosts.py
----------------------------------------------------------------------
diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py
index 570eff9..a3bec47 100644
--- a/tool/gen_hosts.py
+++ b/tool/gen_hosts.py
@@ -9,8 +9,8 @@ from pb2.cluster_pb2 import ClusterProto
 # parse command line
 parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job')
 parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='cluster.conf file')
-parser.add_argument('-src', dest='src', metavar='SRC_FILE', required=True, help='global host file')
-parser.add_argument('-dst', dest='dst', metavar='DST_FILE', required=True, help='generated list')
+parser.add_argument('-hosts', dest='hosts', metavar='HOST_FILE', required=True, help='global host file')
+parser.add_argument('-output', dest='output', metavar='OUTPUT_FILE', required=True, help='generated list')
 args = parser.parse_args();
 
 # read from .conf file
@@ -27,21 +27,22 @@ else:
 fd_conf.close()
 
 # read from source host file
-fd_src = open(args.src, 'r')
+fd_hosts = open(args.hosts, 'r')
 hosts = []
-for line in fd_src:
+for line in fd_hosts:
   line = line.strip()
   if len(line) == 0 or line[0] == '#':
     continue
   hosts.append(line)
-fd_src.close()
+fd_hosts.close()
 
-# write to dst file
+# write to output file
 num_hosts = len(hosts)
 if (num_hosts == 0):
-  print 'ERROR: source host file is empty'
-  sys.exit()
-fd_dst = open(args.dst, 'w')
+  print "contains no valid host %s" % args.hosts
+  sys.exit(1)
+fd_output = open(args.output, 'w')
 for i in range(nprocs):
-  fd_dst.write(hosts[i % num_hosts] + '\n')
-fd_dst.close()
+  fd_output.write(hosts[i % num_hosts] + '\n')
+fd_output.close()
+print 'generate host list at %s' % args.output

Reply via email to