SINGA-26 Run distributed training in a single command: avoid using ssh when starting a process on localhost
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7d9c0fb4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7d9c0fb4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7d9c0fb4 Branch: refs/heads/master Commit: 7d9c0fb4bcb06440b3212297a555b223ff9af48a Parents: 3af32e5 Author: wang sheng <[email protected]> Authored: Sun Jun 28 05:03:16 2015 +0800 Committer: wang sheng <[email protected]> Committed: Sun Jun 28 05:03:16 2015 +0800 ---------------------------------------------------------------------- bin/singa-run.sh | 17 ++++++++++------- bin/singa-stop.sh | 6 +++++- src/utils/cluster.cc | 2 +- 3 files changed, 16 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d9c0fb4/bin/singa-run.sh ---------------------------------------------------------------------- diff --git a/bin/singa-run.sh b/bin/singa-run.sh index 697ae75..46ed715 100755 --- a/bin/singa-run.sh +++ b/bin/singa-run.sh @@ -80,20 +80,23 @@ if [ $# = 2 ] ; then cmd="./singa "$@ echo starting singa ... 
echo executing : $cmd - exec $cmd + $cmd elif [ $# = 1 ] ; then # ssh and start singa processes ssh_options="-oStrictHostKeyChecking=no \ -oUserKnownHostsFile=/dev/null \ -oLogLevel=quiet" hosts=(`cat $host_path |cut -d ' ' -f 1`) + cmd="./singa -cluster=$conf_path/cluster.conf -model=$conf_path/model.conf" + ssh_cmd="cd $BASE; "$cmd for i in ${hosts[@]} ; do - cmd="cd $BASE; \ - ./singa \ - -cluster=$conf_path/cluster.conf \ - -model=$conf_path/model.conf" - echo executing @ $i : $cmd - ssh $ssh_options $i $cmd & + if [ $i = localhost ] ; then + echo executing : $cmd + $cmd & + else + echo executing @ $i : $ssh_cmd + ssh $ssh_options $i $ssh_cmd & + fi done wait fi http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d9c0fb4/bin/singa-stop.sh ---------------------------------------------------------------------- diff --git a/bin/singa-stop.sh b/bin/singa-stop.sh index 75f2a5a..ebd74e8 100755 --- a/bin/singa-stop.sh +++ b/bin/singa-stop.sh @@ -54,7 +54,11 @@ elif [ $# = 1 ] ; then for i in ${hosts[@]} ; do cmd="killall -s SIGKILL "$PROC_NAME echo kill singa @ $i ... - ssh $ssh_options $i $cmd + if [ $i == localhost ] ; then + $cmd + else + ssh $ssh_options $i $cmd + fi done fi http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d9c0fb4/src/utils/cluster.cc ---------------------------------------------------------------------- diff --git a/src/utils/cluster.cc b/src/utils/cluster.cc index 8f8024b..fdbde69 100644 --- a/src/utils/cluster.cc +++ b/src/utils/cluster.cc @@ -56,9 +56,9 @@ Cluster::Cluster(const ClusterProto &cluster, int procs_id) { void Cluster::Register(const string& endpoint){ procs_id_=cluster_rt_->RegistProc(endpoint); - LOG(ERROR)<<endpoint; CHECK_GE(procs_id_,0); CHECK_LT(procs_id_,nprocs()); + LOG(ERROR) << "proc #" << procs_id_ << " -> " << endpoint; } const string Cluster::endpoint(int procsid) const{ CHECK_LT(procsid, nprocs());
