Repository: incubator-singa Updated Branches: refs/heads/master 039de8b0a -> f0071a5c8
SINGA-52 Remove Python dependency in bash scripts remove python scripts for running singa from now on, all complex functions for setting up singa are done by tool.cc (singatool binary) minor update: add workspace field in all examples Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f0071a5c Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f0071a5c Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f0071a5c Branch: refs/heads/master Commit: f0071a5c8ece4f44f9ff1c3cef30b17b66363628 Parents: 039de8b Author: wang sheng <[email protected]> Authored: Thu Aug 13 14:18:44 2015 +0800 Committer: wang sheng <[email protected]> Committed: Thu Aug 13 14:58:42 2015 +0800 ---------------------------------------------------------------------- bin/singa-env.sh | 4 +- bin/singa-run.sh | 5 +- examples/mnist/conv.conf | 2 +- examples/mnist/job.conf | 1 + examples/mnist/rbm_job.conf | 1 + src/utils/tool.cc | 99 +++++++++++++++++++++++++++++----------- tool/gen_hosts.py | 49 -------------------- 7 files changed, 78 insertions(+), 83 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/bin/singa-env.sh ---------------------------------------------------------------------- diff --git a/bin/singa-env.sh b/bin/singa-env.sh index c9d42bd..bfefc5e 100755 --- a/bin/singa-env.sh +++ b/bin/singa-env.sh @@ -50,8 +50,8 @@ fi # set SINGA_LOG if [ -z $SINGA_LOG ]; then - # add -global arg, so no need to run under SINGA_HOME - SINGA_LOG=`"$SINGA_HOME"/singatool getlogdir -global="$SINGA_CONF"/singa.conf` + # add -confdir arg, so no need to run under SINGA_HOME + SINGA_LOG=`"$SINGA_HOME"/singatool getlogdir -confdir="$SINGA_CONF"` [ $? == 0 ] || exit 1 fi http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/bin/singa-run.sh ---------------------------------------------------------------------- diff --git a/bin/singa-run.sh b/bin/singa-run.sh index aa65fd9..4c7bf04 100755 --- a/bin/singa-run.sh +++ b/bin/singa-run.sh @@ -78,10 +78,7 @@ echo Record job information to $log_dir # generate host file host_file=$log_dir/job.hosts -python $SINGA_HOME/tool/gen_hosts.py -conf=$job_conf \ - -hosts=$SINGA_CONF/hostfile \ - -output=$host_file \ - || exit 1 +./singatool genhost $job_conf 1>$host_file || exit 1 # set command to run singa singa_run="./singa -conf=$job_conf -job=$job_id" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/examples/mnist/conv.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/conv.conf b/examples/mnist/conv.conf index 0bd014d..d463cd9 100644 --- a/examples/mnist/conv.conf +++ b/examples/mnist/conv.conf @@ -5,7 +5,7 @@ cluster { nworkers_per_group: 1 nservers_per_procs: 1 nworkers_per_procs: 1 - workspace: "examples/mnist/" + workspace: "examples/mnist" } model { name: "mnist-conv" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/examples/mnist/job.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/job.conf b/examples/mnist/job.conf index ca54f92..5d1445d 100644 --- a/examples/mnist/job.conf +++ b/examples/mnist/job.conf @@ -1,6 +1,7 @@ cluster { nworker_groups: 1 nserver_groups: 1 + workspace: "examples/mnist" } model { name: "deep-big-simple-mlp" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/examples/mnist/rbm_job.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/rbm_job.conf b/examples/mnist/rbm_job.conf index e40d02d..87df1b3 100644 --- a/examples/mnist/rbm_job.conf +++ b/examples/mnist/rbm_job.conf @@ -3,6 +3,7 @@ cluster { nserver_groups: 1 nservers_per_group: 1 nworkers_per_group: 1 + workspace: "examples/mnist" } model { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/src/utils/tool.cc ---------------------------------------------------------------------- diff --git a/src/utils/tool.cc b/src/utils/tool.cc index c450b72..85d74be 100644 --- a/src/utils/tool.cc +++ b/src/utils/tool.cc @@ -1,21 +1,32 @@ #include <gflags/gflags.h> #include <glog/logging.h> -#include <iostream> +#include <algorithm> #include <fstream> -#include "utils/cluster_rt.h" +#include <iostream> +#include "proto/job.pb.h" #include "proto/singa.pb.h" +#include "utils/cluster_rt.h" #include "utils/common.h" + #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif // GFLAGS_GFLAGS_H_ -DEFINE_string(global, "conf/singa.conf", "Global config file"); +DEFINE_string(confdir, "conf", "Global config dir"); singa::SingaProto global; const int SUCCESS = 0; const int ARG_ERR = 1; const int RUN_ERR = 2; +// show log dir in global config +int getlogdir() { + std::string dir = global.log_dir(); + while (dir.length() > 1 && dir[dir.length()-1] == '/') dir.pop_back(); + printf("%s\n", dir.c_str()); + return SUCCESS; +} + // generate a unique job id int create() { singa::JobManager mngr(global.zookeeper_host()); @@ -26,6 +37,45 @@ int create() { return SUCCESS; } +// generate a host list +int genhost(char* job_conf) { + // compute required #process from job conf + singa::JobProto job; + singa::ReadProtoFromTextFile(job_conf, &job); + singa::ClusterProto cluster = job.cluster(); + int nworker_procs = cluster.nworker_groups() * cluster.nworkers_per_group() + / cluster.nworkers_per_procs(); + int nserver_procs = cluster.nserver_groups() * cluster.nservers_per_group() + / cluster.nservers_per_procs(); + int nprocs = 0; + if (cluster.server_worker_separate()) + nprocs = nworker_procs + nserver_procs; + else + nprocs = std::max(nworker_procs, nserver_procs); + + // get available host list from global conf + std::fstream hostfile(FLAGS_confdir+"/hostfile"); + if (!hostfile.is_open()) { + LOG(ERROR) << "Cannot open file: " << FLAGS_confdir+"/hostfile"; + return RUN_ERR; + } + std::vector<std::string> hosts; + std::string host; + while (!hostfile.eof()) { + getline(hostfile, host); + if (!host.length() || host[0] == '#') continue; + hosts.push_back(host); + } + if (!hosts.size()) { + LOG(ERROR) << "Empty host file"; + return RUN_ERR; + } + // output selected hosts + for (int i = 0; i < nprocs; ++i) + printf("%s\n", hosts[i % hosts.size()].c_str()); + return SUCCESS; +} + // list singa jobs (running or all) int list(bool all) { singa::JobManager mngr(global.zookeeper_host()); @@ -69,36 +119,32 @@ int cleanup() { return SUCCESS; } -// show log dir in global config -int getlogdir() { - std::string dir = global.log_dir(); - while (dir.length() > 1 && dir[dir.length()-1] == '/') dir.pop_back(); - printf("%s\n", dir.c_str()); - return SUCCESS; -} - int main(int argc, char **argv) { std::string usage = "usage: singatool <command> <args>\n" - " getlogdir : show log dir in global config\n" - " create : generate a unique job id\n" - " list : list running singa jobs\n" - " listall : list all singa jobs\n" - " view JOB_ID : view procs of a singa job\n" - " clean JOB_ID : clean a job path in zookeeper\n" - " cleanup : clean all singa data in zookeeper\n"; + " getlogdir : show log dir in global config\n" + " create : generate a unique job id\n" + " genhost JOB_CONF : generate a host list\n" + " list : list running singa jobs\n" + " listall : list all singa jobs\n" + " view JOB_ID : view procs of a singa job\n" + " clean JOB_ID : clean a job path in zookeeper\n" + " cleanup : clean all singa data in zookeeper\n"; // set logging level to ERROR and log to STDERR FLAGS_logtostderr = 1; FLAGS_minloglevel = 2; google::InitGoogleLogging(argv[0]); gflags::ParseCommandLineFlags(&argc, &argv, true); - singa::ReadProtoFromTextFile(FLAGS_global.c_str(), &global); + singa::ReadProtoFromTextFile((FLAGS_confdir+"/singa.conf").c_str(), &global); // stat code: ARG_ERR for wrong argument, RUN_ERR for runtime error - int stat = SUCCESS; - if (argc <= 1) stat = ARG_ERR; - else { - if (!strcmp(argv[1], "create")) + int stat = (argc <= 1) ? ARG_ERR : SUCCESS; + if (stat == SUCCESS) { + if (!strcmp(argv[1], "getlogdir")) + stat = getlogdir(); + else if (!strcmp(argv[1], "create")) stat = create(); + else if (!strcmp(argv[1], "genhost")) + stat = (argc > 2) ? genhost(argv[2]) : ARG_ERR; else if (!strcmp(argv[1], "list")) stat = list(false); else if (!strcmp(argv[1], "listall")) @@ -109,11 +155,10 @@ int main(int argc, char **argv) { stat = (argc > 2) ? clean(atoi(argv[2])) : ARG_ERR; else if (!strcmp(argv[1], "cleanup")) stat = cleanup(); - else if (!strcmp(argv[1], "getlogdir")) - stat = getlogdir(); - else stat = ARG_ERR; + else + stat = ARG_ERR; } - + if (stat == ARG_ERR) LOG(ERROR) << usage; return stat; } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f0071a5c/tool/gen_hosts.py ---------------------------------------------------------------------- diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py deleted file mode 100644 index e38c8bf..0000000 --- a/tool/gen_hosts.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python - -import argparse -import os -import sys -from google.protobuf import text_format -from pb2.job_pb2 import JobProto - -# parse command line -parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job') -parser.add_argument('-conf', dest='conf', metavar='CONF_FILE', required=True, help='job.conf file') -parser.add_argument('-hosts', dest='hosts', metavar='HOST_FILE', required=True, help='global host file') -parser.add_argument('-output', dest='output', metavar='OUTPUT_FILE', required=True, help='generated list') -args = parser.parse_args(); - -# read from .conf file -fd_conf = open(args.conf, 'r') -job = JobProto() -text_format.Merge(str(fd_conf.read()), job) -cluster = job.cluster -nworker_procs = cluster.nworker_groups * cluster.nworkers_per_group / cluster.nworkers_per_procs -nserver_procs = cluster.nserver_groups * cluster.nservers_per_group / cluster.nservers_per_procs -nprocs = 0 -if (cluster.server_worker_separate) : - nprocs = nworker_procs+nserver_procs -else: - nprocs = max(nworker_procs, nserver_procs) -fd_conf.close() - -# read from source host file -fd_hosts = open(args.hosts, 'r') -hosts = [] -for line in fd_hosts: - line = line.strip() - if len(line) == 0 or line[0] == '#': - continue - hosts.append(line) -fd_hosts.close() - -# write to output file -num_hosts = len(hosts) -if (num_hosts == 0): - print "Contains no valid host %s" % args.hosts - sys.exit(1) -fd_output = open(args.output, 'w') -for i in range(nprocs): - fd_output.write(hosts[i % num_hosts] + '\n') -fd_output.close() -print 'Generate host list to %s' % args.output
