Repository: incubator-singa
Updated Branches:
  refs/heads/master 72e73cc18 -> 7954a87d2


SINGA-34 Support external zookeeper service

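Move the global configuration fields (zookeeper_host, log_dir) from ClusterProto to a new GlobalProto.
Add a global configuration file, conf/singa.conf.
Replace tool/plot with tool/pb2, which holds the generated protobuf Python modules.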


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3819e590
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3819e590
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3819e590

Branch: refs/heads/master
Commit: 3819e59089bd3f441b88e4ebfc37848f0983c3cc
Parents: 72e73cc
Author: wang sheng <[email protected]>
Authored: Thu Jul 16 03:41:25 2015 +0800
Committer: wang sheng <[email protected]>
Committed: Thu Jul 16 04:47:08 2015 +0800

----------------------------------------------------------------------
 .gitignore                |  3 +--
 Makefile.example          |  4 ++++
 conf/singa.conf           |  5 +++++
 include/trainer/trainer.h |  5 +++--
 include/utils/cluster.h   | 14 +++++---------
 src/main.cc               |  9 ++++++---
 src/proto/cluster.proto   |  4 ++--
 src/proto/global.proto    |  8 ++++++++
 src/trainer/trainer.cc    |  5 +++--
 src/utils/cluster.cc      | 11 +++++++----
 tool/gen_hosts.py         |  9 ++-------
 tool/plot/__init__.py     |  0
 tool/plot/plot.py         |  0
 13 files changed, 46 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index a419725..527972b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,12 +14,11 @@
 *.cproject
 *.log
 *.nfs*
-*_pb2.py
-*.pyc
 *.pb.h
 *.pb.cc
 *.hosts
 *.out
+tool/pb2/*
 src/test/data/*
 tmp
 log*

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/Makefile.example
----------------------------------------------------------------------
diff --git a/Makefile.example b/Makefile.example
index 6d8d83a..f2c58fe 100644
--- a/Makefile.example
+++ b/Makefile.example
@@ -82,11 +82,15 @@ $(PROTO_SRCS): $(PROTOS)
        protoc --proto_path=src/proto --cpp_out=src/proto $(PROTOS)
        mkdir -p include/proto/
        cp src/proto/*.pb.h include/proto/
+       mkdir -p tool/pb2/
+       touch tool/pb2/__init__.py
+       protoc --proto_path=src/proto --python_out=tool/pb2/ $(PROTOS)
        @echo
 
 clean:
        rm -rf *.a *.so
        rm -rf include/proto/*
        rm -rf src/proto/*.pb.h src/proto/*.pb.cc
+       rm -rf tool/pb2/*
        rm -rf $(BUILD_DIR)
        @echo

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/conf/singa.conf
----------------------------------------------------------------------
diff --git a/conf/singa.conf b/conf/singa.conf
new file mode 100644
index 0000000..f6c351b
--- /dev/null
+++ b/conf/singa.conf
@@ -0,0 +1,5 @@
+# point to your active zookeeper service
+zookeeper_host: "localhost:2181"
+
+# set if you want to change log directory
+# log_dir: "/tmp/singa-log/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/include/trainer/trainer.h
----------------------------------------------------------------------
diff --git a/include/trainer/trainer.h b/include/trainer/trainer.h
index 2419dc4..0ee01d4 100644
--- a/include/trainer/trainer.h
+++ b/include/trainer/trainer.h
@@ -2,6 +2,7 @@
 #define INCLUDE_TRAINER_TRAINER_H_
 #include <unordered_map>
 #include "proto/cluster.pb.h"
+#include "proto/global.pb.h"
 #include "proto/model.pb.h"
 #include "utils/updater.h"
 #include "utils/param.h"
@@ -88,8 +89,8 @@ class Trainer{
    * @param modelproto
    * @param clusterproto
    */
-  void Start(const ModelProto& modelproto, const ClusterProto& clusterproto,
-    const int procs_id);
+  void Start(const ModelProto& modelproto, const GlobalProto& globalproto, 
+             const ClusterProto& clusterproto, const int procs_id);
 
   // TODO add Resume() function to continue training from a previously stopped
   // point.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/include/utils/cluster.h
----------------------------------------------------------------------
diff --git a/include/utils/cluster.h b/include/utils/cluster.h
index e5980ca..68ae937 100644
--- a/include/utils/cluster.h
+++ b/include/utils/cluster.h
@@ -9,6 +9,7 @@
 #include "utils/common.h"
 #include "proto/cluster.pb.h"
 #include "utils/cluster_rt.h"
+#include "proto/global.pb.h"
 
 using std::shared_ptr;
 using std::string;
@@ -24,7 +25,8 @@ namespace singa {
 class Cluster {
  public:
   static shared_ptr<Cluster> Get();
-  static shared_ptr<Cluster> Get(const ClusterProto& cluster, int procs_id=0);
+  static shared_ptr<Cluster> Get(const GlobalProto& global, 
+                                 const ClusterProto& cluster, int procs_id=0);
 
   const int nserver_groups()const{ return cluster_.nserver_groups(); }
   const int nworker_groups()const { return cluster_.nworker_groups(); }
@@ -83,13 +85,6 @@ class Cluster {
   const string vis_folder(){
     return cluster_.workspace()+"/visualization";
   }
-  const string log_folder(){
-    if(cluster_.has_log_dir()){
-      return cluster_.workspace()+"log";
-    }else
-      return "";
-  }
-
   const int stub_timeout() const {
     return cluster_.stub_timeout();
   }
@@ -130,7 +125,7 @@ class Cluster {
   void Register(const string& endpoint);
 
  private:
-  Cluster(const ClusterProto &cluster, int procs_id) ;
+  Cluster(const GlobalProto& global, const ClusterProto &cluster, int procs_id) ;
   void SetupFolders(const ClusterProto &cluster);
   int Hash(int gid, int id, int flag);
 
@@ -141,6 +136,7 @@ class Cluster {
   std::vector<std::string> endpoints_;
   // cluster config proto
   ClusterProto cluster_;
+  GlobalProto global_;
   shared_ptr<ClusterRuntime> cluster_rt_;
   // make this class a singlton
   static shared_ptr<Cluster> instance_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/src/main.cc
----------------------------------------------------------------------
diff --git a/src/main.cc b/src/main.cc
index 4c2bb03..e6b7368 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -23,6 +23,7 @@
 DEFINE_int32(procsID, -1, "Global process ID");
 DEFINE_string(cluster, "examples/mnist/cluster.conf", "Cluster config file");
 DEFINE_string(model, "examples/mnist/conv.conf", "Model config file");
+DEFINE_string(global, "conf/singa.conf", "Global config file");
 
 /**
  * Register layers, and other customizable classes.
@@ -42,14 +43,16 @@ int main(int argc, char **argv) {
   singa::ReadProtoFromTextFile(FLAGS_cluster.c_str(), &cluster);
   singa::ModelProto model;
   singa::ReadProtoFromTextFile(FLAGS_model.c_str(), &model);
-  if(cluster.has_log_dir())
-    singa::SetupLog(cluster.log_dir(), model.name());
+  singa::GlobalProto global;
+  singa::ReadProtoFromTextFile(FLAGS_global.c_str(), &global);
+  singa::SetupLog(global.log_dir(), model.name());
 
   LOG(INFO) << "The cluster config is\n" << cluster.DebugString();
   LOG(INFO) << "The model config is\n" << model.DebugString();
+  LOG(INFO) << "The global config is\n" << global.DebugString();
 
   RegisterClasses(model);
   singa::Trainer trainer;
-  trainer.Start(model, cluster, FLAGS_procsID);
+  trainer.Start(model, global, cluster, FLAGS_procsID);
   return 0;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/src/proto/cluster.proto
----------------------------------------------------------------------
diff --git a/src/proto/cluster.proto b/src/proto/cluster.proto
index 1480cc1..54ce300 100644
--- a/src/proto/cluster.proto
+++ b/src/proto/cluster.proto
@@ -21,9 +21,9 @@ message ClusterProto {
   // local workspace, train/val/test shards, checkpoint files
   required string workspace = 14;
   // relative path to workspace. if not set, use the default dir of glog
-  optional string log_dir = 15;
+  //optional string log_dir = 15;
   // ip/hostname : port [, ip/hostname : port]
-  optional string zookeeper_host = 16 [default = "localhost:2181"];
+  //optional string zookeeper_host = 16 [default = "localhost:2181"];
   // message size limit, default 1MB
   // optional int32 largest_message = 20 [default = 1048576];
   // optional float bandwidth = 21 [default = 100];  // MB/s

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/src/proto/global.proto
----------------------------------------------------------------------
diff --git a/src/proto/global.proto b/src/proto/global.proto
new file mode 100644
index 0000000..84eb7be
--- /dev/null
+++ b/src/proto/global.proto
@@ -0,0 +1,8 @@
+package singa;
+
+message GlobalProto {
+  // ip/hostname:port[,ip/hostname:port]
+  required string zookeeper_host = 1;
+  // if not set, use the default dir of glog
+  optional string log_dir = 2 [default = "/tmp/singa-log/"];
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/src/trainer/trainer.cc
----------------------------------------------------------------------
diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc
index f4e52a6..a6a5dbf 100644
--- a/src/trainer/trainer.cc
+++ b/src/trainer/trainer.cc
@@ -248,13 +248,14 @@ vector<Worker*> Trainer::CreateWorkers(int nthreads,
   return workers;
 }
 
-void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto,
+void Trainer::Start(const ModelProto& mproto, const GlobalProto& gproto, 
+                    const ClusterProto& cproto,
     int procs_id){
   // procs_id is only used for resume training
   CHECK_EQ(procs_id, -1);
   RegisterDefaultClasses(mproto);
 
-  auto cluster=Cluster::Get(cproto, procs_id);
+  auto cluster=Cluster::Get(gproto, cproto, procs_id);
   router_=make_shared<Router>();
   router_->Bind(kInprocRouterEndpoint);
   if(cluster->nprocs()>1){

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/src/utils/cluster.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster.cc b/src/utils/cluster.cc
index fdbde69..0c4eefa 100644
--- a/src/utils/cluster.cc
+++ b/src/utils/cluster.cc
@@ -10,9 +10,11 @@
 namespace singa {
 
 std::shared_ptr<Cluster> Cluster::instance_;
-Cluster::Cluster(const ClusterProto &cluster, int procs_id) {
+Cluster::Cluster(const GlobalProto & global, const ClusterProto &cluster,
+                int procs_id) {
   procs_id_=procs_id;
   cluster_ = cluster;
+  global_ = global;
   SetupFolders(cluster);
   if(server_worker_separate())
     nprocs_=nworker_procs()+nserver_procs();
@@ -47,7 +49,7 @@ Cluster::Cluster(const ClusterProto &cluster, int procs_id) {
     }
   }
 
-  auto rt=new ZKClusterRT(cluster_.zookeeper_host());
+  auto rt=new ZKClusterRT(global_.zookeeper_host());
   rt->Init();
   cluster_rt_=shared_ptr<ClusterRuntime>(static_cast<ClusterRuntime*>(rt));
 
@@ -73,8 +75,9 @@ void Cluster::SetupFolders(const ClusterProto &cluster){
   mkdir(vis_folder().c_str(),  S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
 }
 
-shared_ptr<Cluster> Cluster::Get(const ClusterProto& cluster, int procs_id){
-  instance_.reset(new Cluster(cluster, procs_id));
+shared_ptr<Cluster> Cluster::Get(const GlobalProto& global, const ClusterProto& cluster,
+                                 int procs_id){
+  instance_.reset(new Cluster(global, cluster, procs_id));
   return instance_;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/tool/gen_hosts.py
----------------------------------------------------------------------
diff --git a/tool/gen_hosts.py b/tool/gen_hosts.py
old mode 100755
new mode 100644
index e2ed29d..570eff9
--- a/tool/gen_hosts.py
+++ b/tool/gen_hosts.py
@@ -4,7 +4,7 @@ import argparse
 import os
 import sys
 from google.protobuf import text_format
-from plot.cluster_pb2 import ClusterProto
+from pb2.cluster_pb2 import ClusterProto
 
 # parse command line
 parser = argparse.ArgumentParser(description='Generate host list from host file for a SINGA job')
@@ -13,11 +13,6 @@ parser.add_argument('-src', dest='src', metavar='SRC_FILE', required=True, help=
 parser.add_argument('-dst', dest='dst', metavar='DST_FILE', required=True, help='generated list')
 args = parser.parse_args();
 
-# change to SINGA_HOME
-abspath = os.path.abspath(__file__)
-dname = os.path.dirname(abspath)
-os.chdir(dname+'/..')
-
 # read from .conf file
 fd_conf = open(args.conf, 'r')
 cluster = ClusterProto()
@@ -44,7 +39,7 @@ fd_src.close()
 # write to dst file
 num_hosts = len(hosts)
 if (num_hosts == 0):
-  print 'source host file is empty'
+  print 'ERROR: source host file is empty'
   sys.exit()
 fd_dst = open(args.dst, 'w')
 for i in range(nprocs):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/tool/plot/__init__.py
----------------------------------------------------------------------
diff --git a/tool/plot/__init__.py b/tool/plot/__init__.py
deleted file mode 100644
index e69de29..0000000

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3819e590/tool/plot/plot.py
----------------------------------------------------------------------
diff --git a/tool/plot/plot.py b/tool/plot/plot.py
deleted file mode 100644
index e69de29..0000000

Reply via email to