Fix a bug from zsock_connect by binding the router first and then connecting the dealers to it.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/af235456 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/af235456 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/af235456 Branch: refs/heads/master Commit: af235456bc348d33502162f2b291557c69662d23 Parents: 011823d Author: wang wei <[email protected]> Authored: Tue May 26 17:05:42 2015 +0800 Committer: wang wei <[email protected]> Committed: Tue May 26 17:05:42 2015 +0800 ---------------------------------------------------------------------- Makefile.example | 2 +- include/trainer/trainer.h | 2 ++ src/trainer/trainer.cc | 14 +++++++------- 3 files changed, 10 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/af235456/Makefile.example ---------------------------------------------------------------------- diff --git a/Makefile.example b/Makefile.example index 80dfc26..9a1b514 100644 --- a/Makefile.example +++ b/Makefile.example @@ -50,7 +50,7 @@ OBJS := $(sort $(SINGA_OBJS) $(TEST_OBJS) ) .PHONY: singa test singa: $(PROTO_OBJS) $(SINGA_OBJS) - $(CXX) $(SINGA_OBJS) src/main.cc -o $(BUILD_DIR)/singa $(CXXFLAGS) $(LDFLAGS) + $(CXX) $(SINGA_OBJS) src/main.cc -o singa $(CXXFLAGS) $(LDFLAGS) @echo loader: proto $(LOADER_OBJS) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/af235456/include/trainer/trainer.h ---------------------------------------------------------------------- diff --git a/include/trainer/trainer.h b/include/trainer/trainer.h index f5d2591..fed011d 100644 --- a/include/trainer/trainer.h +++ b/include/trainer/trainer.h @@ -9,6 +9,7 @@ #include "neuralnet/neuralnet.h" #include "trainer/worker.h" #include "trainer/server.h" +#include "communication/socket.h" namespace singa { /** @@ -130,6 +131,7 @@ class Trainer{ protected: int procs_id_; + shared_ptr<Router> 
router_; }; } /* singa */ #endif // INCLUDE_TRAINER_TRAINER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/af235456/src/trainer/trainer.cc ---------------------------------------------------------------------- diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc index bc6867d..00202ae 100644 --- a/src/trainer/trainer.cc +++ b/src/trainer/trainer.cc @@ -42,6 +42,11 @@ void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto, RegisterDefaultClasses(mproto); auto cluster=Cluster::Get(cproto, procs_id); + router_=make_shared<Router>(); + router_->Bind(kInprocRouterEndpoint); + if(cluster->nprocs()>1) + router_->Bind(cluster->endpoint()); + // create servers vector<shared_ptr<Server>> servers; int nthreads=1; // the first socket is the router @@ -160,14 +165,9 @@ void Trainer::Start(const ModelProto& mproto, const ClusterProto& cproto, void Trainer::Run(const std::map<int, shared_ptr<Trainer::ParamShard>>& shards){ auto cluster=Cluster::Get(); procs_id_=cluster->procs_id(); - auto router=make_shared<Router>(); - router->Bind(kInprocRouterEndpoint); - if(cluster->nprocs()>1) - router->Bind(cluster->endpoint()); - map<int, shared_ptr<Dealer>> interprocs_dealers; while(true){ - Msg* msg=router->Receive(); + Msg* msg=router_->Receive(); if(msg==nullptr){ LOG(ERROR)<<"Connection broken!"; exit(0); @@ -218,7 +218,7 @@ void Trainer::Run(const std::map<int, shared_ptr<Trainer::ParamShard>>& shards){ interprocs_dealers[procs_id]->Send(&msg); */ }else{ - router->Send(&msg); + router_->Send(&msg); } } }
