Repository: incubator-singa Updated Branches: refs/heads/master 280d5d6be -> 6019905e7
SINGA-15 Fix a bug in the ConnectStub function, which gets stuck when connecting layer_dealer_ Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c12dc91f Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c12dc91f Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c12dc91f Branch: refs/heads/master Commit: c12dc91f376349c1b05330e144dc0756aed42429 Parents: 280d5d6 Author: wang wei <[email protected]> Authored: Tue Jun 16 20:56:46 2015 +0800 Committer: wang wei <[email protected]> Committed: Tue Jun 16 20:56:46 2015 +0800 ---------------------------------------------------------------------- include/trainer/worker.h | 2 +- src/trainer/trainer.cc | 3 +-- src/trainer/worker.cc | 8 ++++---- 3 files changed, 6 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c12dc91f/include/trainer/worker.h ---------------------------------------------------------------------- diff --git a/include/trainer/worker.h b/include/trainer/worker.h index 7481ebd..a4054cb 100644 --- a/include/trainer/worker.h +++ b/include/trainer/worker.h @@ -128,7 +128,7 @@ class Worker { */ void ReceiveBlobs(shared_ptr<NeuralNet> net); void SendBlob(); - void ConnectStub(shared_ptr<Dealer> dealer); + void ConnectStub(shared_ptr<Dealer> dealer, EntityType type); protected: int thread_id_, group_id_, worker_id_; int step_; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c12dc91f/src/trainer/trainer.cc ---------------------------------------------------------------------- diff --git a/src/trainer/trainer.cc b/src/trainer/trainer.cc index 3d69249..cd0189c 100644 --- a/src/trainer/trainer.cc +++ b/src/trainer/trainer.cc @@ -227,8 +227,7 @@ void Trainer::Run(int nworkers, int nservers, LOG(ERROR)<<prefix<<" step-" <<step<<", "<<perf.ToString(); perf.Reset(); } - delete msg; - 
msg=nullptr; + DeleteMsg(&msg); }else if(cluster->nserver_groups()>0){ int group_id=msg->src_first(); int paramid=msg->target_first(); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c12dc91f/src/trainer/worker.cc ---------------------------------------------------------------------- diff --git a/src/trainer/worker.cc b/src/trainer/worker.cc index bb6ff87..52798ad 100644 --- a/src/trainer/worker.cc +++ b/src/trainer/worker.cc @@ -29,10 +29,10 @@ void Worker::Setup(const ModelProto& model, } } -void Worker::ConnectStub(shared_ptr<Dealer> dealer){ +void Worker::ConnectStub(shared_ptr<Dealer> dealer, EntityType type){ dealer->Connect(kInprocRouterEndpoint); Msg* ping=new Msg(); - ping->set_src(group_id_, worker_id_, kWorkerParam); + ping->set_src(group_id_, worker_id_, type); ping->set_dst(-1,-1,kStub); ping->set_type(kConnect); ping->add_frame("PING", 4); @@ -45,12 +45,12 @@ void Worker::ConnectStub(shared_ptr<Dealer> dealer){ void Worker::Run(){ dealer_=make_shared<Dealer>(2*thread_id_); - ConnectStub(dealer_); + ConnectStub(dealer_, kWorkerParam); for(auto layer: train_net_->layers()) if(layer->partitionid()==worker_id_) if(layer->is_bridgedstlayer()||layer->is_bridgesrclayer()){ layer_dealer_=make_shared<Dealer>(2*thread_id_+1); - ConnectStub(layer_dealer_); + ConnectStub(layer_dealer_, kWorkerLayer); break; } step_=modelproto_.step();
