SINGA-8 Implement distributed Hogwild

Handle ZooKeeper disconnection (retry node creation on ZCONNECTIONLOSS).
Change the ZooKeeper log level from WARN to ERROR.

close #14


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c51f9264
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c51f9264
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c51f9264

Branch: refs/heads/master
Commit: c51f9264bb26f22cc7e49ee85ea2fc30c322cc9f
Parents: 4956d6a
Author: wang sheng <[email protected]>
Authored: Thu Jun 25 21:40:38 2015 +0800
Committer: wang sheng <[email protected]>
Committed: Thu Jun 25 21:40:38 2015 +0800

----------------------------------------------------------------------
 src/utils/cluster_rt.cc | 13 +++++++++----
 src/utils/param.cc      |  9 ++++++---
 2 files changed, 15 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c51f9264/src/utils/cluster_rt.cc
----------------------------------------------------------------------
diff --git a/src/utils/cluster_rt.cc b/src/utils/cluster_rt.cc
index 61911fd..6143567 100644
--- a/src/utils/cluster_rt.cc
+++ b/src/utils/cluster_rt.cc
@@ -28,7 +28,7 @@ ZKClusterRT::~ZKClusterRT() {
 char zk_cxt[] = "ZKClusterRT";
 
 bool ZKClusterRT::Init() {
-  zoo_set_debug_level(ZOO_LOG_LEVEL_WARN);
+  zoo_set_debug_level(ZOO_LOG_LEVEL_ERROR);
   zkhandle_ = zookeeper_init(host_.c_str(), WatcherGlobal, timeout_, 0,
                              static_cast<void *>(zk_cxt), 0);
   if (zkhandle_ == NULL) {
@@ -176,9 +176,14 @@ bool ZKClusterRT::CreateZKNode(const char* path, const char* val, int flag,
   for (int i = 0; i < kNumRetry; ++i) {
     ret = zoo_create(zkhandle_, path, val, val == nullptr ? -1 : strlen(val),
                      &ZOO_OPEN_ACL_UNSAFE, flag, buf, kMaxBufLen);
-    if (ret != ZNONODE) break;
-    LOG(WARNING) << "zookeeper parent node of " << path
-                 << " not exist, retry later";
+    if (ret == ZNONODE) {
+      LOG(WARNING) << "zookeeper parent node of " << path
+                  << " not exist, retry later";
+    } else if (ret == ZCONNECTIONLOSS) {
+      LOG(WARNING) << "zookeeper disconnected, retry later";
+    } else {
+      break;
+    }
     sleep(kSleepSec);
   }
   // copy the node name ot output

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c51f9264/src/utils/param.cc
----------------------------------------------------------------------
diff --git a/src/utils/param.cc b/src/utils/param.cc
index 1e05ab9..07ad8ce 100644
--- a/src/utils/param.cc
+++ b/src/utils/param.cc
@@ -168,7 +168,8 @@ Msg* Param::HandlePutMsg(Msg** msg){
 }
 
 Msg* Param::HandleGetMsg(Msg** msg){
-  char copy; sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
+  int copy;
+  sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
   (*msg)->next_frame();
   if(copy)
     (*msg)->add_frame(mutable_cpu_data(), sizeof(float)*size());
@@ -179,7 +180,8 @@ Msg* Param::HandleGetMsg(Msg** msg){
 }
 
 int Param::ParseUpdateMsg(Msg** msg){
-  char copy; sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
+  int copy;
+  sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
   (*msg)->next_frame();
   if(copy){
     LOG(ERROR)<<"Copy in parse update";
@@ -231,7 +233,8 @@ int Param::ParseUpdateResponseMsg(Msg **msg, int slice_idx){
 }
 
 void Param::ParseResponseMsg(Msg** msg, int slice_idx){
-  char copy; sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
+  int copy;
+  sscanf(static_cast<char*>((*msg)->frame_data()), " %d ", &copy);
   (*msg)->next_frame();
   if(copy){
     CHECK((*msg)->frame_size());

Reply via email to