The etcdctl 'watch' command does not return if the etcd server goes down.
Poll the etcd server periodically instead, so that loss of connectivity
to the etcd server is detected.
---
 src/osaf/consensus/plugins/etcd3.plugin | 50 ++++++++++++++++++++++++++-------
 1 file changed, 40 insertions(+), 10 deletions(-)

diff --git a/src/osaf/consensus/plugins/etcd3.plugin b/src/osaf/consensus/plugins/etcd3.plugin
index b3814c9..4998df0 100644
--- a/src/osaf/consensus/plugins/etcd3.plugin
+++ b/src/osaf/consensus/plugins/etcd3.plugin
@@ -17,9 +17,12 @@
 # backward compatible. This plugin may need to be adapted.
 
 readonly keyname="opensaf_consensus_lock"
+readonly takeover_request="takeover_request"  # key that watch() special-cases when get() fails
+readonly node_name_file="/etc/opensaf/node_name"  # read by watch() for the name it reports
 readonly directory="/opensaf/"
 readonly etcd_options=""
-readonly etcd_timeout="10s"
+readonly etcd_timeout="3s"
+readonly heartbeat_interval=2  # seconds watch() sleeps between polls
 
 export ETCDCTL_API=3
 
@@ -29,7 +32,8 @@ export ETCDCTL_API=3
 #   $1 - <key>
 # returns:
 #   0 - success, <value> is echoed to stdout
-#   non-zero - failure
+#   1 - invalid param
+#   other - failure
 get() {
   readonly key="$1"
 
@@ -51,7 +55,7 @@ get() {
       return 1
     fi
   else
-    return 1
+    return 2
   fi
 }
 
@@ -101,7 +105,8 @@ setkey() {
 # returns:
 #   0 - success
 #   1 - already exists
-#   2 or above - other failure
+#   2 - invalid param
+#   3 or above - other failure
 create_key() {
   readonly key="$1"
   readonly value="$2"
@@ -114,7 +119,7 @@ create_key() {
       lease_id=$(echo $output | awk '{print $2}')
       lease_param="--lease="$lease_id""
     else
-      return 2
+      return 3
     fi
   else
     lease_param=""
@@ -135,7 +140,7 @@ create_key() {
   then
     return 1
   else
-    return 2
+    return 3
   fi
 }
 
@@ -149,6 +154,7 @@ create_key() {
 #   $4 - <timeout>
 # returns:
 #   0 - success
+#   1 - invalid param
 #   non-zero - failure
 setkey_match_prev() {
   readonly key="$1"
@@ -326,10 +332,34 @@ unlock() {
 #   non-zero - failure
 watch() {
   readonly watch_key="$1"
-  etcdctl $etcd_options --dial-timeout $etcd_timeout \
-    watch "$directory$watch_key" | grep -m0 \"\" 2>&1
-  get "$watch_key"
-  return 0
+
+  # baseline to compare against; a get() status > 1 is treated as etcd down
+  orig_value=$(get "$watch_key")
+  result=$?
+
+  if [ "$result" -le "1" ]; then
+    while true; do
+      sleep "$heartbeat_interval"
+      current_value=$(get "$watch_key")
+      result=$?
+      if [ "$result" -gt "1" ]; then
+        # etcd down? for the takeover request key, report a synthetic value
+        if [ "$watch_key" = "$takeover_request" ]; then
+          hostname=$(cat "$node_name_file")
+          echo "$hostname SC-0 10000000 UNDEFINED"
+          return 0
+        else
+          return 1
+        fi
+      elif [ "$orig_value" != "$current_value" ]; then
+        # quoted: emit the new value verbatim, no word splitting or globbing
+        printf '%s\n' "$current_value"
+        return 0
+      fi
+    done
+  fi
+
+  return 1
 }
 
 # argument parsing
-- 
2.7.4



_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to