This is an automated email from the ASF dual-hosted git repository.

wutao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/main by this push:
     new 1ce369a  fix(script): operation return before the cluster balance when using pegasus_rolling_update.sh  (#576)
1ce369a is described below

commit 1ce369a2bf0dafe6e748de2815be27aa815b9a0a
Author: Shuo <[email protected]>
AuthorDate: Mon Aug 24 21:37:10 2020 +0800

    fix(script): operation return before the cluster balance when using pegasus_rolling_update.sh  (#576)
---
 scripts/pegasus_add_node_list.sh     |  72 +++-----------------
 scripts/pegasus_check_arguments.sh   |   6 --
 scripts/pegasus_offline_node_list.sh |  10 ++-
 scripts/pegasus_rebalance_cluster.sh | 128 +++++++++++++++++++++++++++++++++++
 scripts/pegasus_rolling_update.sh    |  43 ++----------
 5 files changed, 149 insertions(+), 110 deletions(-)

diff --git a/scripts/pegasus_add_node_list.sh b/scripts/pegasus_add_node_list.sh
index 1f358d8..a8ebaee 100755
--- a/scripts/pegasus_add_node_list.sh
+++ b/scripts/pegasus_add_node_list.sh
@@ -14,6 +14,12 @@ if [ $# -le 2 ]; then
   exit 1
 fi
 
+echo "UID=$UID"
+echo "PID=$PID"
+echo "Start time: `date`"
+add_node_start_time=$((`date +%s`))
+echo
+
 cluster=$1
 meta_list=$2
 replica_task_id_list=$3
@@ -49,70 +55,10 @@ do
   echo 
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
 done
 
-echo "Set meta.lb.only_move_primary true"
-echo "This remote-command tells the meta-server to ignore copying primaries 
during rebalancing."
-echo "So the following steps only include move_primary and copy_secondary."
-echo "remote_command -l $pmeta meta.lb.only_move_primary true" | ./run.sh 
shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.add_node_list.only_move_primary
-set_ok=`grep OK /tmp/$UID.$PID.pegasus.add_node_list.only_move_primary | wc -l`
-if [ $set_ok -ne 1 ]; then
-  echo "ERROR: meta.lb.only_move_primary true"
-  exit 1
-fi
-echo
-
-echo "Set meta level to lively..."
-echo "set_meta_level lively" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.add_node_list.set_meta_level
-set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.add_node_list.set_meta_level | wc -l`
-if [ $set_ok -ne 1 ]; then
-  echo "ERROR: set meta level to lively failed"
-  exit 1
-fi
-
-echo "Wait cluster to become balanced..."
-echo "Wait for 3 minutes to do load balance..."
-sleep 180
-while true; do
-    op_count=$(echo "cluster_info" | ./run.sh shell --cluster $meta_list | 
grep balance_operation_count | grep -o 'total=[0-9][0-9]*' | cut -d= -f2)
-    if [ -z "op_count" ]; then
-        break
-    fi
-    if [ $op_count -eq 0 ]; then
-        echo "Cluster may be balanced, try wait 30 seconds..."
-        sleep 30
-        op_count=$(echo "cluster_info" | ./run.sh shell --cluster $meta_list | 
grep balance_operation_count | grep -o 'total=[0-9][0-9]*' | cut -d= -f2)
-        if [ $op_count -eq 0 ]; then
-            echo "Cluster becomes balanced."
-            break
-        fi
-    else
-        echo "Still $op_count balance operations to do..."
-        sleep 1
-    fi
-done
-echo
-
-
-
-echo "Set meta level to steady..."
-echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.add_node_list.set_meta_level
-set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.add_node_list.set_meta_level | wc -l`
-if [ $set_ok -ne 1 ]; then
-  echo "ERROR: set meta level to steady failed"
-  exit 1
-fi
-
-echo "Set meta.lb.only_move_primary false"
-echo "This remote-command tells the meta-server to rebalance with copying 
primaries."
-echo "remote_command -l $pmeta meta.lb.only_move_primary false" | ./run.sh 
shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.add_node_list.only_move_primary
-set_ok=`grep OK /tmp/$UID.$PID.pegasus.add_node_list.only_move_primary | wc -l`
-if [ $set_ok -ne 1 ]; then
-  echo "ERROR: meta.lb.only_move_primary false"
-  exit 1
-fi
-echo
+./scripts/pegasus_rebalance_cluster.sh $cluster $meta_list true
 
 echo "Finish time: `date`"
-all_finish_time=$((`date +%s`))
-echo "add node list done, elasped time is $((all_finish_time - 
all_start_time)) seconds."
+add_node_finish_time=$((`date +%s`))
+echo "add node list done, elasped time is $((add_node_finish_time - 
add_node_start_time)) seconds."
 
 rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
diff --git a/scripts/pegasus_check_arguments.sh b/scripts/pegasus_check_arguments.sh
index de22f48..5b3f37b 100755
--- a/scripts/pegasus_check_arguments.sh
+++ b/scripts/pegasus_check_arguments.sh
@@ -31,12 +31,6 @@ if [ $? -ne 0 ]; then
   exit 1
 fi
 
-echo "UID=$UID"
-echo "PID=$PID"
-echo "Start time: `date`"
-all_start_time=$((`date +%s`))
-echo
-
 id_list_file="/tmp/$UID.$PID.pegasus.$check_type.id_list"
 echo "Generating $id_list_file..."
 minos_show_replica $cluster $id_list_file
diff --git a/scripts/pegasus_offline_node_list.sh b/scripts/pegasus_offline_node_list.sh
index f8883f2..093fe16 100755
--- a/scripts/pegasus_offline_node_list.sh
+++ b/scripts/pegasus_offline_node_list.sh
@@ -14,6 +14,12 @@ if [ $# -le 2 ]; then
   exit 1
 fi
 
+echo "UID=$UID"
+echo "PID=$PID"
+echo "Start time: `date`"
+offline_node_start_time=$((`date +%s`))
+echo
+
 cluster=$1
 meta_list=$2
 replica_task_id_list=$3
@@ -68,8 +74,8 @@ if [ $set_ok -ne 1 ]; then
   exit 1
 fi
 
-all_finish_time=$((`date +%s`))
+offline_finish_time=$((`date +%s`))
 echo "Offline replica server task list done."
-echo "Elapsed time is $((all_finish_time - all_start_time)) seconds."
+echo "Elapsed time is $((offline_finish_time - offline_node_start_time)) 
seconds."
 
 rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
diff --git a/scripts/pegasus_rebalance_cluster.sh b/scripts/pegasus_rebalance_cluster.sh
new file mode 100755
index 0000000..99112fe
--- /dev/null
+++ b/scripts/pegasus_rebalance_cluster.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+#
+# Pegasus cluster rebalance 
+#
+
+PID=$$
+
+if [ $# -le 1 ]; then
+  echo "USAGE: $0 <cluster-name> <cluster-meta-list> 
<only-move-primary>(default false)"
+  echo 
+  echo "for example:"
+  echo "  $0 onebox 127.0.0.1:34601,127.0.0.1:34602 true"
+  echo
+  exit 1
+fi
+
+cluster=$1
+meta_list=$2
+
+if [ -z $3 ]; then
+  only_move_primary=false
+else
+  only_move_primary=$3
+fi
+
+pwd="$( cd "$( dirname "$0"  )" && pwd )"
+shell_dir="$( cd $pwd/.. && pwd )"
+cd $shell_dir
+
+source ./scripts/minos_common.sh
+find_cluster $cluster
+if [ $? -ne 0 ]; then
+  echo "ERROR: cluster \"$cluster\" not found"
+  exit 1
+fi
+
+echo "UID=$UID"
+echo "PID=$PID"
+echo "Start time: `date`"
+rebalance_start_time=$((`date +%s`))
+echo
+
+echo "Generating /tmp/$UID.$PID.pegasus.rebalance.cluster_info..."
+echo cluster_info | ./run.sh shell --cluster $meta_list 2>&1 | sed 's/ *$//' 
>/tmp/$UID.$PID.pegasus.rebalance.cluster_info
+cname=`grep zookeeper_root /tmp/$UID.$PID.pegasus.rebalance.cluster_info | 
grep -o '/[^/]*$' | grep -o '[^/]*$'`
+if [ "$cname" != "$cluster" ]; then
+  echo "ERROR: cluster name and meta list not matched"
+  exit 1
+fi
+pmeta=`grep primary_meta_server /tmp/$UID.$PID.pegasus.rebalance.cluster_info 
| grep -o '[0-9.:]*$'`
+if [ "$pmeta" == "" ]; then
+  echo "ERROR: extract primary_meta_server by shell failed"
+  exit 1
+fi
+
+if [ "$only_move_primary" == "true" ]; then
+  echo "Set meta.lb.only_move_primary true"
+  echo "This remote-command tells the meta-server to ignore copying primaries 
during rebalancing."
+  echo "So the following steps only include move_primary and copy_secondary."
+  echo "remote_command -l $pmeta meta.lb.only_move_primary true" | ./run.sh 
shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rebalance.only_move_primary
+  set_ok=`grep OK /tmp/$UID.$PID.pegasus.rebalance.only_move_primary | wc -l`
+  if [ $set_ok -ne 1 ]; then
+    echo "ERROR: meta.lb.only_move_primary true"
+    exit 1
+  fi
+fi
+echo
+
+echo "Set meta level to lively..."
+echo "set_meta_level lively" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.rebalance.set_meta_level
+set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.rebalance.set_meta_level | wc -l`
+if [ $set_ok -ne 1 ]; then
+  echo "ERROR: set meta level to lively failed"
+  exit 1
+fi
+
+echo "Wait cluster to become balanced..."
+echo "Wait for 3 minutes to do load balance..."
+sleep 180
+## Number of check times for balanced state, in case that op_count is 0 but
+## the cluster is in fact unbalanced. Each check waits for 30 secs.
+op_count_check_remain_times=1
+while true; do
+    op_count=$(echo "cluster_info" | ./run.sh shell --cluster $meta_list | 
grep balance_operation_count | grep -o 'total=[0-9][0-9]*' | cut -d= -f2)
+    if [ -z $op_count ]; then
+        break
+    fi
+
+    if [ $op_count -eq 0 ]; then
+        if [ $op_count_check_remain_times -eq 0 ]; then
+          break
+        else
+           echo "Cluster may be balanced, try wait 30 seconds..."
+           ((op_count_check_remain_times--))
+           sleep 30
+        fi
+    else
+        echo "Still $op_count balance operations to do..."
+        sleep 10
+    fi
+done
+echo
+
+echo "Set meta level to steady..."
+echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.rebalance.set_meta_level
+set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.rebalance.set_meta_level | wc -l`
+if [ $set_ok -ne 1 ]; then
+  echo "ERROR: set meta level to steady failed"
+  exit 1
+fi
+
+if [ "$only_move_primary" == "true" ]; then
+  echo "Set meta.lb.only_move_primary false"
+  echo "This remote-command tells the meta-server to rebalance with copying 
primaries."
+  echo "remote_command -l $pmeta meta.lb.only_move_primary false" | ./run.sh 
shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.rebalance.only_move_primary
+  set_ok=`grep OK /tmp/$UID.$PID.pegasus.rebalance.only_move_primary | wc -l`
+  if [ $set_ok -ne 1 ]; then
+    echo "ERROR: meta.lb.only_move_primary false"
+    exit 1
+  fi
+  echo
+fi
+
+echo "Finish time: `date`"
+rebalance_finish_time=$((`date +%s`))
+echo "rebalance done, elasped time is $((rebalance_finish_time - 
rebalance_start_time)) seconds."
+
+rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null
diff --git a/scripts/pegasus_rolling_update.sh b/scripts/pegasus_rolling_update.sh
index 9523119..a7ba7f5 100755
--- a/scripts/pegasus_rolling_update.sh
+++ b/scripts/pegasus_rolling_update.sh
@@ -46,7 +46,7 @@ fi
 echo "UID=$UID"
 echo "PID=$PID"
 echo "Start time: `date`"
-all_start_time=$((`date +%s`))
+rolling_start_time=$((`date +%s`))
 echo
 
 rs_list_file="/tmp/$UID.$PID.pegasus.rolling_update.rs.list"
@@ -279,46 +279,11 @@ if [ "$type" = "all" ]; then
   echo "Rolling update collectors done."
   echo
 
-  echo "Set meta level to lively..."
-  echo "set_meta_level lively" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level
-  set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level | wc -l`
-  if [ $set_ok -ne 1 ]; then
-    echo "ERROR: set meta level to lively failed"
-    exit 1
-  fi
-  echo
-
-  echo "Wait cluster to become balanced..."
-  echo "Wait for 3 minutes to do load balance..."
-  sleep 180
-  while true
-  do
-    op_count=`echo "cluster_info" | ./run.sh shell --cluster $meta_list | grep 
balance_operation_count | grep -o 'total=[0-9][0-9]*' | cut -d= -f2`
-    if [ -z "op_count" ]; then
-      break
-    fi
-    if [ $op_count -eq 0 ]; then
-      echo "Cluster becomes balanced."
-      break
-    else
-      echo "Still $op_count balance operations to do..."
-      sleep 10
-    fi
-  done
-  echo
-
-  echo "Set meta level to steady..."
-  echo "set_meta_level steady" | ./run.sh shell --cluster $meta_list 
&>/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level
-  set_ok=`grep 'control meta level ok' 
/tmp/$UID.$PID.pegasus.rolling_update.set_meta_level | wc -l`
-  if [ $set_ok -ne 1 ]; then
-    echo "ERROR: set meta level to steady failed"
-    exit 1
-  fi
-  echo
+  ./scripts/pegasus_rebalance_cluster.sh $cluster $meta_list
 fi
 
 echo "Finish time: `date`"
-all_finish_time=$((`date +%s`))
-echo "Rolling update $type done, elasped time is $((all_finish_time - 
all_start_time)) seconds."
+rolling_finish_time=$((`date +%s`))
+echo "Rolling update $type done, elasped time is $((rolling_finish_time - 
rolling_start_time)) seconds."
 
 rm -f /tmp/$UID.$PID.pegasus.* &>/dev/null


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to