This is an automated email from the ASF dual-hosted git repository.

zhouky pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 07c1dc256 [CELEBORN-975] Refactor the check logic to stop the celeborn master and worker
07c1dc256 is described below

commit 07c1dc2568e8a1dc57448919ed01bfa6eb11b630
Author: sychen <[email protected]>
AuthorDate: Mon Sep 18 16:23:32 2023 +0800

    [CELEBORN-975] Refactor the check logic to stop the celeborn master and worker
    
    ### What changes were proposed in this pull request?
    
    The stop command of `stop-master.sh` and `stop-worker.sh` now waits up to 600s for the process to exit after sending `kill -15`.
    
    Delete the pid file only after the process has actually stopped, so that a retried stop command can still find the pid file.
    
    ### Why are the changes needed?
    
    ### Does this PR introduce _any_ user-facing change?
    
    ### How was this patch tested?
    
    Closes #1911 from cxzl25/CELEBORN-975.
    
    Authored-by: sychen <[email protected]>
    Signed-off-by: zky.zhoukeyong <[email protected]>
---
 sbin/celeborn-daemon.sh | 72 ++++++++++++++++++++++++-------------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/sbin/celeborn-daemon.sh b/sbin/celeborn-daemon.sh
index 379562108..b4c2cc4c6 100755
--- a/sbin/celeborn-daemon.sh
+++ b/sbin/celeborn-daemon.sh
@@ -165,74 +165,74 @@ run_command() {
 
 }
 
-case $option in
-
-  (start)
+start_celeborn() {
     run_command class "$@"
-    ;;
-
-  (stop)
-
-    if [ -f $pid ]; then
-      TARGET_ID="$(cat "$pid")"
-      if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]] || [[ $(ps -p 
"$TARGET_ID" -o comm=) =~ "jboot" ]]; then
-        echo "stopping $command"
-        kill "$TARGET_ID" && rm -f "$pid"
-      else
-        echo "no $command to stop"
-      fi
-    else
-      echo "no $command to stop"
-    fi
-    ;;
-
-  (restart)
+}
 
+stop_celeborn() {
     if [ -f $pid ]; then
       TARGET_ID="$(cat "$pid")"
       if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]] || [[ $(ps -p 
"$TARGET_ID" -o comm=) =~ "jboot" ]]; then
         echo "stopping $command"
-        kill "$TARGET_ID" && rm -f "$pid"
+        kill "$TARGET_ID"
         wait_time=0
         # keep same with `celeborn.worker.graceful.shutdown.timeout`
         wait_timeout=600
         while [[ $(ps -p "$TARGET_ID" -o comm=) != "" && $wait_time -lt 
$wait_timeout ]];
         do
-          sleep 1s
+          sleep 1
           ((wait_time++))
           echo "waiting for server shutdown, wait for ${wait_time}s"
         done
+
         if [[ $(ps -p "$TARGET_ID" -o comm=) == "" ]]; then
-          run_command class "$@"
+          rm -f "$pid"
         else
-          echo "stopping $command failed."
+          echo "Failed to stop server(pid=$TARGET_ID) after ${wait_timeout}s"
+          exit 1
         fi
       else
-        rm -f "$pid"
-        echo "no $command to stop, directly start"
-        run_command class "$@"
+        echo "no $command to stop"
       fi
     else
-      echo "no $command to stop, directly start"
-      run_command class "$@"
+      echo "no $command to stop"
     fi
-    ;;
-
-  (status)
+}
 
+check_celeborn(){
     if [ -f $pid ]; then
       TARGET_ID="$(cat "$pid")"
       if [[ $(ps -p "$TARGET_ID" -o comm=) =~ "java" ]] || [[ $(ps -p 
"$TARGET_ID" -o comm=) =~ "jboot" ]]; then
-        echo $command is running.
+        echo "$command is running."
         exit 0
       else
-        echo $pid file is present but $command not running
+        echo "$pid file is present but $command not running"
         exit 1
       fi
     else
-      echo $command not running.
+      echo "$command not running."
       exit 2
     fi
+}
+
+case $option in
+
+  (start)
+    start_celeborn "$@"
+    ;;
+
+  (stop)
+    stop_celeborn
+    ;;
+
+  (restart)
+    echo "Restarting Celeborn"
+    stop_celeborn
+    start_celeborn "$@"
+    ;;
+
+  (status)
+    check_celeborn
     ;;
 
   (*)

Reply via email to