This is an automated email from the ASF dual-hosted git repository.

zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git


The following commit(s) were added to refs/heads/master by this push:
     new def734bd [horus] Downtime function test results completed (#442)
def734bd is described below

commit def734bd7f9fd90cfc5f4b00c7a6767473949d8c
Author: mfordjody <[email protected]>
AuthorDate: Wed Oct 9 10:45:32 2024 +0800

    [horus] Downtime function test results completed (#442)
---
 app/horus/base/db/db.go                 |  2 +-
 app/horus/cmd/main.go                   | 12 +++++++-----
 app/horus/core/horuser/node_downtime.go |  1 +
 app/horus/core/horuser/node_drain.go    | 23 ++---------------------
 app/horus/core/horuser/node_restart.go  |  2 +-
 manifests/horus/horus.yaml              |  4 ++--
 6 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/app/horus/base/db/db.go b/app/horus/base/db/db.go
index 112fc656..d552ad0d 100644
--- a/app/horus/base/db/db.go
+++ b/app/horus/base/db/db.go
@@ -149,7 +149,7 @@ func GetRecoveryNodeDataInfoDate(day int) ([]NodeDataInfo, error) {
 
 func GetRestartNodeDataInfoDate() ([]NodeDataInfo, error) {
        var ndi []NodeDataInfo
-       session := db.Where("restart = 0 and repair = 0 and module_name = ?", "node_down")
+       session := db.Where("restart = 0 and repair = 0 and module_name = ?", "nodeDown")
        err := session.Find(&ndi)
        return ndi, err
 }
diff --git a/app/horus/cmd/main.go b/app/horus/cmd/main.go
index cb15d52e..f67786a4 100644
--- a/app/horus/cmd/main.go
+++ b/app/horus/cmd/main.go
@@ -93,11 +93,13 @@ func main() {
                return nil
        })
        group.Add(func() error {
-               klog.Info("horus node recovery manager start success.")
-               err := horus.RecoveryManager(ctx)
-               if err != nil {
-                       klog.Errorf("horus node recovery manager start failed err:%v", err)
-                       return err
+               if c.CustomModular.Enabled {
+                       klog.Info("horus node recovery manager start success.")
+                       err := horus.RecoveryManager(ctx)
+                       if err != nil {
+                               klog.Errorf("horus node recovery manager start failed err:%v", err)
+                               return err
+                       }
                }
                return nil
        })
diff --git a/app/horus/core/horuser/node_downtime.go b/app/horus/core/horuser/node_downtime.go
index 6cb57404..68c96beb 100644
--- a/app/horus/core/horuser/node_downtime.go
+++ b/app/horus/core/horuser/node_downtime.go
@@ -119,6 +119,7 @@ func (h *Horuser) DownTimeNodes(clusterName, addr string) {
        }
 
        msg := fmt.Sprintf("\n【%s】\n【集群:%v】\n【已达到宕机临界点:%v】", h.cc.NodeDownTime.DingTalk.Title, clusterName, len(WithDownNodeIPs))
+
        newfound := 0
 
        for nodeName, _ := range WithDownNodeIPs {
diff --git a/app/horus/core/horuser/node_drain.go b/app/horus/core/horuser/node_drain.go
index fd01562e..92434535 100644
--- a/app/horus/core/horuser/node_drain.go
+++ b/app/horus/core/horuser/node_drain.go
@@ -16,9 +16,7 @@
 package horuser
 
 import (
-       "context"
        "fmt"
-       corev1 "k8s.io/api/core/v1"
        v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/klog/v2"
 )
@@ -54,33 +52,16 @@ func (h *Horuser) Drain(nodeName, clusterName string) (err error) {
                                break
                        }
                }
-               klog.Errorf("node Drain evict pod result items:%d count:%v nodeName:%v\n clusterName:%v\n podName:%v\n podNamespace:%v\n", items+1, count, nodeName, clusterName, pods.Name, pods.Namespace)
                if ds {
                        continue
                }
+               klog.Errorf("node Drain evict pod result items:%d count:%v nodeName:%v\n clusterName:%v\n podName:%v\n podNamespace:%v\n", items+1, count, nodeName, clusterName, pods.Name, pods.Namespace)
+
                err = h.Evict(pods.Name, pods.Namespace, clusterName)
                if err != nil {
                        klog.Errorf("node Drain evict pod err:%v items:%d count:%v nodeName:%v\n clusterName:%v\n podName:%v\n podNamespace:%v\n", err, items+1, count, nodeName, clusterName, pods.Name, pods.Namespace)
                        return err
                }
-               err = h.Finalizer(clusterName, pods.Name, pods.Namespace)
-               if err != nil {
-                       klog.Errorf("node Drain finalizer pod err:%v items:%d count:%v nodeName:%v\n clusterName:%v\n podName:%v\n podNamespace:%v\n", err, items+1, count, nodeName, clusterName, pods.Name, pods.Namespace)
-                       return err
-               }
-
-               var oldPod *corev1.Pod
-               var _ = h.Terminating(clusterName, oldPod)
-               newPod, _ := kubeClient.CoreV1().Pods(oldPod.Namespace).Get(context.Background(), oldPod.Name, v1.GetOptions{})
-               if newPod == nil {
-                       return err
-               }
-               if newPod.UID != oldPod.UID {
-                       return err
-               }
-               if newPod.DeletionTimestamp.IsZero() {
-                       return err
-               }
        }
        return nil
 }
diff --git a/app/horus/core/horuser/node_restart.go b/app/horus/core/horuser/node_restart.go
index d8aedbec..b5b08faa 100644
--- a/app/horus/core/horuser/node_restart.go
+++ b/app/horus/core/horuser/node_restart.go
@@ -69,7 +69,7 @@ func (h *Horuser) TryRestart(node db.NodeDataInfo) {
        klog.Infof("RestartMarker result success:%v", success)
 
        if success {
-               msg := fmt.Sprintf("\n【等待宕机节点腾空后重启】\n【节点:%v】\n【日期:%v】\n【集群:%v】\n", node.NodeName, node.FirstDate, node.ClusterName)
+               msg := fmt.Sprintf("\n【宕机节点等待腾空后重启】\n【节点:%v】\n【日期:%v】\n【集群:%v】\n", node.NodeName, node.FirstDate, node.ClusterName)
                alerter.DingTalkSend(h.cc.NodeDownTime.DingTalk, msg)
 
                cmd := exec.Command("/bin/bash", "core/horuser/restart.sh", node.NodeIP, h.cc.NodeDownTime.AllSystemUser, h.cc.NodeDownTime.AllSystemPassword)
diff --git a/manifests/horus/horus.yaml b/manifests/horus/horus.yaml
index 6a52d9a4..fdac8695 100644
--- a/manifests/horus/horus.yaml
+++ b/manifests/horus/horus.yaml
@@ -25,7 +25,7 @@ kubeMultiple:
   cluster: config.1
 
 promMultiple:
-  cluster: http://192.168.15.134:31974
+  cluster: http://192.168.15.133:31974
 
 nodeRecovery:
   dayNumber: 1
@@ -64,7 +64,7 @@ customModular:
     title: "自定义通知"
 
 nodeDownTime:
-  enabled: true
+  enabled: false
   intervalSecond: 15
   promQueryTimeSecond: 60
   abnormalityQL:

Reply via email to