This is an automated email from the ASF dual-hosted git repository.

zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c77b070 [horus] Pod Cleanup logic implementation (#388)
4c77b070 is described below

commit 4c77b070e1d244af708fa6897a915744afe5ab83
Author: mfordjody <[email protected]>
AuthorDate: Fri Sep 27 14:11:12 2024 +0800

    [horus] Pod Cleanup logic implementation (#388)
---
 app/horus/core/horuser/pod_abnormal.go | 77 ++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/app/horus/core/horuser/pod_abnormal.go 
b/app/horus/core/horuser/pod_abnormal.go
index adbc4f75..6ad4015e 100644
--- a/app/horus/core/horuser/pod_abnormal.go
+++ b/app/horus/core/horuser/pod_abnormal.go
@@ -17,7 +17,13 @@ package horuser
 
 import (
        "context"
+       "fmt"
+       "github.com/apache/dubbo-kubernetes/app/horus/basic/db"
+       "github.com/apache/dubbo-kubernetes/app/horus/core/alert"
+       "github.com/gammazero/workerpool"
+       corev1 "k8s.io/api/core/v1"
        "k8s.io/apimachinery/pkg/util/wait"
+       "k8s.io/klog/v2"
        "sync"
        "time"
 )
@@ -38,3 +44,74 @@ func (h *Horuser) PodAbnormalClean(ctx context.Context) {
        }
        wg.Wait()
 }
+
+func (h *Horuser) PodsOnCluster(clusterName string) {
+       var podNamespace string
+       pods, err := h.Fetch(clusterName, podNamespace, 
h.cc.PodAbnormal.FieldSelector)
+       if err != nil {
+               klog.Errorf("Failed to fetch pods on cluster:%v", err)
+               klog.Infof("clusterName:%v podNamespace:%v", clusterName, 
podNamespace)
+               return
+       }
+       count := len(pods)
+       if count == 0 {
+               klog.Infof("PodsOnCluster no abnomal clusterName:%v", 
clusterName)
+               return
+       }
+       wp := workerpool.New(10)
+       for index, pod := range pods {
+               if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase == 
corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
+                       continue
+               }
+               msg := 
fmt.Sprintf("【集群:%v】【%d/%d】【Namespace:%v】【PodName:%v】【Phase:%v】【节点名:%v】", 
clusterName, index+1, count, pod.Namespace, pod.Name, pod.Status.Phase, 
pod.Spec.NodeName)
+               klog.Infof(msg)
+
+               wp.Submit(func() {
+                       h.PodSingle(pod, clusterName)
+               })
+       }
+       wp.StopWait()
+}
+
+func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
+       if !pod.DeletionTimestamp.IsZero() {
+               var err error
+               action := ""
+               switch len(pod.Finalizers) {
+               case 0:
+                       if pod.Name != "" {
+                               return
+                       }
+                       err = h.Evict(pod.Name, pod.Namespace, clusterName)
+                       action = "try patch-finalizer"
+               default:
+                       time.Sleep(time.Duration(h.cc.PodAbnormal.DoubleSecond) 
* time.Second)
+                       pass := h.Terminating(clusterName, &pod)
+                       if !pass {
+                               return
+                       }
+                       err = h.Finalizer(clusterName, pod.Name, pod.Namespace)
+                       action = "try patch-finalizer"
+                       res := "Success"
+                       if err != nil {
+                               res = fmt.Sprintf("failed:%v", err)
+                       }
+                       today := time.Now().Format("2006-01-02")
+                       msg := fmt.Sprintf("【集群:%v】【Pod:%v】【Namespace:%v】【无法删除 
pod-patch-finalizer:%v】【处理结果:%v】", clusterName, pod.Name, pod.Namespace, 
action, res)
+                       alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
+                       write := db.PodDataInfo{
+                               PodName:     pod.Name,
+                               PodIP:       pod.Status.PodIP,
+                               NodeName:    pod.Spec.NodeName,
+                               ClusterName: clusterName,
+                               ModuleName:  "pod_abnormal_clean",
+                               Reason:      action,
+                               FirstDate:   today,
+                       }
+                       _, err = write.AddOrGet()
+                       klog.Errorf("write AddOrGet err:%v", err)
+                       klog.Infof("podName:%v", pod.Name)
+                       return
+               }
+       }
+}

Reply via email to