This is an automated email from the ASF dual-hosted git repository.

zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git


The following commit(s) were added to refs/heads/master by this push:
     new 478a1f22 [horus] fixed remove pod logic (#397)
478a1f22 is described below

commit 478a1f22b9fcae5f9bd5c57ba37a39f974eedc6a
Author: mfordjody <[email protected]>
AuthorDate: Sat Sep 28 15:32:36 2024 +0800

    [horus] fixed remove pod logic (#397)
---
 app/horus/core/horuser/pod_abnormal.go | 79 ++++++++++++++++++----------------
 app/horus/core/horuser/pod_remove.go   |  4 +-
 2 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/app/horus/core/horuser/pod_abnormal.go b/app/horus/core/horuser/pod_abnormal.go
index 38aefbf1..a3fb6296 100644
--- a/app/horus/core/horuser/pod_abnormal.go
+++ b/app/horus/core/horuser/pod_abnormal.go
@@ -30,6 +30,7 @@ import (
 
 const (
        ModuleName = "pod_abnormal_clean"
+       Reason     = "clean up"
 )
 
 func (h *Horuser) PodAbnormalCleanManager(ctx context.Context) error {
@@ -52,11 +53,10 @@ func (h *Horuser) PodAbnormalClean(ctx context.Context) {
 }
 
 func (h *Horuser) PodsOnCluster(clusterName string) {
-       var podNamespace string
-       pods, err := h.Fetch(clusterName, podNamespace, h.cc.PodAbnormal.FieldSelector)
+       pods, err := h.Fetch(clusterName, h.cc.PodAbnormal.FieldSelector)
        if err != nil {
                klog.Errorf("Failed to fetch pods on cluster:%v", err)
-               klog.Infof("clusterName:%v podNamespace:%v", clusterName, podNamespace)
+               klog.Infof("clusterName:%v", clusterName)
                return
        }
        count := len(pods)
@@ -66,10 +66,11 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
        }
        wp := workerpool.New(10)
        for index, pod := range pods {
-               if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
+               pod := pod
+               if pod.Status.Phase == corev1.PodRunning {
                        continue
                }
-               msg := fmt.Sprintf("\n【集群:%v】\n【存活:%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
+               msg := fmt.Sprintf("\n【集群:%v】\n【%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
                klog.Infof(msg)
 
                wp.Submit(func() {
@@ -80,44 +81,50 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
 }
 
 func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
+       var err error
        if !pod.DeletionTimestamp.IsZero() {
-               var err error
-               action := ""
-               switch len(pod.Finalizers) {
-               case 0:
-                       if pod.Name != "" {
-                               return
-                       }
-                       err = h.Evict(pod.Name, pod.Namespace, clusterName)
-                       action = "try patch-finalizer"
-               default:
+               if len(pod.Finalizers) > 0 {
                        time.Sleep(time.Duration(h.cc.PodAbnormal.DoubleSecond) * time.Second)
-                       pass := h.Terminating(clusterName, &pod)
-                       if !pass {
+                       if !h.Terminating(clusterName, &pod) {
+                               klog.Infof("Pod %s is still terminating, skipping.", pod.Name)
                                return
                        }
-                       err = h.Finalizer(clusterName, pod.Name, pod.Namespace)
-                       action = "try patch-finalizer"
-                       res := "Success"
+                       err := h.Finalizer(clusterName, pod.Name, pod.Namespace)
                        if err != nil {
-                               res = fmt.Sprintf("failed:%v", err)
-                       }
-                       today := time.Now().Format("2006-01-02")
-                       msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【无法删除的 finalizer:%v】\n【处理结果:%v】\n", clusterName, pod.Name, pod.Namespace, err, res)
-                       alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
-                       write := db.PodDataInfo{
-                               PodName:     pod.Name,
-                               PodIP:       pod.Status.PodIP,
-                               NodeName:    pod.Spec.NodeName,
-                               ClusterName: clusterName,
-                               ModuleName:  ModuleName,
-                               Reason:      action,
-                               FirstDate:   today,
+                               klog.Errorf("Failed to patch finalizer for pod %s: %v", pod.Name, err)
+                               return
                        }
-                       _, err = write.AddOrGet()
-                       klog.Errorf("write AddOrGet err:%v", err)
-                       klog.Infof("podName:%v", pod.Name)
+                       klog.Infof("Successfully patched finalizer for pod %s", pod.Name)
+               }
+               return
+       }
+
+       if len(pod.Finalizers) == 0 && pod.Name != "" {
+               err := h.Evict(pod.Name, pod.Namespace, clusterName)
+               if err != nil {
+                       klog.Errorf("Failed to evict pod %s: %v", pod.Name, err)
                        return
                }
+               klog.Infof("Evicted pod %s successfully", pod.Name)
+       }
+       res := "Success"
+       if err != nil {
+               res = fmt.Sprintf("failed:%v", err)
+       }
+       today := time.Now().Format("2006-01-02")
+       msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【清除 finalizer:%v】\n", clusterName, pod.Name, pod.Namespace, res)
+       alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
+       write := db.PodDataInfo{
+               PodName:     pod.Name,
+               PodIP:       pod.Status.PodIP,
+               NodeName:    pod.Spec.NodeName,
+               ClusterName: clusterName,
+               ModuleName:  ModuleName,
+               Reason:      Reason,
+               FirstDate:   today,
        }
+       _, err = write.AddOrGet()
+       klog.Errorf("write AddOrGet err:%v", err)
+       klog.Infof("podName:%v", pod.Name)
+       return
 }
diff --git a/app/horus/core/horuser/pod_remove.go b/app/horus/core/horuser/pod_remove.go
index 2ea2396a..c3eac1b7 100644
--- a/app/horus/core/horuser/pod_remove.go
+++ b/app/horus/core/horuser/pod_remove.go
@@ -67,7 +67,7 @@ func (h *Horuser) Terminating(clusterName string, oldPod *corev1.Pod) bool {
        return true
 }
 
-func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string) ([]corev1.Pod, error) {
+func (h *Horuser) Fetch(clusterName, fieldSelector string) ([]corev1.Pod, error) {
        kubeClient := h.kubeClientMap[clusterName]
        if kubeClient == nil {
                klog.Errorf("Fetch kubeClient by clusterName empty.")
@@ -77,7 +77,7 @@ func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string) ([]core
        ctx, cancel := h.GetK8sContext()
        defer cancel()
        list := v1.ListOptions{FieldSelector: fieldSelector}
-       pods, err := kubeClient.CoreV1().Pods(podNamespace).List(ctx, list)
+       pods, err := kubeClient.CoreV1().Pods("default").List(ctx, list)
        if err != nil {
                klog.Errorf("Fetch list pod err:%v", err)
        klog.Infof("clusterName:%v fieldSelector:%v", clusterName, fieldSelector)

Reply via email to