This is an automated email from the ASF dual-hosted git repository.
zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git
The following commit(s) were added to refs/heads/master by this push:
new 478a1f22 [horus] fixed remove pod logic (#397)
478a1f22 is described below
commit 478a1f22b9fcae5f9bd5c57ba37a39f974eedc6a
Author: mfordjody <[email protected]>
AuthorDate: Sat Sep 28 15:32:36 2024 +0800
[horus] fixed remove pod logic (#397)
---
app/horus/core/horuser/pod_abnormal.go | 79 ++++++++++++++++++----------------
app/horus/core/horuser/pod_remove.go | 4 +-
2 files changed, 45 insertions(+), 38 deletions(-)
diff --git a/app/horus/core/horuser/pod_abnormal.go b/app/horus/core/horuser/pod_abnormal.go
index 38aefbf1..a3fb6296 100644
--- a/app/horus/core/horuser/pod_abnormal.go
+++ b/app/horus/core/horuser/pod_abnormal.go
@@ -30,6 +30,7 @@ import (
const (
ModuleName = "pod_abnormal_clean"
+ Reason = "clean up"
)
func (h *Horuser) PodAbnormalCleanManager(ctx context.Context) error {
@@ -52,11 +53,10 @@ func (h *Horuser) PodAbnormalClean(ctx context.Context) {
}
func (h *Horuser) PodsOnCluster(clusterName string) {
- var podNamespace string
- pods, err := h.Fetch(clusterName, podNamespace, h.cc.PodAbnormal.FieldSelector)
+ pods, err := h.Fetch(clusterName, h.cc.PodAbnormal.FieldSelector)
if err != nil {
klog.Errorf("Failed to fetch pods on cluster:%v", err)
- klog.Infof("clusterName:%v podNamespace:%v", clusterName,
podNamespace)
+ klog.Infof("clusterName:%v", clusterName)
return
}
count := len(pods)
@@ -66,10 +66,11 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
}
wp := workerpool.New(10)
for index, pod := range pods {
- if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase ==
corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
+ pod := pod
+ if pod.Status.Phase == corev1.PodRunning {
continue
}
- msg :=
fmt.Sprintf("\n【集群:%v】\n【存活:%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n",
clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase,
pod.Spec.NodeName)
+ msg :=
fmt.Sprintf("\n【集群:%v】\n【%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n",
clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase,
pod.Spec.NodeName)
klog.Infof(msg)
wp.Submit(func() {
@@ -80,44 +81,50 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
}
func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
+ var err error
if !pod.DeletionTimestamp.IsZero() {
- var err error
- action := ""
- switch len(pod.Finalizers) {
- case 0:
- if pod.Name != "" {
- return
- }
- err = h.Evict(pod.Name, pod.Namespace, clusterName)
- action = "try patch-finalizer"
- default:
+ if len(pod.Finalizers) > 0 {
time.Sleep(time.Duration(h.cc.PodAbnormal.DoubleSecond)
* time.Second)
- pass := h.Terminating(clusterName, &pod)
- if !pass {
+ if !h.Terminating(clusterName, &pod) {
+ klog.Infof("Pod %s is still terminating, skipping.", pod.Name)
return
}
- err = h.Finalizer(clusterName, pod.Name, pod.Namespace)
- action = "try patch-finalizer"
- res := "Success"
+ err := h.Finalizer(clusterName, pod.Name, pod.Namespace)
if err != nil {
- res = fmt.Sprintf("failed:%v", err)
- }
- today := time.Now().Format("2006-01-02")
- msg :=
fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【无法删除的
finalizer:%v】\n【处理结果:%v】\n", clusterName, pod.Name, pod.Namespace, err, res)
- alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
- write := db.PodDataInfo{
- PodName: pod.Name,
- PodIP: pod.Status.PodIP,
- NodeName: pod.Spec.NodeName,
- ClusterName: clusterName,
- ModuleName: ModuleName,
- Reason: action,
- FirstDate: today,
+ klog.Errorf("Failed to patch finalizer for pod %s: %v", pod.Name, err)
+ return
}
- _, err = write.AddOrGet()
- klog.Errorf("write AddOrGet err:%v", err)
- klog.Infof("podName:%v", pod.Name)
+ klog.Infof("Successfully patched finalizer for pod %s",
pod.Name)
+ }
+ return
+ }
+
+ if len(pod.Finalizers) == 0 && pod.Name != "" {
+ err := h.Evict(pod.Name, pod.Namespace, clusterName)
+ if err != nil {
+ klog.Errorf("Failed to evict pod %s: %v", pod.Name, err)
return
}
+ klog.Infof("Evicted pod %s successfully", pod.Name)
+ }
+ res := "Success"
+ if err != nil {
+ res = fmt.Sprintf("failed:%v", err)
+ }
+ today := time.Now().Format("2006-01-02")
+ msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【清除
finalizer:%v】\n", clusterName, pod.Name, pod.Namespace, res)
+ alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
+ write := db.PodDataInfo{
+ PodName: pod.Name,
+ PodIP: pod.Status.PodIP,
+ NodeName: pod.Spec.NodeName,
+ ClusterName: clusterName,
+ ModuleName: ModuleName,
+ Reason: Reason,
+ FirstDate: today,
}
+ _, err = write.AddOrGet()
+ klog.Errorf("write AddOrGet err:%v", err)
+ klog.Infof("podName:%v", pod.Name)
+ return
}
diff --git a/app/horus/core/horuser/pod_remove.go b/app/horus/core/horuser/pod_remove.go
index 2ea2396a..c3eac1b7 100644
--- a/app/horus/core/horuser/pod_remove.go
+++ b/app/horus/core/horuser/pod_remove.go
@@ -67,7 +67,7 @@ func (h *Horuser) Terminating(clusterName string, oldPod *corev1.Pod) bool {
return true
}
-func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string)
([]corev1.Pod, error) {
+func (h *Horuser) Fetch(clusterName, fieldSelector string) ([]corev1.Pod,
error) {
kubeClient := h.kubeClientMap[clusterName]
if kubeClient == nil {
klog.Errorf("Fetch kubeClient by clusterName empty.")
@@ -77,7 +77,7 @@ func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string) ([]core
ctx, cancel := h.GetK8sContext()
defer cancel()
list := v1.ListOptions{FieldSelector: fieldSelector}
- pods, err := kubeClient.CoreV1().Pods(podNamespace).List(ctx, list)
+ pods, err := kubeClient.CoreV1().Pods("default").List(ctx, list)
if err != nil {
klog.Errorf("Fetch list pod err:%v", err)
klog.Infof("clusterName:%v fieldSelector:%v", clusterName,
fieldSelector)