This is an automated email from the ASF dual-hosted git repository.
zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git
The following commit(s) were added to refs/heads/master by this push:
new d1de34c4 [horus] Modify pod exception name (#434)
d1de34c4 is described below
commit d1de34c48f105ca624747c01ba530f67ee32d62d
Author: mfordjody <[email protected]>
AuthorDate: Sun Oct 6 12:42:30 2024 +0800
[horus] Modify pod exception name (#434)
---
app/horus/base/config/config.go | 24 +++++++++++-----------
app/horus/cmd/main.go | 16 +++++++--------
app/horus/core/horuser/pod_remove.go | 2 +-
.../horuser/{pod_abnormal.go => pod_stagnation.go} | 20 +++++++++---------
manifests/horus/horus.yaml | 7 +++----
5 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/app/horus/base/config/config.go b/app/horus/base/config/config.go
index 9dba5301..34d813a9 100644
--- a/app/horus/base/config/config.go
+++ b/app/horus/base/config/config.go
@@ -16,17 +16,17 @@
package config
type Config struct {
- Address string `yaml:"address"`
- KubeTimeSecond int64 `yaml:"kubeTimeSecond"`
- Mysql *MysqlConfiguration `yaml:"mysql"`
- DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
- Slack *SlackConfiguration `yaml:"slack"`
- KubeMultiple map[string]string `yaml:"kubeMultiple"`
- PromMultiple map[string]string `yaml:"promMultiple"`
- NodeRecovery *RecoveryConfiguration `yaml:"nodeRecovery"`
- CustomModular *ModularConfiguration `yaml:"customModular"`
- NodeDownTime *DowntimeConfiguration `yaml:"nodeDownTime"`
- PodAbnormal *AbnormalConfiguration `yaml:"podAbnormal"`
+ Address string `yaml:"address"`
+ KubeTimeSecond int64 `yaml:"kubeTimeSecond"`
+ Mysql *MysqlConfiguration `yaml:"mysql"`
+ DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
+ Slack *SlackConfiguration `yaml:"slack"`
+ KubeMultiple map[string]string `yaml:"kubeMultiple"`
+ PromMultiple map[string]string `yaml:"promMultiple"`
+ NodeRecovery *RecoveryConfiguration `yaml:"nodeRecovery"`
+ CustomModular *ModularConfiguration `yaml:"customModular"`
+ NodeDownTime *DowntimeConfiguration `yaml:"nodeDownTime"`
+ PodStagnationCleaner *CleanerConfiguration `yaml:"podStagnationCleaner"`
}
type MysqlConfiguration struct {
@@ -79,7 +79,7 @@ type DowntimeConfiguration struct {
DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
}
-type AbnormalConfiguration struct {
+type CleanerConfiguration struct {
Enabled bool `yaml:"enabled"`
IntervalSecond int `yaml:"intervalSecond"`
DoubleSecond int `yaml:"doubleSecond"`
diff --git a/app/horus/cmd/main.go b/app/horus/cmd/main.go
index effc2ecb..2b579fcc 100644
--- a/app/horus/cmd/main.go
+++ b/app/horus/cmd/main.go
@@ -101,7 +101,7 @@ func main() {
})
group.Add(func() error {
if c.NodeRecovery.Enabled {
- klog.Info("horus recovery manager start success.")
+ klog.Info("horus node recovery manager start success.")
err := horus.RecoveryManager(ctx)
if err != nil {
klog.Errorf("horus recovery manager start failed error:%v", err)
@@ -111,7 +111,7 @@ func main() {
})
group.Add(func() error {
if c.CustomModular.Enabled {
- klog.Info("horus customize modular manager start success.")
+ klog.Info("horus node customize modular manager start success.")
err := horus.CustomizeModularManager(ctx)
if err != nil {
klog.Errorf("horus customize modular manager start failed error:%v", err)
@@ -121,7 +121,7 @@ func main() {
})
group.Add(func() error {
if c.NodeDownTime.Enabled {
- klog.Info("horus down time manager start success.")
+ klog.Info("horus node downtime manager start success.")
err := horus.DownTimeManager(ctx)
if err != nil {
klog.Errorf("horus down time manager start failed error:%v", err)
@@ -131,7 +131,7 @@ func main() {
})
group.Add(func() error {
if c.NodeDownTime.Enabled {
- klog.Info("horus down time restart manager start success.")
+ klog.Info("horus node downtime restart manager start success.")
err := horus.DowntimeRestartManager(ctx)
if err != nil {
klog.Errorf("horus down time restart manager start failed error:%v", err)
@@ -141,11 +141,11 @@ func main() {
})
group.Add(func() error {
- if c.PodAbnormal.Enabled {
- klog.Info("horus pod abnormal clean manager start success.")
- err := horus.PodAbnormalCleanManager(ctx)
+ if c.PodStagnationCleaner.Enabled {
+ klog.Info("horus pod stagnation clean manager start success.")
+ err := horus.PodStagnationCleanManager(ctx)
if err != nil {
- klog.Errorf("horus pod abnormal clean manager start failed error:%v", err)
+ klog.Errorf("horus pod stagnation clean manager start failed error:%v", err)
}
}
return nil
diff --git a/app/horus/core/horuser/pod_remove.go b/app/horus/core/horuser/pod_remove.go
index c3eac1b7..18448f43 100644
--- a/app/horus/core/horuser/pod_remove.go
+++ b/app/horus/core/horuser/pod_remove.go
@@ -77,7 +77,7 @@ func (h *Horuser) Fetch(clusterName, fieldSelector string) ([]corev1.Pod, error)
ctx, cancel := h.GetK8sContext()
defer cancel()
list := v1.ListOptions{FieldSelector: fieldSelector}
- pods, err := kubeClient.CoreV1().Pods("default").List(ctx, list)
+ pods, err := kubeClient.CoreV1().Pods("").List(ctx, list)
if err != nil {
klog.Errorf("Fetch list pod err:%v", err)
klog.Infof("clusterName:%v fieldSelector:%v", clusterName, fieldSelector)
diff --git a/app/horus/core/horuser/pod_abnormal.go b/app/horus/core/horuser/pod_stagnation.go
similarity index 79%
rename from app/horus/core/horuser/pod_abnormal.go
rename to app/horus/core/horuser/pod_stagnation.go
index e925a302..45ce344f 100644
--- a/app/horus/core/horuser/pod_abnormal.go
+++ b/app/horus/core/horuser/pod_stagnation.go
@@ -29,19 +29,19 @@ import (
)
const (
- ModuleName = "pod_abnormal_clean"
- Reason = "clean_up"
+ ModuleName = "podStagnationCleaner"
+ Reason = "StagnationCleanup"
)
-func (h *Horuser) PodAbnormalCleanManager(ctx context.Context) error {
- go wait.UntilWithContext(ctx, h.PodAbnormalClean, time.Duration(h.cc.PodAbnormal.IntervalSecond)*time.Second)
+func (h *Horuser) PodStagnationCleanManager(ctx context.Context) error {
+ go wait.UntilWithContext(ctx, h.PodStagnationClean, time.Duration(h.cc.PodStagnationCleaner.IntervalSecond)*time.Second)
<-ctx.Done()
return nil
}
-func (h *Horuser) PodAbnormalClean(ctx context.Context) {
+func (h *Horuser) PodStagnationClean(ctx context.Context) {
var wg sync.WaitGroup
- for cn := range h.cc.PodAbnormal.KubeMultiple {
+ for cn := range h.cc.PodStagnationCleaner.KubeMultiple {
cn := cn
wg.Add(1)
go func() {
@@ -53,7 +53,7 @@ func (h *Horuser) PodAbnormalClean(ctx context.Context) {
}
func (h *Horuser) PodsOnCluster(clusterName string) {
- pods, err := h.Fetch(clusterName, h.cc.PodAbnormal.FieldSelector)
+ pods, err := h.Fetch(clusterName, h.cc.PodStagnationCleaner.FieldSelector)
if err != nil {
klog.Errorf("Failed to fetch pods on cluster:%v", err)
klog.Infof("clusterName:%v", clusterName)
@@ -70,7 +70,7 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
if pod.Status.Phase == corev1.PodRunning {
continue
}
- msg := fmt.Sprintf("\n【集群:%v】\n【%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
+ msg := fmt.Sprintf("\n【集群:%v】\n【停滞:%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
klog.Infof(msg)
wp.Submit(func() {
@@ -84,7 +84,7 @@ func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
var err error
if !pod.DeletionTimestamp.IsZero() {
if len(pod.Finalizers) > 0 {
- time.Sleep(time.Duration(h.cc.PodAbnormal.DoubleSecond) * time.Second)
+ time.Sleep(time.Duration(h.cc.PodStagnationCleaner.DoubleSecond) * time.Second)
if !h.Terminating(clusterName, &pod) {
klog.Infof("Pod %s is still terminating, skipping.", pod.Name)
return
@@ -113,7 +113,7 @@ func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
}
today := time.Now().Format("2006-01-02")
msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【清除 finalizer:%v】\n", clusterName, pod.Name, pod.Namespace, res)
- alerter.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
+ alerter.DingTalkSend(h.cc.PodStagnationCleaner.DingTalk, msg)
write := db.PodDataInfo{
PodName: pod.Name,
PodIP: pod.Status.PodIP,
diff --git a/manifests/horus/horus.yaml b/manifests/horus/horus.yaml
index d9869011..ba1218bf 100644
--- a/manifests/horus/horus.yaml
+++ b/manifests/horus/horus.yaml
@@ -25,7 +25,7 @@ kubeMultiple:
cluster: config.1
promMultiple:
- cluster: http://192.168.15.133:31974
+ cluster: http://192.168.15.134:31974
nodeRecovery:
enabled: false
@@ -87,11 +87,10 @@ nodeDownTime:
webhookUrl:
"https://hooks.slack.com/services/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"
title: "自定义通知"
-podAbnormal:
- enabled: false
+podStagnationCleaner:
+ enabled: true
intervalSecond: 15
doubleSecond: 60
- labelSelector: "app.kubernetes.io/name=horus"
fieldSelector: "status.phase!=Running"
kubeMultiple:
cluster: config.1