This is an automated email from the ASF dual-hosted git repository.
zhongxjian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/dubbo-kubernetes.git
The following commit(s) were added to refs/heads/master by this push:
new 29e6a754 [horus] Optimized restart policy implementation (#414)
29e6a754 is described below
commit 29e6a75451545f563a562f0a1135124127dd6088
Author: mfordjody <[email protected]>
AuthorDate: Tue Oct 1 16:38:41 2024 +0800
[horus] Optimized restart policy implementation (#414)
---
app/horus/basic/config/file.go | 14 +++++++-------
app/horus/core/horuser/node_downtime.go | 8 ++++----
app/horus/core/horuser/node_modular.go | 2 +-
app/horus/core/horuser/node_restart.go | 2 +-
4 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/app/horus/basic/config/file.go b/app/horus/basic/config/file.go
index 724e6803..28af1ba5 100644
--- a/app/horus/basic/config/file.go
+++ b/app/horus/basic/config/file.go
@@ -67,13 +67,13 @@ type ModularConfiguration struct {
}
type DowntimeConfiguration struct {
- Enabled bool `yaml:"enabled"`
- IntervalSecond int `yaml:"intervalSecond"`
- PromQueryTimeSecond int64 `yaml:"promQueryTimeSecond"`
- KubeMultiple map[string]string `yaml:"kubeMultiple"`
- AbnormalityQL []string `yaml:"abnormalityQL"`
- NodeNameToIPs string `yaml:"nodeNameToIPs"`
- DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
+ Enabled bool `yaml:"enabled"`
+ IntervalSecond int `yaml:"intervalSecond"`
+ PromQueryTimeSecond int64 `yaml:"promQueryTimeSecond"`
+ KubeMultiple map[string]string `yaml:"kubeMultiple"`
+ AbnormalityQL []string `yaml:"abnormalityQL"`
+ AbnormalInfoSystemQL string `yaml:"abnormalInfoSystemQL"`
+ DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
}
type AbnormalConfiguration struct {
diff --git a/app/horus/core/horuser/node_downtime.go b/app/horus/core/horuser/node_downtime.go
index ff10a06c..5451b83b 100644
--- a/app/horus/core/horuser/node_downtime.go
+++ b/app/horus/core/horuser/node_downtime.go
@@ -98,15 +98,15 @@ func (h *Horuser) DownTimeNodes(clusterName, addr string) {
klog.Infof("clusterName:%v nodeName:%v threshold:%v count:%v", clusterName, node, cq, count)
continue
}
- nnti := fmt.Sprintf(h.cc.NodeDownTime.NodeNameToIPs, node)
- res, err := h.InstantQuery(addr, nnti, clusterName, h.cc.NodeDownTime.PromQueryTimeSecond)
+ abnormalInfoSystemQL := fmt.Sprintf(h.cc.NodeDownTime.AbnormalInfoSystemQL, node)
+ res, err := h.InstantQuery(addr, abnormalInfoSystemQL, clusterName, h.cc.NodeDownTime.PromQueryTimeSecond)
if len(res) == 0 {
- klog.Errorf("No results returned for query: %s", nnti)
+ klog.Errorf("No results returned for query: %s", abnormalInfoSystemQL)
continue
}
if err != nil {
klog.Errorf("downtimeNodes InstantQuery NodeName To IPs empty err:%v", err)
- klog.Infof("clusterName:%v nodeNameToIPs: %v, err:%v", clusterName, nnti, err)
+ klog.Infof("clusterName:%v AbnormalInfoSystemQL: %v, err:%v", clusterName, abnormalInfoSystemQL, err)
continue
}
str := ""
diff --git a/app/horus/core/horuser/node_modular.go b/app/horus/core/horuser/node_modular.go
index c60058d2..c2abf777 100644
--- a/app/horus/core/horuser/node_modular.go
+++ b/app/horus/core/horuser/node_modular.go
@@ -122,7 +122,7 @@ func (h *Horuser) CustomizeModularNodes(clusterName, moduleName, nodeName, ip st
dailyLimit := h.cc.CustomModular.CordonDailyLimit[moduleName]
if len(data) > dailyLimit {
- msg := fmt.Sprintf("【日期:%v】 【集群:%v\n】 【今日 Cordon 节点数: %v】\n 【已达到今日上限: %v】\n 【节点:%v】",
+ msg := fmt.Sprintf("\n【日期:%v】\n【集群:%v\n】\n【今日 Cordon 节点数: %v】\n【已达到今日上限: %v】\n【节点:%v】",
data, clusterName, len(data), dailyLimit, nodeName)
alert.DingTalkSend(h.cc.CustomModular.DingTalk, msg)
alert.SlackSend(h.cc.CustomModular.Slack, msg)
diff --git a/app/horus/core/horuser/node_restart.go b/app/horus/core/horuser/node_restart.go
index 63d828a8..e24ea2f1 100644
--- a/app/horus/core/horuser/node_restart.go
+++ b/app/horus/core/horuser/node_restart.go
@@ -65,7 +65,7 @@ func (h *Horuser) TryRestart(node db.NodeDataInfo) {
klog.Infof("RestartMarker result pass:%v err:%v", pass, err)
if pass {
- msg := fmt.Sprintf("【等待宕机节点腾空后重启】【节点:%v】【日期:%v】【集群:%v】", node.NodeName, node.FirstDate, node.ClusterName)
+ msg := fmt.Sprintf("\n【等待宕机节点腾空后重启】\n【节点:%v】\n【日期:%v】\n【集群:%v】\n", node.NodeName, node.FirstDate, node.ClusterName)
alert.DingTalkSend(h.cc.NodeDownTime.DingTalk, msg)
// TODO user@password
cmd := exec.Command("/bin/bash", "./restart.sh", node.NodeIP)