This is an automated email from the ASF dual-hosted git repository.
zhangjintao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/apisix-ingress-controller.git
The following commit(s) were added to refs/heads/master by this push:
new abfacd6a fix: Keep health checker running when health check failed.
Make healthcheck function pure (#1779)
abfacd6a is described below
commit abfacd6ab7ff8129898ef9a1c5e880b92fd52313
Author: basefas <[email protected]>
AuthorDate: Fri Apr 21 10:35:02 2023 +0800
fix: Keep health checker running when health check failed. Make healthcheck
function pure (#1779)
---
pkg/apisix/cluster.go | 33 +++++++++++++--------------------
pkg/providers/controller.go | 16 ++++++++++------
2 files changed, 23 insertions(+), 26 deletions(-)
diff --git a/pkg/apisix/cluster.go b/pkg/apisix/cluster.go
index 7f551208..a50e9383 100644
--- a/pkg/apisix/cluster.go
+++ b/pkg/apisix/cluster.go
@@ -506,33 +506,22 @@ func (c *cluster) UpstreamServiceRelation() UpstreamServiceRelation {
// HealthCheck implements Cluster.HealthCheck method.
func (c *cluster) HealthCheck(ctx context.Context) (err error) {
- if c.cacheSyncErr != nil {
- err = c.cacheSyncErr
- return
- }
- if atomic.LoadInt32(&c.cacheState) == _cacheSyncing {
- return
- }
-
// Retry three times in a row, and exit if all of them fail.
backoff := wait.Backoff{
Duration: 5 * time.Second,
Factor: 1,
Steps: 3,
}
- var lastCheckErr error
+
err = wait.ExponentialBackoffWithContext(ctx, backoff, func() (done bool, _ error) {
- if lastCheckErr = c.healthCheck(ctx); lastCheckErr != nil {
+ if lastCheckErr := c.healthCheck(ctx); lastCheckErr != nil {
log.Warnf("failed to check health for cluster %s: %s, will retry", c.name, lastCheckErr)
return
}
done = true
return
})
- if err != nil {
- // if ErrWaitTimeout then set lastSyncErr
- c.cacheSyncErr = lastCheckErr
- }
+
return err
}
@@ -543,12 +532,16 @@ func (c *cluster) healthCheck(ctx context.Context) (err error) {
if err != nil {
return err
}
- if er := conn.Close(); er != nil {
- log.Warnw("failed to close tcp probe connection",
- zap.Error(err),
- zap.String("cluster", c.name),
- )
- }
+ defer func(conn net.Conn) {
+ err := conn.Close()
+ if err != nil {
+ log.Warnw("failed to close tcp probe connection",
+ zap.Error(err),
+ zap.String("cluster", c.name),
+ )
+ }
+ }(conn)
+
return
}
diff --git a/pkg/providers/controller.go b/pkg/providers/controller.go
index f59a934a..93ae1db5 100644
--- a/pkg/providers/controller.go
+++ b/pkg/providers/controller.go
@@ -570,16 +570,20 @@ func (c *Controller) checkClusterHealth(ctx context.Context, cancelFunc context.
err := c.apisix.Cluster(c.cfg.APISIX.DefaultClusterName).HealthCheck(ctx)
if err != nil {
- // Finally failed health check, then give up leader.
- log.Warnf("failed to check health for default cluster: %s, give up leader", err)
c.apiServer.HealthState.Lock()
c.apiServer.HealthState.Err = err
c.apiServer.HealthState.Unlock()
-
- return
+ // Finally failed health check, then give up leader.
+ log.Warnf("failed to check health for default cluster: %s, give up leader", err)
+ } else {
+ if c.apiServer.HealthState.Err != nil {
+ c.apiServer.HealthState.Lock()
+ c.apiServer.HealthState.Err = err
+ c.apiServer.HealthState.Unlock()
+ }
+ log.Debugf("success check health for default cluster")
+ c.MetricsCollector.IncrCheckClusterHealth(c.name)
}
- log.Debugf("success check health for default cluster")
- c.MetricsCollector.IncrCheckClusterHealth(c.name)
}
}