slene commented on a change in pull request #453:
URL:
https://github.com/apache/apisix-ingress-controller/pull/453#discussion_r633088542
##########
File path: pkg/apisix/cluster.go
##########
@@ -290,6 +303,39 @@ func (c *cluster) GlobalRule() GlobalRule {
return c.globalRules
}
+// HealthCheck implements Cluster.HealthCheck method.
+func (c *cluster) HealthCheck(ctx context.Context, backoff wait.Backoff) (err
error) {
+ if c.cacheSyncErr != nil {
+ err = c.cacheSyncErr
+ return
+ }
+ if atomic.LoadInt32(&c.cacheState) == _cacheSyncing {
+ return
+ }
+ var lastCheckErr error
+ err = wait.ExponentialBackoffWithContext(ctx, backoff, func() (done
bool, _ error) {
+ if lastCheckErr = c.healthCheck(ctx); lastCheckErr != nil {
+ log.Warnf("failed to HealthCheck for cluster %s: %s,
will retry", c.name, lastCheckErr)
+ return
+ }
+ done = true
+ return
+ })
+ if err != nil {
+ // if ErrWaitTimeout then set lastSyncErr
+ c.cacheSyncErr = lastCheckErr
+ }
+ return err
+}
+
+func (c *cluster) healthCheck(ctx context.Context) (err error) {
+ // TODO
Review comment:
ok, later.
##########
File path: pkg/ingress/apisix_cluster_config.go
##########
@@ -62,7 +64,6 @@ func (c *apisixClusterConfigController) run(ctx
context.Context) {
go c.runWorker(ctx)
}
<-ctx.Done()
- c.workqueue.ShutDown()
Review comment:
If we do not change them, this PR will leak goroutines when *controller is
re-created.
##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
}
func (c *Controller) run(ctx context.Context) {
- log.Infow("controller now is running as leader",
+ log.Infow("controller is start leading ...",
zap.String("namespace", c.namespace),
zap.String("pod", c.name),
)
+
defer c.leaderContextCancelFunc()
c.metricsCollector.ResetLeader(true)
- err := c.apisix.AddCluster(&apisix.ClusterOptions{
+ clusterOpts := &apisix.ClusterOptions{
Name: c.cfg.APISIX.DefaultClusterName,
AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
BaseURL: c.cfg.APISIX.DefaultClusterBaseURL,
- })
+ Timeout: c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+ }
+ err := c.apisix.AddCluster(clusterOpts)
if err != nil && err != apisix.ErrDuplicatedCluster {
- // TODO give up the leader role.
Review comment:
Do a health check and give up leadership if it fails. Is that not enough?
##########
File path: pkg/kube/init.go
##########
@@ -25,16 +25,12 @@ import (
// KubeClient contains some objects used to communicate with Kubernetes API
Server.
type KubeClient struct {
+ cfg *config.Config
+
// Client is the object used to operate Kubernetes builtin resources.
Client kubernetes.Interface
// APISIXClient is the object used to operate resources under
apisix.apache.org group.
APISIXClient clientset.Interface
- // SharedIndexInformerFactory is the index informer factory object used
to watch and
Review comment:
```
// in controller
// re-executing this code will panic with `close of closed channel` when the
// context is cancelled,
// unless you re-create the Informer
**Informer.Run(ctx.Done())
```
So **Informer should be re-created in every controller.run, which requires
re-creating the InformerFactory.
##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
}
func (c *Controller) run(ctx context.Context) {
- log.Infow("controller now is running as leader",
+ log.Infow("controller is start leading ...",
zap.String("namespace", c.namespace),
zap.String("pod", c.name),
)
+
defer c.leaderContextCancelFunc()
c.metricsCollector.ResetLeader(true)
- err := c.apisix.AddCluster(&apisix.ClusterOptions{
+ clusterOpts := &apisix.ClusterOptions{
Name: c.cfg.APISIX.DefaultClusterName,
AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
BaseURL: c.cfg.APISIX.DefaultClusterBaseURL,
- })
+ Timeout: c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+ }
+ err := c.apisix.AddCluster(clusterOpts)
if err != nil && err != apisix.ErrDuplicatedCluster {
- // TODO give up the leader role.
Review comment:
Do a health check and give up leadership if it fails. Is that not enough?
##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
}
func (c *Controller) run(ctx context.Context) {
- log.Infow("controller now is running as leader",
+ log.Infow("controller is start leading ...",
zap.String("namespace", c.namespace),
zap.String("pod", c.name),
)
+
defer c.leaderContextCancelFunc()
c.metricsCollector.ResetLeader(true)
- err := c.apisix.AddCluster(&apisix.ClusterOptions{
+ clusterOpts := &apisix.ClusterOptions{
Name: c.cfg.APISIX.DefaultClusterName,
AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
BaseURL: c.cfg.APISIX.DefaultClusterBaseURL,
- })
+ Timeout: c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+ }
+ err := c.apisix.AddCluster(clusterOpts)
if err != nil && err != apisix.ErrDuplicatedCluster {
- // TODO give up the leader role.
Review comment:
ok
##########
File path: pkg/ingress/controller.go
##########
@@ -423,3 +455,21 @@ func (c *Controller) syncSSL(ctx context.Context, ssl
*apisixv1.Ssl, event types
}
return err
}
+
+func (c *Controller) checkClusterHealth(ctx context.Context, cancelFunc
context.CancelFunc) {
+ for {
+ select {
+ case <-ctx.Done():
+ case <-time.After(5 * time.Second):
+ }
+
+ err :=
c.apisix.Cluster(c.cfg.APISIX.DefaultClusterName).HealthCheck(ctx)
+ if err != nil {
+ // Finally failed health check, then give up leader.
+ log.Warnf("failed to check health for default cluster:
%s, give up leader", err)
+ cancelFunc()
+ return
Review comment:
No, the current logic is to just discard it if it is unhealthy.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]