[GitHub] [apisix-ingress-controller] slene commented on a change in pull request #453: fix: panic of start leading. sync ingress failed when apisix not start.

GitBox Mon, 17 May 2021 11:08:20 -0700


slene commented on a change in pull request #453:
URL: 
https://github.com/apache/apisix-ingress-controller/pull/453#discussion_r633088542




##########
File path: pkg/apisix/cluster.go
##########
@@ -290,6 +303,39 @@ func (c *cluster) GlobalRule() GlobalRule {
        return c.globalRules
 }
 
+// HealthCheck implements Cluster.HealthCheck method.
+func (c *cluster) HealthCheck(ctx context.Context, backoff wait.Backoff) (err 
error) {
+       if c.cacheSyncErr != nil {
+               err = c.cacheSyncErr
+               return
+       }
+       if atomic.LoadInt32(&c.cacheState) == _cacheSyncing {
+               return
+       }
+       var lastCheckErr error
+       err = wait.ExponentialBackoffWithContext(ctx, backoff, func() (done 
bool, _ error) {
+               if lastCheckErr = c.healthCheck(ctx); lastCheckErr != nil {
+                       log.Warnf("failed to HealthCheck for cluster %s: %s, 
will retry", c.name, lastCheckErr)
+                       return
+               }
+               done = true
+               return
+       })
+       if err != nil {
+               // if ErrWaitTimeout then set lastSyncErr
+               c.cacheSyncErr = lastCheckErr
+       }
+       return err
+}
+
+func (c *cluster) healthCheck(ctx context.Context) (err error) {
+       // TODO

Review comment:
       ok, later.

##########
File path: pkg/ingress/apisix_cluster_config.go
##########
@@ -62,7 +64,6 @@ func (c *apisixClusterConfigController) run(ctx 
context.Context) {
                go c.runWorker(ctx)
        }
        <-ctx.Done()
-       c.workqueue.ShutDown()

Review comment:
       If we do not change them, it will cause a goroutine leak in this PR when
the *controller is re-created.

##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
 }
 
 func (c *Controller) run(ctx context.Context) {
-       log.Infow("controller now is running as leader",
+       log.Infow("controller is start leading ...",
                zap.String("namespace", c.namespace),
                zap.String("pod", c.name),
        )
+
        defer c.leaderContextCancelFunc()
        c.metricsCollector.ResetLeader(true)
 
-       err := c.apisix.AddCluster(&apisix.ClusterOptions{
+       clusterOpts := &apisix.ClusterOptions{
                Name:     c.cfg.APISIX.DefaultClusterName,
                AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
                BaseURL:  c.cfg.APISIX.DefaultClusterBaseURL,
-       })
+               Timeout:  c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+       }
+       err := c.apisix.AddCluster(clusterOpts)
        if err != nil && err != apisix.ErrDuplicatedCluster {
-               // TODO give up the leader role.

Review comment:
       Do a health check and give up leadership if it fails. Is that not enough?

##########
File path: pkg/kube/init.go
##########
@@ -25,16 +25,12 @@ import (
 
 // KubeClient contains some objects used to communicate with Kubernetes API 
Server.
 type KubeClient struct {
+       cfg *config.Config
+
        // Client is the object used to operate Kubernetes builtin resources.
        Client kubernetes.Interface
        // APISIXClient is the object used to operate resources under 
apisix.apache.org group.
        APISIXClient clientset.Interface
-       // SharedIndexInformerFactory is the index informer factory object used 
to watch and

Review comment:
       ```
   // in controller
   // re-executing this code will panic with `close of closed channel` when
   // the context is cancelled, unless you re-create the Informer
   **Informer.Run(ctx.Done())
   ```
   
   So the **Informer should be re-created in every controller.run, which means
the InformerFactory needs to be re-created as well.

##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
 }
 
 func (c *Controller) run(ctx context.Context) {
-       log.Infow("controller now is running as leader",
+       log.Infow("controller is start leading ...",
                zap.String("namespace", c.namespace),
                zap.String("pod", c.name),
        )
+
        defer c.leaderContextCancelFunc()
        c.metricsCollector.ResetLeader(true)
 
-       err := c.apisix.AddCluster(&apisix.ClusterOptions{
+       clusterOpts := &apisix.ClusterOptions{
                Name:     c.cfg.APISIX.DefaultClusterName,
                AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
                BaseURL:  c.cfg.APISIX.DefaultClusterBaseURL,
-       })
+               Timeout:  c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+       }
+       err := c.apisix.AddCluster(clusterOpts)
        if err != nil && err != apisix.ErrDuplicatedCluster {
-               // TODO give up the leader role.

Review comment:
       Do a health check and give up leadership if it fails. Is that not enough?

##########
File path: pkg/ingress/controller.go
##########
@@ -317,30 +326,42 @@ election:
 }
 
 func (c *Controller) run(ctx context.Context) {
-       log.Infow("controller now is running as leader",
+       log.Infow("controller is start leading ...",
                zap.String("namespace", c.namespace),
                zap.String("pod", c.name),
        )
+
        defer c.leaderContextCancelFunc()
        c.metricsCollector.ResetLeader(true)
 
-       err := c.apisix.AddCluster(&apisix.ClusterOptions{
+       clusterOpts := &apisix.ClusterOptions{
                Name:     c.cfg.APISIX.DefaultClusterName,
                AdminKey: c.cfg.APISIX.DefaultClusterAdminKey,
                BaseURL:  c.cfg.APISIX.DefaultClusterBaseURL,
-       })
+               Timeout:  c.cfg.APISIX.DefaultClusterClientTimeout.Duration,
+       }
+       err := c.apisix.AddCluster(clusterOpts)
        if err != nil && err != apisix.ErrDuplicatedCluster {
-               // TODO give up the leader role.

Review comment:
       ok

##########
File path: pkg/ingress/controller.go
##########
@@ -423,3 +455,21 @@ func (c *Controller) syncSSL(ctx context.Context, ssl 
*apisixv1.Ssl, event types
        }
        return err
 }
+
+func (c *Controller) checkClusterHealth(ctx context.Context, cancelFunc 
context.CancelFunc) {
+       for {
+               select {
+               case <-ctx.Done():
+               case <-time.After(5 * time.Second):
+               }
+
+               err := 
c.apisix.Cluster(c.cfg.APISIX.DefaultClusterName).HealthCheck(ctx)
+               if err != nil {
+                       // Finally failed health check, then give up leader.
+                       log.Warnf("failed to check health for default cluster: 
%s, give up leader", err)
+                       cancelFunc()
+                       return

Review comment:
       No, the current logic is to just discard it if it is unhealthy.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [apisix-ingress-controller] slene commented on a change in pull request #453: fix: panic of start leading. sync ingress failed when apisix not start.

Reply via email to