The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/4474

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
I think this does not change any observable behavior from users, but it might help us
debug problems in case waitready hangs. YMMV

The second commit increases a bit logging in the daemon startup phase, hopefully without making it too noisy, also for trouble-shooting purposes.
From 17e643e8ba430c9270a096a00d6e345bfe5b7b14 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <[email protected]>
Date: Thu, 19 Apr 2018 08:33:27 +0000
Subject: [PATCH 1/2] Make the interaction betwean lxd daemon and waitready
 non-blocking

I think this does not change observable behavior from users, but might help us
debug problems in case waitready hangs. YMMV

Signed-off-by: Free Ekanayaka <[email protected]>
---
 lxd/api_internal.go   |  6 +++++-
 lxd/daemon.go         |  7 ++++++-
 lxd/main_waitready.go | 20 +++++++++++++++++++-
 lxd/response.go       |  1 +
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/lxd/api_internal.go b/lxd/api_internal.go
index 6f0443240..7a8542e4f 100644
--- a/lxd/api_internal.go
+++ b/lxd/api_internal.go
@@ -36,7 +36,11 @@ var apiInternal = []Command{
 }
 
 func internalWaitReady(d *Daemon, r *http.Request) Response {
-       <-d.readyChan
+       select {
+       case <-d.readyChan:
+       default:
+               return Unavailable
+       }
 
        return EmptySyncResponse
 }
diff --git a/lxd/daemon.go b/lxd/daemon.go
index 2b76ab2ec..5e6a6a757 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -237,7 +237,12 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version 
string, c Command) {
 
                // Block public API requests until we're done with basic
                // initialization tasks, such setting up the cluster database.
-               <-d.setupChan
+               select {
+               case <-d.setupChan:
+               default:
+                       Unavailable.Render(w)
+                       return
+               }
 
                untrustedOk := (r.Method == "GET" && c.untrustedGet) || 
(r.Method == "POST" && c.untrustedPost)
                err := d.checkTrustedClient(r)
diff --git a/lxd/main_waitready.go b/lxd/main_waitready.go
index 78bb62e82..1ad4ff8be 100644
--- a/lxd/main_waitready.go
+++ b/lxd/main_waitready.go
@@ -7,6 +7,7 @@ import (
        "github.com/spf13/cobra"
 
        "github.com/lxc/lxd/client"
+       "github.com/lxc/lxd/shared/logger"
 )
 
 type cmdWaitready struct {
@@ -35,15 +36,32 @@ func (c *cmdWaitready) Command() *cobra.Command {
 func (c *cmdWaitready) Run(cmd *cobra.Command, args []string) error {
        finger := make(chan error, 1)
        go func() {
-               for {
+               for i := 0; ; i++ {
+                       // Log initial attempts at debug level, but use warn
+                       // level after the 10'th attempt (about 5 seconds). Then
+                       // after the 30'th attempt (about 15 seconds), log only
+                       // only one attempt every 10 attempts (about 5 seconds),
+                       // to avoid being too verbose.
+                       log := logger.Debugf
+                       if i > 10 {
+                               log = logger.Warnf
+                               if i > 30 && !((i % 10) == 0) {
+                                       log = func(string, ...interface{}) {}
+                               }
+                       }
+
+                       log("Connecting to LXD daemon (attempt %d)", i)
                        d, err := lxd.ConnectLXDUnix("", nil)
                        if err != nil {
+                               log("Failed connecting to LXD daemon (attempt 
%d): %v", i, err)
                                time.Sleep(500 * time.Millisecond)
                                continue
                        }
 
+                       log("Checking if LXD daemon is ready (attempt %d)", i)
                        _, _, err = d.RawQuery("GET", "/internal/ready", nil, 
"")
                        if err != nil {
+                               log("Failed checking if LXD daemon is ready 
(attempt %d): %v", i, err)
                                time.Sleep(500 * time.Millisecond)
                                continue
                        }
diff --git a/lxd/response.go b/lxd/response.go
index cb5cb59dc..bc975d344 100644
--- a/lxd/response.go
+++ b/lxd/response.go
@@ -477,6 +477,7 @@ var NotImplemented = 
&errorResponse{http.StatusNotImplemented, "not implemented"
 var NotFound = &errorResponse{http.StatusNotFound, "not found"}
 var Forbidden = &errorResponse{http.StatusForbidden, "not authorized"}
 var Conflict = &errorResponse{http.StatusConflict, "already exists"}
+var Unavailable = &errorResponse{http.StatusServiceUnavailable, "unavailable"}
 
 func BadRequest(err error) Response {
        return &errorResponse{http.StatusBadRequest, err.Error()}

From 402d911e65ad439d924b0023a9a357c894b03dab Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <[email protected]>
Date: Thu, 19 Apr 2018 08:53:51 +0000
Subject: [PATCH 2/2] Increase logging during startup

Signed-off-by: Free Ekanayaka <[email protected]>
---
 lxd/daemon.go  |  5 +++++
 lxd/db/db.go   | 15 +++++++++++++--
 lxd/patches.go |  2 +-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/lxd/daemon.go b/lxd/daemon.go
index 5e6a6a757..e868c855e 100644
--- a/lxd/daemon.go
+++ b/lxd/daemon.go
@@ -468,6 +468,7 @@ func (d *Daemon) init() error {
 
        /* Open the cluster database */
        for {
+               logger.Info("Initializing global database")
                d.cluster, err = db.OpenCluster("db.bin", d.gateway.Dialer(), 
address)
                if err == nil {
                        break
@@ -534,6 +535,7 @@ func (d *Daemon) init() error {
        }
 
        /* Read the storage pools */
+       logger.Infof("Initializing storage pools")
        err = SetupStorageDriver(d.State(), false)
        if err != nil {
                return err
@@ -546,6 +548,7 @@ func (d *Daemon) init() error {
        }
 
        /* Setup the networks */
+       logger.Infof("Initializing networks")
        err = networkStartup(d.State())
        if err != nil {
                return err
@@ -579,6 +582,7 @@ func (d *Daemon) init() error {
                return err
        }
 
+       logger.Infof("Loading configuration")
        err = d.cluster.Transaction(func(tx *db.ClusterTx) error {
                config, err := cluster.ConfigLoad(tx)
                if err != nil {
@@ -864,6 +868,7 @@ func (d *Daemon) setupMAASController(server string, key 
string, machine string)
 
 // Create a database connection and perform any updates needed.
 func initializeDbObject(d *Daemon) (*db.Dump, error) {
+       logger.Info("Initializing local database")
        // Rename the old database name if needed.
        if shared.PathExists(d.os.LegacyLocalDatabasePath()) {
                if shared.PathExists(d.os.LocalDatabasePath()) {
diff --git a/lxd/db/db.go b/lxd/db/db.go
index 3c540c322..37a652cfd 100644
--- a/lxd/db/db.go
+++ b/lxd/db/db.go
@@ -164,16 +164,27 @@ func OpenCluster(name string, dialer grpcsql.Dialer, 
address string) (*Cluster,
        // Test that the cluster database is operational. We wait up to 10
        // minutes, in case there's no quorum of nodes online yet.
        timeout := time.After(10 * time.Minute)
-       for {
+       for i := 0; ; i++ {
+               // Log initial attempts at debug level, but use warn
+               // level after the 10'th attempt (about 10 seconds).
+               log := logger.Debugf
+               if i > 5 {
+                       log = logger.Warnf
+               }
+
                err = db.Ping()
                if err == nil {
                        break
                }
+
                cause := errors.Cause(err)
                if cause != context.DeadlineExceeded {
                        return nil, err
                }
-               time.Sleep(10 * time.Second)
+
+               log("Failed connecting to global database (attempt %d): %v", i, 
err)
+
+               time.Sleep(2 * time.Second)
                select {
                case <-timeout:
                        return nil, fmt.Errorf("failed to connect to cluster 
database")
diff --git a/lxd/patches.go b/lxd/patches.go
index 911e0e9aa..227d35a51 100644
--- a/lxd/patches.go
+++ b/lxd/patches.go
@@ -62,7 +62,7 @@ type patch struct {
 }
 
 func (p *patch) apply(d *Daemon) error {
-       logger.Debugf("Applying patch: %s", p.name)
+       logger.Infof("Applying patch: %s", p.name)
 
        err := p.run(p.name, d)
        if err != nil {
_______________________________________________
lxc-devel mailing list
[email protected]
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to