The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/4474
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === I think this does not change any observable behavior from users, but it might help us debug problems in case waitready hangs. YMMV The second commit increases a bit logging in the daemon startup phase, hopefully without making it too noisy, also for trouble-shooting purposes.
From 17e643e8ba430c9270a096a00d6e345bfe5b7b14 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <[email protected]> Date: Thu, 19 Apr 2018 08:33:27 +0000 Subject: [PATCH 1/2] Make the interaction betwean lxd daemon and waitready non-blocking I think this does not change observable behavior from users, but might help us debug problems in case waitready hangs. YMMV Signed-off-by: Free Ekanayaka <[email protected]> --- lxd/api_internal.go | 6 +++++- lxd/daemon.go | 7 ++++++- lxd/main_waitready.go | 20 +++++++++++++++++++- lxd/response.go | 1 + 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/lxd/api_internal.go b/lxd/api_internal.go index 6f0443240..7a8542e4f 100644 --- a/lxd/api_internal.go +++ b/lxd/api_internal.go @@ -36,7 +36,11 @@ var apiInternal = []Command{ } func internalWaitReady(d *Daemon, r *http.Request) Response { - <-d.readyChan + select { + case <-d.readyChan: + default: + return Unavailable + } return EmptySyncResponse } diff --git a/lxd/daemon.go b/lxd/daemon.go index 2b76ab2ec..5e6a6a757 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -237,7 +237,12 @@ func (d *Daemon) createCmd(restAPI *mux.Router, version string, c Command) { // Block public API requests until we're done with basic // initialization tasks, such setting up the cluster database. - <-d.setupChan + select { + case <-d.setupChan: + default: + Unavailable.Render(w) + return + } untrustedOk := (r.Method == "GET" && c.untrustedGet) || (r.Method == "POST" && c.untrustedPost) err := d.checkTrustedClient(r) diff --git a/lxd/main_waitready.go b/lxd/main_waitready.go index 78bb62e82..1ad4ff8be 100644 --- a/lxd/main_waitready.go +++ b/lxd/main_waitready.go @@ -7,6 +7,7 @@ import ( "github.com/spf13/cobra" "github.com/lxc/lxd/client" + "github.com/lxc/lxd/shared/logger" ) type cmdWaitready struct { @@ -35,15 +36,32 @@ func (c *cmdWaitready) Command() *cobra.Command { func (c *cmdWaitready) Run(cmd *cobra.Command, args []string) error { finger := make(chan error, 1) go func() { - for { + for i := 0; ; i++ { + // Log initial attempts at debug level, but use warn + // level after the 10'th attempt (about 5 seconds). Then + // after the 30'th attempt (about 15 seconds), log only + // only one attempt every 10 attempts (about 5 seconds), + // to avoid being too verbose. + log := logger.Debugf + if i > 10 { + log = logger.Warnf + if i > 30 && !((i % 10) == 0) { + log = func(string, ...interface{}) {} + } + } + + log("Connecting to LXD daemon (attempt %d)", i) d, err := lxd.ConnectLXDUnix("", nil) if err != nil { + log("Failed connecting to LXD daemon (attempt %d): %v", i, err) time.Sleep(500 * time.Millisecond) continue } + log("Checking if LXD daemon is ready (attempt %d)", i) _, _, err = d.RawQuery("GET", "/internal/ready", nil, "") if err != nil { + log("Failed checking if LXD daemon is ready (attempt %d): %v", i, err) time.Sleep(500 * time.Millisecond) continue } diff --git a/lxd/response.go b/lxd/response.go index cb5cb59dc..bc975d344 100644 --- a/lxd/response.go +++ b/lxd/response.go @@ -477,6 +477,7 @@ var NotImplemented = &errorResponse{http.StatusNotImplemented, "not implemented" var NotFound = &errorResponse{http.StatusNotFound, "not found"} var Forbidden = &errorResponse{http.StatusForbidden, "not authorized"} var Conflict = &errorResponse{http.StatusConflict, "already exists"} +var Unavailable = &errorResponse{http.StatusServiceUnavailable, "unavailable"} func BadRequest(err error) Response { return &errorResponse{http.StatusBadRequest, err.Error()} From 402d911e65ad439d924b0023a9a357c894b03dab Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <[email protected]> Date: Thu, 19 Apr 2018 08:53:51 +0000 Subject: [PATCH 2/2] Increase logging during startup Signed-off-by: Free Ekanayaka <[email protected]> --- lxd/daemon.go | 5 +++++ lxd/db/db.go | 15 +++++++++++++-- lxd/patches.go | 2 +- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lxd/daemon.go b/lxd/daemon.go index 5e6a6a757..e868c855e 100644 --- a/lxd/daemon.go +++ b/lxd/daemon.go @@ -468,6 +468,7 @@ func (d *Daemon) init() error { /* Open the cluster database */ for { + logger.Info("Initializing global database") d.cluster, err = db.OpenCluster("db.bin", d.gateway.Dialer(), address) if err == nil { break @@ -534,6 +535,7 @@ func (d *Daemon) init() error { } /* Read the storage pools */ + logger.Infof("Initializing storage pools") err = SetupStorageDriver(d.State(), false) if err != nil { return err @@ -546,6 +548,7 @@ func (d *Daemon) init() error { } /* Setup the networks */ + logger.Infof("Initializing networks") err = networkStartup(d.State()) if err != nil { return err @@ -579,6 +582,7 @@ func (d *Daemon) init() error { return err } + logger.Infof("Loading configuration") err = d.cluster.Transaction(func(tx *db.ClusterTx) error { config, err := cluster.ConfigLoad(tx) if err != nil { @@ -864,6 +868,7 @@ func (d *Daemon) setupMAASController(server string, key string, machine string) // Create a database connection and perform any updates needed. func initializeDbObject(d *Daemon) (*db.Dump, error) { + logger.Info("Initializing local database") // Rename the old database name if needed. if shared.PathExists(d.os.LegacyLocalDatabasePath()) { if shared.PathExists(d.os.LocalDatabasePath()) { diff --git a/lxd/db/db.go b/lxd/db/db.go index 3c540c322..37a652cfd 100644 --- a/lxd/db/db.go +++ b/lxd/db/db.go @@ -164,16 +164,27 @@ func OpenCluster(name string, dialer grpcsql.Dialer, address string) (*Cluster, // Test that the cluster database is operational. We wait up to 10 // minutes, in case there's no quorum of nodes online yet. timeout := time.After(10 * time.Minute) - for { + for i := 0; ; i++ { + // Log initial attempts at debug level, but use warn + // level after the 10'th attempt (about 10 seconds). + log := logger.Debugf + if i > 5 { + log = logger.Warnf + } + err = db.Ping() if err == nil { break } + cause := errors.Cause(err) if cause != context.DeadlineExceeded { return nil, err } - time.Sleep(10 * time.Second) + + log("Failed connecting to global database (attempt %d): %v", i, err) + + time.Sleep(2 * time.Second) select { case <-timeout: return nil, fmt.Errorf("failed to connect to cluster database") diff --git a/lxd/patches.go b/lxd/patches.go index 911e0e9aa..227d35a51 100644 --- a/lxd/patches.go +++ b/lxd/patches.go @@ -62,7 +62,7 @@ type patch struct { } func (p *patch) apply(d *Daemon) error { - logger.Debugf("Applying patch: %s", p.name) + logger.Infof("Applying patch: %s", p.name) err := p.run(p.name, d) if err != nil {
_______________________________________________ lxc-devel mailing list [email protected] http://lists.linuxcontainers.org/listinfo/lxc-devel
