The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/4456
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
These commits make the cluster automatically promote a spare node to database node (if available) when another node gets deleted.
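For readers skimming the patches below, here is a minimal, self-contained sketch of the node-selection logic that the new cluster.Rebalance function introduces (pick the first online cluster member that is not already a raft member). The Node type, the heartbeat-based offline check, and the sample addresses are hypothetical stand-ins for the records returned by tx.Nodes() in the patch, not the actual LXD types.

package main

import (
	"fmt"
	"time"
)

// Node is a hypothetical stand-in for a cluster member record; only the
// fields needed for the selection logic are kept.
type Node struct {
	Name      string
	Address   string
	Heartbeat time.Time
}

// pickSpareNode mirrors the selection loop in cluster.Rebalance: skip members
// that are already raft (database) nodes, skip members considered offline,
// and return the address of the first remaining candidate ("" if none).
func pickSpareNode(nodes []Node, raftAddresses []string, offlineThreshold time.Duration) string {
	isRaft := make(map[string]bool, len(raftAddresses))
	for _, address := range raftAddresses {
		isRaft[address] = true
	}
	for _, node := range nodes {
		if isRaft[node.Address] {
			continue // Already a database node.
		}
		if time.Since(node.Heartbeat) > offlineThreshold {
			continue // Considered offline.
		}
		return node.Address
	}
	return ""
}

func main() {
	nodes := []Node{
		{Name: "node1", Address: "10.1.1.101:8443", Heartbeat: time.Now()},
		{Name: "node2", Address: "10.1.1.102:8443", Heartbeat: time.Now()},
		{Name: "node4", Address: "10.1.1.104:8443", Heartbeat: time.Now()},
	}
	raft := []string{"10.1.1.101:8443", "10.1.1.102:8443"}
	fmt.Println(pickSpareNode(nodes, raft, 20*time.Second)) // prints 10.1.1.104:8443
}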
From 5483c49a90c94e1d262737327729ba1de4ed3b98 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Mon, 16 Apr 2018 10:44:46 +0000 Subject: [PATCH 1/4] Add new cluster.Promote function to turn a non-database node into a database one Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/api_cluster.go | 2 +- lxd/cluster/gateway.go | 5 ++ lxd/cluster/membership.go | 159 ++++++++++++++++++++++++++++++++++------- lxd/cluster/membership_test.go | 89 +++++++++++++++++++++++ lxd/db/cluster/open.go | 2 +- 5 files changed, 230 insertions(+), 27 deletions(-) diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go index 2c1327f6f..8fb21efe9 100644 --- a/lxd/api_cluster.go +++ b/lxd/api_cluster.go @@ -441,7 +441,7 @@ func clusterNodeDelete(d *Daemon, r *http.Request) Response { // First check that the node is clear from containers and images and // make it leave the database cluster, if it's part of it. name := mux.Vars(r)["name"] - address, err := cluster.Leave(d.State(), d.gateway, name, force == 1) + address, _, err := cluster.Leave(d.State(), d.gateway, name, force == 1) if err != nil { return SmartError(err) } diff --git a/lxd/cluster/gateway.go b/lxd/cluster/gateway.go index e0ffce294..07546c170 100644 --- a/lxd/cluster/gateway.go +++ b/lxd/cluster/gateway.go @@ -217,6 +217,11 @@ func (g *Gateway) WaitUpgradeNotification() { <-g.upgradeCh } +// IsDatabaseNode returns true if this gateway also run acts a raft database node. +func (g *Gateway) IsDatabaseNode() bool { + return g.raft != nil +} + // Dialer returns a gRPC dial function that can be used to connect to one of // the dqlite nodes via gRPC. func (g *Gateway) Dialer() grpcsql.Dialer { diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go index 33ae69122..b21c87d06 100644 --- a/lxd/cluster/membership.go +++ b/lxd/cluster/membership.go @@ -7,8 +7,8 @@ import ( "strconv" "time" - "github.com/CanonicalLtd/raft-http" - "github.com/CanonicalLtd/raft-membership" + rafthttp "github.com/CanonicalLtd/raft-http" + raftmembership "github.com/CanonicalLtd/raft-membership" "github.com/hashicorp/raft" "github.com/lxc/lxd/lxd/db" "github.com/lxc/lxd/lxd/db/cluster" @@ -432,6 +432,111 @@ func Join(state *state.State, gateway *Gateway, cert *shared.CertInfo, name stri return nil } +// Promote makes a LXD node which is not a database node, become part of the +// raft cluster. +func Promote(state *state.State, gateway *Gateway, nodes []db.RaftNode) error { + logger.Info("Promote node to database node") + + // Sanity check that this is not already a database node + if gateway.IsDatabaseNode() { + return fmt.Errorf("this node is already a database node") + } + + // Figure out our own address. + address := "" + err := state.Cluster.Transaction(func(tx *db.ClusterTx) error { + var err error + address, err = tx.NodeAddress() + if err != nil { + return errors.Wrap(err, "failed to fetch the address of this node") + } + return nil + }) + if err != nil { + return err + } + + // Sanity check that we actually have an address. + if address == "" { + return fmt.Errorf("node is not exposed on the network") + } + + // Figure out our raft node ID, and an existing target raft node that + // we'll contact to add ourselves as member. + id := "" + target := "" + for _, node := range nodes { + if node.Address == address { + id = strconv.Itoa(int(node.ID)) + } else { + target = node.Address + } + } + + // Sanity check that our address was actually included in the given + // list of raft nodes. 
+ if id == "" { + return fmt.Errorf("this node is not included in the given list of database nodes") + } + + // Replace our local list of raft nodes with the given one (which + // includes ourselves). This will make the gateway start a raft node + // when restarted. + err = state.Node.Transaction(func(tx *db.NodeTx) error { + err = tx.RaftNodesReplace(nodes) + if err != nil { + return errors.Wrap(err, "failed to set raft nodes") + } + + return nil + }) + if err != nil { + return err + } + + // Lock regular access to the cluster database since we don't want any + // other database code to run while we're reconfiguring raft. + err = state.Cluster.EnterExclusive() + if err != nil { + return errors.Wrap(err, "failed to acquire cluster database lock") + } + + // Wipe all existing raft data, for good measure (perhaps they were + // somehow leftover). + err = os.RemoveAll(filepath.Join(state.OS.VarDir, "raft")) + if err != nil { + return errors.Wrap(err, "failed to remove existing raft data") + } + + // Re-initialize the gateway. This will create a new raft factory an + // dqlite driver instance, which will be exposed over gRPC by the + // gateway handlers. + err = gateway.init() + if err != nil { + return errors.Wrap(err, "failed to re-initialize gRPC SQL gateway") + } + + logger.Info( + "Joining dqlite raft cluster", + log15.Ctx{"id": id, "address": address, "target": target}) + changer := gateway.raft.MembershipChanger() + err = changer.Join(raft.ServerID(id), raft.ServerAddress(target), 5*time.Second) + if err != nil { + return err + } + + // Unlock regular access to our cluster database, and make sure our + // gateway still works correctly. + err = state.Cluster.ExitExclusive(func(tx *db.ClusterTx) error { + _, err := tx.Nodes() + return err + }) + if err != nil { + return errors.Wrap(err, "cluster database initialization failed") + } + return nil +} + // Leave a cluster. // // If the force flag is true, the node will leave even if it still has @@ -469,19 +574,17 @@ func Leave(state *state.State, gateway *Gateway, name string, force bool) (strin } // If the node is a database node, leave the raft cluster too. - id := "" - target := "" + var raftNodes []db.RaftNode // Current raft nodes + raftNodeRemoveIndex := -1 // Index of the raft node to remove, if any. err = state.Node.Transaction(func(tx *db.NodeTx) error { - nodes, err := tx.RaftNodes() + var err error + raftNodes, err = tx.RaftNodes() if err != nil { - return err + return errors.Wrap(err, "failed to get current database nodes") } - for i, node := range nodes { + for i, node := range raftNodes { if node.Address == address { - id = strconv.Itoa(int(node.ID)) - // Save the address of another database node, - // we'll use it to leave the raft cluster. - target = nodes[(i+1)%len(nodes)].Address + raftNodeRemoveIndex = i break } } @@ -491,22 +594,28 @@ func Leave(state *state.State, gateway *Gateway, name string, force bool) (strin return "", err } - if target != "" { - logger.Info( - "Remove node from dqlite raft cluster", - log15.Ctx{"id": id, "address": address, "target": target}) - dial, err := raftDial(gateway.cert) - if err != nil { - return "", err - } - err = rafthttp.ChangeMembership( - raftmembership.LeaveRequest, raftEndpoint, dial, - raft.ServerID(id), address, target, 5*time.Second) - if err != nil { - return "", err - } + if raftNodeRemoveIndex == -1 { + // The node was not part of the raft cluster, nothing left to + // do. 
+ return address, nil } + id := strconv.Itoa(int(raftNodes[raftNodeRemoveIndex].ID)) + // Get the address of another database node, + target := raftNodes[(raftNodeRemoveIndex+1)%len(raftNodes)].Address + logger.Info( + "Remove node from dqlite raft cluster", + log15.Ctx{"id": id, "address": address, "target": target}) + dial, err := raftDial(gateway.cert) + if err != nil { + return "", err + } + err = rafthttp.ChangeMembership( + raftmembership.LeaveRequest, raftEndpoint, dial, + raft.ServerID(id), address, target, 5*time.Second) + if err != nil { + return "", err + } return address, nil } diff --git a/lxd/cluster/membership_test.go b/lxd/cluster/membership_test.go index b9907cfdc..5c260841c 100644 --- a/lxd/cluster/membership_test.go +++ b/lxd/cluster/membership_test.go @@ -348,6 +348,83 @@ func TestJoin(t *testing.T) { assert.Equal(t, 1, count) } +func TestPromote(t *testing.T) { + // Setup a target node running as leader of a cluster. + targetCert := shared.TestingKeyPair() + targetMux := http.NewServeMux() + targetServer := newServer(targetCert, targetMux) + defer targetServer.Close() + + targetState, cleanup := state.NewTestState(t) + defer cleanup() + + targetGateway := newGateway(t, targetState.Node, targetCert) + defer targetGateway.Shutdown() + + for path, handler := range targetGateway.HandlerFuncs() { + targetMux.HandleFunc(path, handler) + } + + targetAddress := targetServer.Listener.Addr().String() + var err error + require.NoError(t, targetState.Cluster.Close()) + targetState.Cluster, err = db.OpenCluster("db.bin", targetGateway.Dialer(), targetAddress) + require.NoError(t, err) + targetF := &membershipFixtures{t: t, state: targetState} + targetF.NetworkAddress(targetAddress) + + err = cluster.Bootstrap(targetState, targetGateway, "buzz") + require.NoError(t, err) + + // Setup a node to be promoted. + mux := http.NewServeMux() + server := newServer(targetCert, mux) // Use the same cert, as we're already part of the cluster + defer server.Close() + + state, cleanup := state.NewTestState(t) + defer cleanup() + + address := server.Listener.Addr().String() + targetF.ClusterNode(address) // Add the non database node to the cluster database + f := &membershipFixtures{t: t, state: state} + f.NetworkAddress(address) + f.RaftNode(targetAddress) // Insert the leader in our local list of database nodes + + gateway := newGateway(t, state.Node, targetCert) + defer gateway.Shutdown() + + for path, handler := range gateway.HandlerFuncs() { + mux.HandleFunc(path, handler) + } + + state.Cluster, err = db.OpenCluster("db.bin", gateway.Dialer(), address) + require.NoError(t, err) + + // Promote the node. + targetF.RaftNode(address) // Add the address of the node to be promoted in the leader's db + raftNodes := targetF.RaftNodes() + err = cluster.Promote(state, gateway, raftNodes) + require.NoError(t, err) + + // The leader now returns an updated list of raft nodes. + raftNodes, err = targetGateway.RaftNodes() + require.NoError(t, err) + assert.Len(t, raftNodes, 2) + assert.Equal(t, int64(1), raftNodes[0].ID) + assert.Equal(t, targetAddress, raftNodes[0].Address) + assert.Equal(t, int64(2), raftNodes[1].ID) + assert.Equal(t, address, raftNodes[1].Address) + + // The List function returns all nodes in the cluster. 
+ nodes, err := cluster.List(state) + require.NoError(t, err) + assert.Len(t, nodes, 2) + assert.Equal(t, "Online", nodes[0].Status) + assert.Equal(t, "Online", nodes[1].Status) + assert.True(t, nodes[0].Database) + assert.True(t, nodes[1].Database) +} + // Helper for setting fixtures for Bootstrap tests. type membershipFixtures struct { t *testing.T @@ -374,6 +451,18 @@ func (h *membershipFixtures) RaftNode(address string) { require.NoError(h.t, err) } +// Get the current list of the raft nodes in the raft_nodes table. +func (h *membershipFixtures) RaftNodes() []db.RaftNode { + var nodes []db.RaftNode + err := h.state.Node.Transaction(func(tx *db.NodeTx) error { + var err error + nodes, err = tx.RaftNodes() + return err + }) + require.NoError(h.t, err) + return nodes +} + // Add the given address to the nodes table of the cluster database. func (h *membershipFixtures) ClusterNode(address string) { err := h.state.Cluster.Transaction(func(tx *db.ClusterTx) error { diff --git a/lxd/db/cluster/open.go b/lxd/db/cluster/open.go index 0ca5d50af..498e5d923 100644 --- a/lxd/db/cluster/open.go +++ b/lxd/db/cluster/open.go @@ -74,7 +74,7 @@ func EnsureSchema(db *sql.DB, address string) (bool, error) { // Update the schema and api_extension columns of ourselves. err = updateNodeVersion(tx, address, apiExtensions) if err != nil { - return err + return errors.Wrap(err, "failed to update node version info") } err = checkClusterIsUpgradable(tx, [2]int{len(updates), apiExtensions}) From 4ea558b71cab3f2d0201aed61688fa83a94ac240 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Mon, 16 Apr 2018 12:34:50 +0000 Subject: [PATCH 2/4] Add new cluster.Rebalance function to check if we need to add database nodes Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/cluster/membership.go | 79 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go index b21c87d06..8bea809ad 100644 --- a/lxd/cluster/membership.go +++ b/lxd/cluster/membership.go @@ -432,6 +432,85 @@ func Join(state *state.State, gateway *Gateway, cert *shared.CertInfo, name stri return nil } +// Rebalance the raft cluster, trying to see if we have a spare online node +// that we can promote to database node if we are below membershipMaxRaftNodes. +// +// If there's such spare node, return its address as well as the new list of +// raft nodes. +func Rebalance(state *state.State, gateway *Gateway) (string, []db.RaftNode, error) { + // First get the current raft members, since this method should be + // called after a node has left. + currentRaftNodes, err := gateway.currentRaftNodes() + if err != nil { + return "", nil, errors.Wrap(err, "failed to get current raft nodes") + } + if len(currentRaftNodes) >= membershipMaxRaftNodes { + // We're already at full capacity. + return "", nil, nil + } + + currentRaftAddresses := make([]string, len(currentRaftNodes)) + for i, node := range currentRaftNodes { + currentRaftAddresses[i] = node.Address + } + + // Check if we have a spare node that we can turn into a database one. + address := "" + err = state.Cluster.Transaction(func(tx *db.ClusterTx) error { + config, err := ConfigLoad(tx) + if err != nil { + return errors.Wrap(err, "failed load cluster configuration") + } + nodes, err := tx.Nodes() + if err != nil { + return errors.Wrap(err, "failed to get cluster nodes") + } + // Find a node that is not part of the raft cluster yet. 
+ for _, node := range nodes { + if shared.StringInSlice(node.Address, currentRaftAddresses) { + continue // This is already a database node + } + if node.IsOffline(config.OfflineThreshold()) { + continue // This node is offline + } + logger.Debugf( + "Found spare node %s (%s) to be promoted as database node", node.Name, node.Address) + address = node.Address + break + } + + return nil + }) + if err != nil { + return "", nil, err + } + + if address == "" { + // No node to promote + return "", nil, nil + } + + // Update the local raft_table adding the new member and building a new + // list. + updatedRaftNodes := currentRaftNodes + err = gateway.db.Transaction(func(tx *db.NodeTx) error { + id, err := tx.RaftNodeAdd(address) + if err != nil { + return errors.Wrap(err, "failed to add new raft node") + } + updatedRaftNodes = append(updatedRaftNodes, db.RaftNode{ID: id, Address: address}) + err = tx.RaftNodesReplace(updatedRaftNodes) + if err != nil { + return errors.Wrap(err, "failed to update raft nodes") + } + return nil + }) + if err != nil { + return "", nil, err + } + return address, updatedRaftNodes, nil +} + // Promote makes a LXD node which is not a database node, become part of the // raft cluster. func Promote(state *state.State, gateway *Gateway, nodes []db.RaftNode) error { From 11d1b65a1dc6ed9343b32625f121e730548e181b Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Mon, 16 Apr 2018 12:35:12 +0000 Subject: [PATCH 3/4] Notify the cluster leader after a node removal, so it can rebalance Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/api_cluster.go | 125 +++++++++++++++++++++++++++++++++++++++++++++++- lxd/api_cluster_test.go | 35 ++++++++++++++ lxd/api_internal.go | 2 + 3 files changed, 160 insertions(+), 2 deletions(-) diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go index 8fb21efe9..0fd89ed3b 100644 --- a/lxd/api_cluster.go +++ b/lxd/api_cluster.go @@ -438,10 +438,12 @@ func clusterNodeDelete(d *Daemon, r *http.Request) Response { force = 0 } + name := mux.Vars(r)["name"] + logger.Debugf("Delete node %s from cluster (force=%d)", name, force) + // First check that the node is clear from containers and images and // make it leave the database cluster, if it's part of it. - name := mux.Vars(r)["name"] - address, _, err := cluster.Leave(d.State(), d.gateway, name, force == 1) + address, err := cluster.Leave(d.State(), d.gateway, name, force == 1) if err != nil { return SmartError(err) } @@ -483,6 +485,12 @@ func clusterNodeDelete(d *Daemon, r *http.Request) Response { if err != nil { return SmartError(errors.Wrap(err, "failed to remove node from database")) } + // Try to notify the leader. + err = tryClusterRebalance(d) + if err != nil { + // This is not a fatal error, so let's just log it. + logger.Errorf("Failed to rebalance cluster: %v", err) + } if force != 1 { // Try to gracefully reset the database on the node. @@ -502,6 +510,26 @@ func clusterNodeDelete(d *Daemon, r *http.Request) Response { return EmptySyncResponse } +// This function is used to notify the leader that a node was removed, it will +// decide whether to promote a new node as database node. +func tryClusterRebalance(d *Daemon) error { + leader, err := d.gateway.LeaderAddress() + if err != nil { + // This is not a fatal error, so let's just log it. 
+ return errors.Wrap(err, "failed to get current leader node") + } + cert := d.endpoints.NetworkCert() + client, err := cluster.Connect(leader, cert, true) + if err != nil { + return errors.Wrap(err, "failed to connect to leader node") + } + _, _, err = client.RawQuery("POST", "/internal/cluster/rebalance", nil, "") + if err != nil { + return errors.Wrap(err, "request to rebalance cluster failed") + } + return nil +} + var internalClusterAcceptCmd = Command{name: "cluster/accept", post: internalClusterPostAccept} func internalClusterPostAccept(d *Daemon, r *http.Request) Response { @@ -586,6 +614,99 @@ type internalRaftNode struct { Address string `json:"address" yaml:"address"` } +var internalClusterRebalanceCmd = Command{name: "cluster/rebalance", post: internalClusterPostRebalance} + +// Used to update the cluster after a database node has been removed, and +// possibly promote another one as database node. +func internalClusterPostRebalance(d *Daemon, r *http.Request) Response { + // Redirect all requests to the leader, which is the one with with + // up-to-date knowledge of what nodes are part of the raft cluster. + localAddress, err := node.HTTPSAddress(d.db) + if err != nil { + return SmartError(err) + } + leader, err := d.gateway.LeaderAddress() + if err != nil { + return InternalError(err) + } + if localAddress != leader { + logger.Debugf("Redirect cluster rebalance request to %s", leader) + url := &url.URL{ + Scheme: "https", + Path: "/internal/cluster/rebalance", + Host: leader, + } + return SyncResponseRedirect(url.String()) + } + + logger.Debugf("Rebalance cluster") + + // Check if we have a spare node to promote. + address, nodes, err := cluster.Rebalance(d.State(), d.gateway) + if err != nil { + return SmartError(err) + } + if address == "" { + return SyncResponse(true, nil) // Nothing to change + } + + // Tell the node to promote itself. + post := &internalClusterPostPromoteRequest{} + for _, node := range nodes { + post.RaftNodes = append(post.RaftNodes, internalRaftNode{ + ID: node.ID, + Address: node.Address, + }) + } + + cert := d.endpoints.NetworkCert() + client, err := cluster.Connect(address, cert, false) + if err != nil { + return SmartError(err) + } + _, _, err = client.RawQuery("POST", "/internal/cluster/promote", post, "") + if err != nil { + return SmartError(err) + } + + return SyncResponse(true, nil) +} + +var internalClusterPromoteCmd = Command{name: "cluster/promote", post: internalClusterPostPromote} + +// Used to promote the local non-database node to be a database one. +func internalClusterPostPromote(d *Daemon, r *http.Request) Response { + req := internalClusterPostPromoteRequest{} + + // Parse the request + err := json.NewDecoder(r.Body).Decode(&req) + if err != nil { + return BadRequest(err) + } + + // Sanity checks + if len(req.RaftNodes) == 0 { + return BadRequest(fmt.Errorf("No raft nodes provided")) + } + + nodes := make([]db.RaftNode, len(req.RaftNodes)) + for i, node := range req.RaftNodes { + nodes[i].ID = node.ID + nodes[i].Address = node.Address + } + err = cluster.Promote(d.State(), d.gateway, nodes) + if err != nil { + return SmartError(err) + } + + return SyncResponse(true, nil) +} + +// A request for the /internal/cluster/promote endpoint. 
+type internalClusterPostPromoteRequest struct { + RaftNodes []internalRaftNode `json:"raft_nodes" yaml:"raft_nodes"` +} + func clusterCheckStoragePoolsMatch(cluster *db.Cluster, reqPools []api.StoragePool) error { poolNames, err := cluster.StoragePoolsNotPending() if err != nil && err != db.ErrNoSuchObject { diff --git a/lxd/api_cluster_test.go b/lxd/api_cluster_test.go index 569ffb520..4dd76393e 100644 --- a/lxd/api_cluster_test.go +++ b/lxd/api_cluster_test.go @@ -294,6 +294,41 @@ func TestCluster_LeaveForce(t *testing.T) { assert.Equal(t, []string{}, images) } +// If a spare non-database node is available after a nodes leaves, it gets +// promoted as database node. +func TestCluster_LeaveAndPromote(t *testing.T) { + if testing.Short() { + t.Skip("skipping cluster promote test in short mode.") + } + daemons, cleanup := newDaemons(t, 4) + defer cleanup() + + f := clusterFixture{t: t} + f.FormCluster(daemons) + + // The first three nodes are database nodes, the fourth is not. + client := f.ClientUnix(daemons[0]) + nodes, err := client.GetClusterMembers() + assert.Len(t, nodes, 4) + assert.True(t, nodes[0].Database) + assert.True(t, nodes[1].Database) + assert.True(t, nodes[2].Database) + assert.False(t, nodes[3].Database) + + client = f.ClientUnix(daemons[1]) + err = client.DeleteClusterMember("rusp-0", false) + require.NoError(t, err) + + // Only three nodes are left, and they are all database nodes. + client = f.ClientUnix(daemons[0]) + nodes, err = client.GetClusterMembers() + require.NoError(t, err) + assert.Len(t, nodes, 3) + assert.True(t, nodes[0].Database) + assert.True(t, nodes[1].Database) + assert.True(t, nodes[2].Database) +} + // A LXD node can be renamed. func TestCluster_NodeRename(t *testing.T) { daemon, cleanup := newDaemon(t) diff --git a/lxd/api_internal.go b/lxd/api_internal.go index 5f234537d..6f0443240 100644 --- a/lxd/api_internal.go +++ b/lxd/api_internal.go @@ -30,6 +30,8 @@ var apiInternal = []Command{ internalContainersCmd, internalSQLCmd, internalClusterAcceptCmd, + internalClusterRebalanceCmd, + internalClusterPromoteCmd, internalClusterContainerMovedCmd, } From c67471e0ccb11dcd4a19c4ee031cc1f7396db099 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Mon, 16 Apr 2018 12:56:41 +0000 Subject: [PATCH 4/4] Add integration test Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- test/suites/clustering.sh | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh index 579bfb3cd..7ff994672 100644 --- a/test/suites/clustering.sh +++ b/test/suites/clustering.sh @@ -74,39 +74,44 @@ test_clustering_membership() { LXD_DIR="${LXD_ONE_DIR}" lxc remote set-url cluster https://10.1.1.102:8443 lxc network list cluster: | grep -q "${bridge}" - # Shutdown a non-database node, and wait a few seconds so it will be + # Shutdown a database node, and wait a few seconds so it will be # detected as down. LXD_DIR="${LXD_ONE_DIR}" lxc config set cluster.offline_threshold 5 - LXD_DIR="${LXD_FIVE_DIR}" lxd shutdown + LXD_DIR="${LXD_THREE_DIR}" lxd shutdown sleep 10 - LXD_DIR="${LXD_THREE_DIR}" lxc cluster list | grep "node5" | grep -q "OFFLINE" + LXD_DIR="${LXD_TWO_DIR}" lxc cluster list | grep "node3" | grep -q "OFFLINE" LXD_DIR="${LXD_TWO_DIR}" lxc config set cluster.offline_threshold 20 # Trying to delete the preseeded network now fails, because a node is degraded. ! 
LXD_DIR="${LXD_TWO_DIR}" lxc network delete "${bridge}" # Force the removal of the degraded node. - LXD_DIR="${LXD_THREE_DIR}" lxc cluster remove node5 --force + LXD_DIR="${LXD_TWO_DIR}" lxc cluster remove node3 --force + + # Sleep a bit to let a heartbeat occur and update the list of raft nodes + # everywhere, showing that node 4 has been promoted to database node. + sleep 8 + LXD_DIR="${LXD_TWO_DIR}" lxc cluster list | grep "node4" | grep -q "YES" # Now the preseeded network can be deleted, and all nodes are # notified. LXD_DIR="${LXD_TWO_DIR}" lxc network delete "${bridge}" # Rename a node using the pre-existing name. - LXD_DIR="${LXD_THREE_DIR}" lxc cluster rename node4 node5 + LXD_DIR="${LXD_ONE_DIR}" lxc cluster rename node4 node3 # Trying to delete a container which is the only one with a copy of # an image results in an error LXD_DIR="${LXD_FOUR_DIR}" ensure_import_testimage - ! LXD_DIR="${LXD_FOUR_DIR}" lxc cluster remove node5 + ! LXD_DIR="${LXD_FOUR_DIR}" lxc cluster remove node3 LXD_DIR="${LXD_TWO_DIR}" lxc image delete testimage # Remove a node gracefully. - LXD_DIR="${LXD_FOUR_DIR}" lxc cluster remove node5 + LXD_DIR="${LXD_FOUR_DIR}" lxc cluster remove node3 ! LXD_DIR="${LXD_FOUR_DIR}" lxc cluster list + LXD_DIR="${LXD_FIVE_DIR}" lxd shutdown LXD_DIR="${LXD_FOUR_DIR}" lxd shutdown - LXD_DIR="${LXD_THREE_DIR}" lxd shutdown LXD_DIR="${LXD_TWO_DIR}" lxd shutdown LXD_DIR="${LXD_ONE_DIR}" lxd shutdown sleep 2