The following pull request was submitted through GitHub. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/7608
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) ===
From 4eece934f4cefaff341d018bf1fedfb1b289e1c8 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 10:00:44 +0200 Subject: [PATCH 01/10] lxd/db: Add failure_domains table and nodes column reference Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/cluster/schema.go | 8 +++++++- lxd/db/cluster/update.go | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/lxd/db/cluster/schema.go b/lxd/db/cluster/schema.go index 201ed4e1eb..fb36056864 100644 --- a/lxd/db/cluster/schema.go +++ b/lxd/db/cluster/schema.go @@ -20,6 +20,11 @@ CREATE TABLE config ( value TEXT, UNIQUE (key) ); +CREATE TABLE failure_domains ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL, + UNIQUE (name) +); CREATE TABLE "images" ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, fingerprint TEXT NOT NULL, @@ -311,6 +316,7 @@ CREATE TABLE nodes ( heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP, pending INTEGER NOT NULL DEFAULT 0, arch INTEGER NOT NULL DEFAULT 0 CHECK (arch > 0), + failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains (id) ON DELETE SET NULL, UNIQUE (name), UNIQUE (address) ); @@ -565,5 +571,5 @@ CREATE TABLE storage_volumes_snapshots_config ( UNIQUE (storage_volume_snapshot_id, key) ); -INSERT INTO schema (version, updated_at) VALUES (31, strftime("%s")) +INSERT INTO schema (version, updated_at) VALUES (32, strftime("%s")) ` diff --git a/lxd/db/cluster/update.go b/lxd/db/cluster/update.go index b6d5d4b1fe..758b569a96 100644 --- a/lxd/db/cluster/update.go +++ b/lxd/db/cluster/update.go @@ -68,6 +68,26 @@ var updates = map[int]schema.Update{ 29: updateFromV28, 30: updateFromV29, 31: updateFromV30, + 32: updateFromV31, +} + +// Add failure_domain column to nodes table. 
+func updateFromV31(tx *sql.Tx) error { + stmts := ` +CREATE TABLE failure_domains ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT NOT NULL, + UNIQUE (name) +); +ALTER TABLE nodes + ADD COLUMN failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains (id) ON DELETE SET NULL; +` + _, err := tx.Exec(stmts) + if err != nil { + return err + } + + return nil } // Add content type field to storage volumes From c945c182b16941212e9de3e3b502ff0e975d78eb Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 10:35:29 +0200 Subject: [PATCH 02/10] lxd/db: Add UpdateNodeFailureDomain() and GetNodesFailureDomains() Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/db/node.go | 128 +++++++++++++++++++++++++++++++++++++++++++- lxd/db/node_test.go | 20 +++++++ 2 files changed, 147 insertions(+), 1 deletion(-) diff --git a/lxd/db/node.go b/lxd/db/node.go index e0a2c30f41..ec4aad41d3 100644 --- a/lxd/db/node.go +++ b/lxd/db/node.go @@ -3,6 +3,7 @@ package db import ( + "database/sql" "fmt" "strconv" "strings" @@ -228,7 +229,7 @@ func (c *ClusterTx) RenameNode(old, new string) error { // Nodes returns all LXD nodes part of the cluster. func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) ([]NodeInfo, error) { // Get node roles - sql := "SELECT node_id, role FROM nodes_roles;" + sql := "SELECT node_id, role FROM nodes_roles" nodeRoles := map[int64][]string{} rows, err := c.tx.Query(sql) @@ -456,6 +457,131 @@ func (c *ClusterTx) UpdateNodeRoles(id int64, roles []ClusterRole) error { return nil } +// UpdateNodeFailureDomain changes the failure domain of a node. 
+func (c *ClusterTx) UpdateNodeFailureDomain(id int64, domain string) error { + var domainID interface{} + + if domain == "" { + domainID = nil + } else { + row := c.tx.QueryRow("SELECT id FROM failure_domains WHERE name=?", domain) + err := row.Scan(&domainID) + if err != nil { + if err != sql.ErrNoRows { + return errors.Wrapf(err, "Load failure domain name") + } + result, err := c.tx.Exec("INSERT INTO failure_domains (name) VALUES (?)", domain) + if err != nil { + return errors.Wrapf(err, "Create new failure domain") + } + domainID, err = result.LastInsertId() + if err != nil { + return errors.Wrapf(err, "Get last inserted ID") + } + } + } + + result, err := c.tx.Exec("UPDATE nodes SET failure_domain_id=? WHERE id=?", domainID, id) + if err != nil { + return err + } + n, err := result.RowsAffected() + if err != nil { + return err + } + if n != 1 { + return fmt.Errorf("Query updated %d rows instead of 1", n) + } + + return nil +} + +// GetNodeFailureDomain returns the failure domain associated with the node with the given ID. +func (c *ClusterTx) GetNodeFailureDomain(id int64) (string, error) { + stmt := ` +SELECT coalesce(failure_domains.name,'') + FROM nodes LEFT JOIN failure_domains ON nodes.failure_domain_id = failure_domains.id + WHERE nodes.id=? +` + var domain string + + err := c.tx.QueryRow(stmt, id).Scan(&domain) + if err != nil { + return "", err + } + return domain, nil +} + +// GetNodesFailureDomains returns a map associating each node address with its +// failure domain code. 
+func (c *ClusterTx) GetNodesFailureDomains() (map[string]uint64, error) { + stmt, err := c.tx.Prepare("SELECT address, coalesce(failure_domain_id, 0) FROM nodes") + if err != nil { + return nil, err + } + + rows := []struct { + Address string + FailureDomainID int64 + }{} + + dest := func(i int) []interface{} { + rows = append(rows, struct { + Address string + FailureDomainID int64 + }{}) + return []interface{}{&rows[len(rows)-1].Address, &rows[len(rows)-1].FailureDomainID} + } + + err = query.SelectObjects(stmt, dest) + if err != nil { + return nil, err + } + + domains := map[string]uint64{} + + for _, row := range rows { + domains[row.Address] = uint64(row.FailureDomainID) + } + + return domains, nil +} + +// GetFailureDomainsNames returns a map associating failure domain IDs to their +// names. +func (c *ClusterTx) GetFailureDomainsNames() (map[uint64]string, error) { + stmt, err := c.tx.Prepare("SELECT id, name FROM failure_domains") + if err != nil { + return nil, err + } + + rows := []struct { + ID int64 + Name string + }{} + + dest := func(i int) []interface{} { + rows = append(rows, struct { + ID int64 + Name string + }{}) + return []interface{}{&rows[len(rows)-1].ID, &rows[len(rows)-1].Name} + } + + err = query.SelectObjects(stmt, dest) + if err != nil { + return nil, err + } + + domains := map[uint64]string{} + + for _, row := range rows { + domains[uint64(row.ID)] = row.Name + } + + return domains, nil +} + // RemoveNode removes the node with the given id. 
func (c *ClusterTx) RemoveNode(id int64) error { result, err := c.tx.Exec("DELETE FROM nodes WHERE id=?", id) diff --git a/lxd/db/node_test.go b/lxd/db/node_test.go index 99c1c47f34..866053255f 100644 --- a/lxd/db/node_test.go +++ b/lxd/db/node_test.go @@ -391,3 +391,23 @@ INSERT INTO instances (id, node_id, name, architecture, type, project_id) VALUES require.NoError(t, err) assert.Equal(t, "none", name) } + +func TestUpdateNodeFailureDomain(t *testing.T) { + tx, cleanup := db.NewTestClusterTx(t) + defer cleanup() + + id, err := tx.CreateNode("buzz", "1.2.3.4:666") + require.NoError(t, err) + + assert.NoError(t, tx.UpdateNodeFailureDomain(id, "foo")) + + domains, err := tx.GetNodesFailureDomains() + require.NoError(t, err) + assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 1}, domains) + + assert.NoError(t, tx.UpdateNodeFailureDomain(id, "")) + + domains, err = tx.GetNodesFailureDomains() + require.NoError(t, err) + assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 0}, domains) +} From 197a19fc1d6895a839fda8d7b9f36a55bedd12ae Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 10:39:42 +0200 Subject: [PATCH 03/10] lxd/cluster: Honor failure domains when changing roles Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/cluster/membership.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go index 978d8877d1..135f3417cb 100644 --- a/lxd/cluster/membership.go +++ b/lxd/cluster/membership.go @@ -824,6 +824,8 @@ func Handover(state *state.State, gateway *Gateway, address string) (string, []d func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode) (*app.RolesChanges, error) { var maxVoters int var maxStandBy int + var domains map[string]uint64 + err := state.Cluster.Transaction(func(tx *db.ClusterTx) error { config, err := ConfigLoad(tx) if err != nil { @@ -831,6 +833,12 @@ 
func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode) } maxVoters = int(config.MaxVoters()) maxStandBy = int(config.MaxStandBy()) + + domains, err = tx.GetNodesFailureDomains() + if err != nil { + return errors.Wrap(err, "Load failure domains") + } + return nil }) if err != nil { @@ -841,7 +849,9 @@ func newRolesChanges(state *state.State, gateway *Gateway, nodes []db.RaftNode) for _, node := range nodes { if HasConnectivity(gateway.cert, node.Address) { - cluster[node] = &client.NodeMetadata{} + cluster[node] = &client.NodeMetadata{ + FailureDomain: domains[node.Address], + } } else { cluster[node] = nil } From 68e5d4006f61446d56dd4592d524296365520954 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:20:58 +0200 Subject: [PATCH 04/10] shared/version: Add clustering_failure_domains extension Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- doc/api-extensions.md | 5 +++++ shared/version/api.go | 1 + 2 files changed, 6 insertions(+) diff --git a/doc/api-extensions.md b/doc/api-extensions.md index 9a34d7871e..ea699c5d7f 100644 --- a/doc/api-extensions.md +++ b/doc/api-extensions.md @@ -1093,3 +1093,8 @@ The 5 entities that have UsedBy are: This adds support for creating and attaching custom block volumes to instances. It introduces the new `--type` flag when creating custom storage volumes, and accepts the values `fs` and `block`. + +## clustering\_failure\_domains + +This extension adds a new `failure_domain` field to the `PUT /1.0/cluster/members/<node>` API, +which can be used to set the failure domain of a node. 
diff --git a/shared/version/api.go b/shared/version/api.go index 6b5a9909a9..52744904a4 100644 --- a/shared/version/api.go +++ b/shared/version/api.go @@ -216,6 +216,7 @@ var APIExtensions = []string{ "network_state_bond_bridge", "usedby_consistency", "custom_block_volumes", + "clustering_failure_domains", } // APIExtensionsCount returns the number of available API extensions. From 3ea751dde65009cf6576b45eb70db896fa79b544 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:21:47 +0200 Subject: [PATCH 05/10] shared/api: Add FailureDomain field to ClusterMemberPut Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- shared/api/cluster.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/shared/api/cluster.go b/shared/api/cluster.go index 42808d98c6..96c8d60909 100644 --- a/shared/api/cluster.go +++ b/shared/api/cluster.go @@ -75,4 +75,7 @@ type ClusterMemberPut struct { // API extension: clustering_architecture Architecture string `json:"architecture" yaml:"architecture"` + + // API extension: clustering_failure_domains + FailureDomain string `json:"failure_domain" yaml:"failure_domain"` } From cb8b33f7b5a8fdc21757e0a1cdac3a6549c6306c Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:22:40 +0200 Subject: [PATCH 06/10] lxd/cluster: Populate FailureDomain field when listing cluster members Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxd/cluster/membership.go | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go index 135f3417cb..3c475deff3 100644 --- a/lxd/cluster/membership.go +++ b/lxd/cluster/membership.go @@ -898,16 +898,34 @@ func List(state *state.State, gateway *Gateway) ([]api.ClusterMember, error) { var err error var nodes []db.NodeInfo var offlineThreshold time.Duration + domains := map[string]string{} err = 
state.Cluster.Transaction(func(tx *db.ClusterTx) error { nodes, err = tx.GetNodes() if err != nil { - return err + return errors.Wrap(err, "Load nodes") } offlineThreshold, err = tx.GetNodeOfflineThreshold() if err != nil { - return err + return errors.Wrap(err, "Load offline threshold config") + } + + nodesDomains, err := tx.GetNodesFailureDomains() + if err != nil { + return errors.Wrap(err, "Load nodes failure domains") + } + + domainsNames, err := tx.GetFailureDomainsNames() + if err != nil { + return errors.Wrap(err, "Load failure domains names") + } + + for _, node := range nodes { + domainID := nodesDomains[node.Address] + if domainID != 0 { + domains[node.Address] = domainsNames[domainID] + } } return nil @@ -956,6 +974,7 @@ func List(state *state.State, gateway *Gateway) ([]api.ClusterMember, error) { if err != nil { return nil, err } + result[i].FailureDomain = domains[node.Address] if node.IsOffline(offlineThreshold) { result[i].Status = "Offline" From f44648765fb1271b3dc9955919473cf15af68e60 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:23:22 +0200 Subject: [PATCH 07/10] lxd: Support changing failure domain in PUT /1.0/cluster/<node> Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- client/lxd_cluster.go | 5 +++++ lxd/api_cluster.go | 21 ++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/client/lxd_cluster.go b/client/lxd_cluster.go index 7922ce1149..f67940b64f 100644 --- a/client/lxd_cluster.go +++ b/client/lxd_cluster.go @@ -113,6 +113,11 @@ func (r *ProtocolLXD) UpdateClusterMember(name string, member api.ClusterMemberP if !r.HasExtension("clustering_edit_roles") { return fmt.Errorf("The server is missing the required \"clustering_edit_roles\" API extension") } + if member.FailureDomain != "" { + if !r.HasExtension("clustering_failure_domains") { + return fmt.Errorf("The server is missing the required \"clustering_failure_domains\" API extension") 
+ } + } // Send the request _, _, err := r.query("PUT", fmt.Sprintf("/cluster/members/%s", name), member, ETag) diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go index 4632cb8dbc..d8cfb0a74c 100644 --- a/lxd/api_cluster.go +++ b/lxd/api_cluster.go @@ -887,11 +887,17 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response { // Find the requested one. var current db.NodeInfo + var currentFailureDomain string var err error err = d.cluster.Transaction(func(tx *db.ClusterTx) error { current, err = tx.GetNodeByName(name) if err != nil { - return err + return errors.Wrap(err, "Load current node state") + } + + currentFailureDomain, err = tx.GetNodeFailureDomain(current.ID) + if err != nil { + return errors.Wrap(err, "Load current failure domain") } return nil @@ -901,7 +907,11 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response { } // Validate the request is fine - err = util.EtagCheck(r, current.Roles) + etag := []interface{}{ + current.Roles, + currentFailureDomain, + } + err = util.EtagCheck(r, etag) if err != nil { return response.PreconditionFailed(err) } @@ -932,7 +942,12 @@ func clusterNodePut(d *Daemon, r *http.Request) response.Response { err := tx.UpdateNodeRoles(current.ID, dbRoles) if err != nil { - return err + return errors.Wrap(err, "Update roles") + } + + err = tx.UpdateNodeFailureDomain(current.ID, req.FailureDomain) + if err != nil { + return errors.Wrap(err, "Update failure domain") } return nil From ebd2459a5b8244546ef597496288b62b5efe0d5f Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:23:49 +0200 Subject: [PATCH 08/10] test: Add new clustering_failure_domains test case Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- test/main.sh | 1 + test/suites/clustering.sh | 91 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/test/main.sh b/test/main.sh index f51a0ee0eb..364b203af2 100755 --- a/test/main.sh +++ 
b/test/main.sh @@ -182,6 +182,7 @@ run_test test_clustering_recover "clustering recovery" run_test test_clustering_handover "clustering handover" run_test test_clustering_rebalance "clustering rebalance" run_test test_clustering_remove_raft_node "custering remove raft node" +run_test test_clustering_failure_domains "failure domains" # run_test test_clustering_upgrade "clustering upgrade" run_test test_projects_default "default project" run_test test_projects_crud "projects CRUD operations" diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh index 49631e8d8f..240eee66d9 100644 --- a/test/suites/clustering.sh +++ b/test/suites/clustering.sh @@ -1901,3 +1901,94 @@ test_clustering_remove_raft_node() { kill_lxd "${LXD_THREE_DIR}" kill_lxd "${LXD_FOUR_DIR}" } + +test_clustering_failure_domains() { + # shellcheck disable=2039 + local LXD_DIR + + setup_clustering_bridge + prefix="lxd$$" + bridge="${prefix}" + + setup_clustering_netns 1 + LXD_ONE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_ONE_DIR}" + ns1="${prefix}1" + spawn_lxd_and_bootstrap_cluster "${ns1}" "${bridge}" "${LXD_ONE_DIR}" + + # Add a newline at the end of each line. YAML as weird rules.. + cert=$(sed ':a;N;$!ba;s/\n/\n\n/g' "${LXD_ONE_DIR}/server.crt") + + # Spawn a second node + setup_clustering_netns 2 + LXD_TWO_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_TWO_DIR}" + ns2="${prefix}2" + spawn_lxd_and_join_cluster "${ns2}" "${bridge}" "${cert}" 2 1 "${LXD_TWO_DIR}" + + # Spawn a third node, using the non-leader node2 as join target. + setup_clustering_netns 3 + LXD_THREE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_THREE_DIR}" + ns3="${prefix}3" + spawn_lxd_and_join_cluster "${ns3}" "${bridge}" "${cert}" 3 2 "${LXD_THREE_DIR}" + + # Spawn a fourth node, this will be a non-database node. 
+ setup_clustering_netns 4 + LXD_FOUR_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_FOUR_DIR}" + ns4="${prefix}4" + spawn_lxd_and_join_cluster "${ns4}" "${bridge}" "${cert}" 4 1 "${LXD_FOUR_DIR}" + + # Spawn a fifth node, using non-database node4 as join target. + setup_clustering_netns 5 + LXD_FIVE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_FIVE_DIR}" + ns5="${prefix}5" + spawn_lxd_and_join_cluster "${ns5}" "${bridge}" "${cert}" 5 4 "${LXD_FIVE_DIR}" + + # Spawn a sixth node, using non-database node4 as join target. + setup_clustering_netns 6 + LXD_SIX_DIR=$(mktemp -d -p "${TEST_DIR}" XXX) + chmod +x "${LXD_SIX_DIR}" + ns6="${prefix}6" + spawn_lxd_and_join_cluster "${ns6}" "${bridge}" "${cert}" 6 4 "${LXD_SIX_DIR}" + + # Set failure domains + echo -e "roles: [\"database\"]\nfailure_domain: \"az1\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node1 + echo -e "roles: [\"database\"]\nfailure_domain: \"az2\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node2 + echo -e "roles: [\"database\"]\nfailure_domain: \"az3\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node3 + echo -e "roles: []\nfailure_domain: \"az1\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node4 + echo -e "roles: []\nfailure_domain: \"az2\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node5 + echo -e "roles: []\nfailure_domain: \"az3\"" | LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node6 + + # Shutdown a node in az2, its replacement is picked from az2. 
+ LXD_DIR="${LXD_TWO_DIR}" lxd shutdown + sleep 3 + + LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node2 | grep -q "database: false" + LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node5 | grep -q "database: true" + + LXD_DIR="${LXD_SIX_DIR}" lxd shutdown + LXD_DIR="${LXD_FIVE_DIR}" lxd shutdown + LXD_DIR="${LXD_FOUR_DIR}" lxd shutdown + LXD_DIR="${LXD_THREE_DIR}" lxd shutdown + LXD_DIR="${LXD_ONE_DIR}" lxd shutdown + sleep 0.5 + rm -f "${LXD_SIX_DIR}/unix.socket" + rm -f "${LXD_FIVE_DIR}/unix.socket" + rm -f "${LXD_FOUR_DIR}/unix.socket" + rm -f "${LXD_THREE_DIR}/unix.socket" + rm -f "${LXD_TWO_DIR}/unix.socket" + rm -f "${LXD_ONE_DIR}/unix.socket" + + teardown_clustering_netns + teardown_clustering_bridge + + kill_lxd "${LXD_ONE_DIR}" + kill_lxd "${LXD_TWO_DIR}" + kill_lxd "${LXD_THREE_DIR}" + kill_lxd "${LXD_FOUR_DIR}" + kill_lxd "${LXD_FIVE_DIR}" + kill_lxd "${LXD_SIX_DIR}" +} From a0120ab3234bd80280356e8c6487df8b8dc026d1 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:27:34 +0200 Subject: [PATCH 09/10] doc: Add documentation about failure domains Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- doc/clustering.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/clustering.md b/doc/clustering.md index cb90f4ed55..e7137350a1 100644 --- a/doc/clustering.md +++ b/doc/clustering.md @@ -217,6 +217,17 @@ transition to the Blocked state, until you upgrade the very last one. At that point the blocked nodes will notice that there is no out-of-date node left and will become operational again. +### Failure domains + +Failure domains can be used to indicate which nodes should be given preference +when reassigning the roles of a cluster member that has been shut down or has +crashed. For example, if a cluster member that currently has the database role +gets shut down, LXD will try to assign its database role to another cluster +member in the same failure domain, if one is available. 
+ +To change the failure domain of a cluster member you can use the `lxc cluster +edit <member>` command line tool, or the `PUT /1.0/cluster/members/<member>` REST API. + ### Recover from quorum loss Every LXD cluster has up to 3 members that serve as database nodes. If you From b6d795a3555960e8d2871480b72485dbfa3ffa33 Mon Sep 17 00:00:00 2001 From: Free Ekanayaka <free.ekanay...@canonical.com> Date: Thu, 2 Jul 2020 12:33:42 +0200 Subject: [PATCH 10/10] lxc: Add failure domain column in "lxc cluster list" output Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com> --- lxc/cluster.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lxc/cluster.go b/lxc/cluster.go index eacd4ae36f..9d605990ce 100644 --- a/lxc/cluster.go +++ b/lxc/cluster.go @@ -123,7 +123,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error { if member.Database { database = "YES" } - line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message, member.Architecture} + line := []string{member.ServerName, member.URL, database, strings.ToUpper(member.Status), member.Message, member.Architecture, member.FailureDomain} data = append(data, line) } sort.Sort(byName(data)) @@ -135,6 +135,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args []string) error { i18n.G("STATE"), i18n.G("MESSAGE"), i18n.G("ARCHITECTURE"), + i18n.G("FAILURE DOMAIN"), } return utils.RenderTable(c.flagFormat, header, data, members)
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel