The following pull request was submitted through Github.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/7608

This e-mail was sent by the LXC bot, direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===

From 4eece934f4cefaff341d018bf1fedfb1b289e1c8 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 10:00:44 +0200
Subject: [PATCH 01/10] lxd/db: Add failure_domains table and nodes column
 reference

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 lxd/db/cluster/schema.go |  8 +++++++-
 lxd/db/cluster/update.go | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/lxd/db/cluster/schema.go b/lxd/db/cluster/schema.go
index 201ed4e1eb..fb36056864 100644
--- a/lxd/db/cluster/schema.go
+++ b/lxd/db/cluster/schema.go
@@ -20,6 +20,11 @@ CREATE TABLE config (
     value TEXT,
     UNIQUE (key)
 );
+CREATE TABLE failure_domains (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL,
+    UNIQUE (name)
+);
 CREATE TABLE "images" (
     id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
     fingerprint TEXT NOT NULL,
@@ -311,6 +316,7 @@ CREATE TABLE nodes (
     heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
     pending INTEGER NOT NULL DEFAULT 0,
     arch INTEGER NOT NULL DEFAULT 0 CHECK (arch > 0),
+    failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains (id) ON 
DELETE SET NULL,
     UNIQUE (name),
     UNIQUE (address)
 );
@@ -565,5 +571,5 @@ CREATE TABLE storage_volumes_snapshots_config (
     UNIQUE (storage_volume_snapshot_id, key)
 );
 
-INSERT INTO schema (version, updated_at) VALUES (31, strftime("%s"))
+INSERT INTO schema (version, updated_at) VALUES (32, strftime("%s"))
 `
diff --git a/lxd/db/cluster/update.go b/lxd/db/cluster/update.go
index b6d5d4b1fe..758b569a96 100644
--- a/lxd/db/cluster/update.go
+++ b/lxd/db/cluster/update.go
@@ -68,6 +68,26 @@ var updates = map[int]schema.Update{
        29: updateFromV28,
        30: updateFromV29,
        31: updateFromV30,
+       32: updateFromV31,
+}
+
+// Add failure_domains table and failure_domain_id column to nodes table.
+func updateFromV31(tx *sql.Tx) error {
+       stmts := `
+CREATE TABLE failure_domains (
+    id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
+    name TEXT NOT NULL,
+    UNIQUE (name)
+);
+ALTER TABLE nodes
+ ADD COLUMN failure_domain_id INTEGER DEFAULT NULL REFERENCES failure_domains 
(id) ON DELETE SET NULL;
+`
+       _, err := tx.Exec(stmts)
+       if err != nil {
+               return err
+       }
+
+       return nil
 }
 
 // Add content type field to storage volumes

From c945c182b16941212e9de3e3b502ff0e975d78eb Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 10:35:29 +0200
Subject: [PATCH 02/10] lxd/db: Add UpdateNodeFailureDomain() and
 GetNodesFailureDomains()

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 lxd/db/node.go      | 128 +++++++++++++++++++++++++++++++++++++++++++-
 lxd/db/node_test.go |  20 +++++++
 2 files changed, 147 insertions(+), 1 deletion(-)

diff --git a/lxd/db/node.go b/lxd/db/node.go
index e0a2c30f41..ec4aad41d3 100644
--- a/lxd/db/node.go
+++ b/lxd/db/node.go
@@ -3,6 +3,7 @@
 package db
 
 import (
+       "database/sql"
        "fmt"
        "strconv"
        "strings"
@@ -228,7 +229,7 @@ func (c *ClusterTx) RenameNode(old, new string) error {
 // Nodes returns all LXD nodes part of the cluster.
 func (c *ClusterTx) nodes(pending bool, where string, args ...interface{}) 
([]NodeInfo, error) {
        // Get node roles
-       sql := "SELECT node_id, role FROM nodes_roles;"
+       sql := "SELECT node_id, role FROM nodes_roles"
 
        nodeRoles := map[int64][]string{}
        rows, err := c.tx.Query(sql)
@@ -456,6 +457,131 @@ func (c *ClusterTx) UpdateNodeRoles(id int64, roles 
[]ClusterRole) error {
        return nil
 }
 
+// UpdateNodeFailureDomain changes the failure domain of a node.
+func (c *ClusterTx) UpdateNodeFailureDomain(id int64, domain string) error {
+       var domainID interface{}
+
+       if domain == "" {
+               domainID = nil
+       } else {
+               row := c.tx.QueryRow("SELECT id FROM failure_domains WHERE 
name=?", domain)
+               err := row.Scan(&domainID)
+               if err != nil {
+                       if err != sql.ErrNoRows {
+                               return errors.Wrapf(err, "Load failure domain 
name")
+                       }
+                       result, err := c.tx.Exec("INSERT INTO failure_domains 
(name) VALUES (?)", domain)
+                       if err != nil {
+                               return errors.Wrapf(err, "Create new failure 
domain")
+                       }
+                       domainID, err = result.LastInsertId()
+                       if err != nil {
+                               return errors.Wrapf(err, "Get last inserted ID")
+                       }
+               }
+       }
+
+       result, err := c.tx.Exec("UPDATE nodes SET failure_domain_id=? WHERE 
id=?", domainID, id)
+       if err != nil {
+               return err
+       }
+       n, err := result.RowsAffected()
+       if err != nil {
+               return err
+       }
+       if n != 1 {
+               return fmt.Errorf("Query updated %d rows instead of 1", n)
+       }
+
+       return nil
+}
+
+// GetNodeFailureDomain returns the failure domain associated with the node 
with the given ID.
+func (c *ClusterTx) GetNodeFailureDomain(id int64) (string, error) {
+       stmt := `
+SELECT coalesce(failure_domains.name,'')
+  FROM nodes LEFT JOIN failure_domains ON nodes.failure_domain_id = 
failure_domains.id
+ WHERE nodes.id=?
+`
+       var domain string
+
+       err := c.tx.QueryRow(stmt, id).Scan(&domain)
+       if err != nil {
+               return "", err
+       }
+       return domain, nil
+}
+
+// GetNodesFailureDomains returns a map associating each node address with its
+// failure domain code.
+func (c *ClusterTx) GetNodesFailureDomains() (map[string]uint64, error) {
+       stmt, err := c.tx.Prepare("SELECT address, coalesce(failure_domain_id, 
0) FROM nodes")
+       if err != nil {
+               return nil, err
+       }
+
+       rows := []struct {
+               Address         string
+               FailureDomainID int64
+       }{}
+
+       dest := func(i int) []interface{} {
+               rows = append(rows, struct {
+                       Address         string
+                       FailureDomainID int64
+               }{})
+               return []interface{}{&rows[len(rows)-1].Address, 
&rows[len(rows)-1].FailureDomainID}
+       }
+
+       err = query.SelectObjects(stmt, dest)
+       if err != nil {
+               return nil, err
+       }
+
+       domains := map[string]uint64{}
+
+       for _, row := range rows {
+               domains[row.Address] = uint64(row.FailureDomainID)
+       }
+
+       return domains, nil
+}
+
+// GetFailureDomainsNames returns a map associating failure domain IDs to their
+// names.
+func (c *ClusterTx) GetFailureDomainsNames() (map[uint64]string, error) {
+       stmt, err := c.tx.Prepare("SELECT id, name FROM failure_domains")
+       if err != nil {
+               return nil, err
+       }
+
+       rows := []struct {
+               ID   int64
+               Name string
+       }{}
+
+       dest := func(i int) []interface{} {
+               rows = append(rows, struct {
+                       ID   int64
+                       Name string
+               }{})
+               return []interface{}{&rows[len(rows)-1].ID, 
&rows[len(rows)-1].Name}
+       }
+
+       err = query.SelectObjects(stmt, dest)
+       if err != nil {
+               return nil, err
+       }
+
+       domains := map[uint64]string{}
+
+       for _, row := range rows {
+               domains[uint64(row.ID)] = row.Name
+       }
+
+       return domains, nil
+}
+
 // RemoveNode removes the node with the given id.
 func (c *ClusterTx) RemoveNode(id int64) error {
        result, err := c.tx.Exec("DELETE FROM nodes WHERE id=?", id)
diff --git a/lxd/db/node_test.go b/lxd/db/node_test.go
index 99c1c47f34..866053255f 100644
--- a/lxd/db/node_test.go
+++ b/lxd/db/node_test.go
@@ -391,3 +391,23 @@ INSERT INTO instances (id, node_id, name, architecture, 
type, project_id) VALUES
        require.NoError(t, err)
        assert.Equal(t, "none", name)
 }
+
+func TestUpdateNodeFailureDomain(t *testing.T) {
+       tx, cleanup := db.NewTestClusterTx(t)
+       defer cleanup()
+
+       id, err := tx.CreateNode("buzz", "1.2.3.4:666")
+       require.NoError(t, err)
+
+       assert.NoError(t, tx.UpdateNodeFailureDomain(id, "foo"))
+
+       domains, err := tx.GetNodesFailureDomains()
+       require.NoError(t, err)
+       assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 1}, 
domains)
+
+       assert.NoError(t, tx.UpdateNodeFailureDomain(id, ""))
+
+       domains, err = tx.GetNodesFailureDomains()
+       require.NoError(t, err)
+       assert.Equal(t, map[string]uint64{"0.0.0.0": 0, "1.2.3.4:666": 0}, 
domains)
+}

From 197a19fc1d6895a839fda8d7b9f36a55bedd12ae Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 10:39:42 +0200
Subject: [PATCH 03/10] lxd/cluster: Honor failure domains when changing roles

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 lxd/cluster/membership.go | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 978d8877d1..135f3417cb 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -824,6 +824,8 @@ func Handover(state *state.State, gateway *Gateway, address 
string) (string, []d
 func newRolesChanges(state *state.State, gateway *Gateway, nodes 
[]db.RaftNode) (*app.RolesChanges, error) {
        var maxVoters int
        var maxStandBy int
+       var domains map[string]uint64
+
        err := state.Cluster.Transaction(func(tx *db.ClusterTx) error {
                config, err := ConfigLoad(tx)
                if err != nil {
@@ -831,6 +833,12 @@ func newRolesChanges(state *state.State, gateway *Gateway, 
nodes []db.RaftNode)
                }
                maxVoters = int(config.MaxVoters())
                maxStandBy = int(config.MaxStandBy())
+
+               domains, err = tx.GetNodesFailureDomains()
+               if err != nil {
+                       return errors.Wrap(err, "Load failure domains")
+               }
+
                return nil
        })
        if err != nil {
@@ -841,7 +849,9 @@ func newRolesChanges(state *state.State, gateway *Gateway, 
nodes []db.RaftNode)
 
        for _, node := range nodes {
                if HasConnectivity(gateway.cert, node.Address) {
-                       cluster[node] = &client.NodeMetadata{}
+                       cluster[node] = &client.NodeMetadata{
+                               FailureDomain: domains[node.Address],
+                       }
                } else {
                        cluster[node] = nil
                }

From 68e5d4006f61446d56dd4592d524296365520954 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:20:58 +0200
Subject: [PATCH 04/10] shared/version: Add clustering_failure_domains
 extension

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 doc/api-extensions.md | 5 +++++
 shared/version/api.go | 1 +
 2 files changed, 6 insertions(+)

diff --git a/doc/api-extensions.md b/doc/api-extensions.md
index 9a34d7871e..ea699c5d7f 100644
--- a/doc/api-extensions.md
+++ b/doc/api-extensions.md
@@ -1093,3 +1093,8 @@ The 5 entities that have UsedBy are:
 
 This adds support for creating and attaching custom block volumes to instances.
 It introduces the new `--type` flag when creating custom storage volumes, and 
accepts the values `fs` and `block`.
+
+## clustering\_failure\_domains
+
+This extension adds a new `failure_domain` field to the `PUT 
/1.0/cluster/<node>` API,
+which can be used to set the failure domain of a node.
diff --git a/shared/version/api.go b/shared/version/api.go
index 6b5a9909a9..52744904a4 100644
--- a/shared/version/api.go
+++ b/shared/version/api.go
@@ -216,6 +216,7 @@ var APIExtensions = []string{
        "network_state_bond_bridge",
        "usedby_consistency",
        "custom_block_volumes",
+       "clustering_failure_domains",
 }
 
 // APIExtensionsCount returns the number of available API extensions.

From 3ea751dde65009cf6576b45eb70db896fa79b544 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:21:47 +0200
Subject: [PATCH 05/10] shared/api: Add FailureDomain field to ClusterMemberPut

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 shared/api/cluster.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/shared/api/cluster.go b/shared/api/cluster.go
index 42808d98c6..96c8d60909 100644
--- a/shared/api/cluster.go
+++ b/shared/api/cluster.go
@@ -75,4 +75,7 @@ type ClusterMemberPut struct {
 
        // API extension: clustering_architecture
        Architecture string `json:"architecture" yaml:"architecture"`
+
+       // API extension: clustering_failure_domains
+       FailureDomain string `json:"failure_domain" yaml:"failure_domain"`
 }

From cb8b33f7b5a8fdc21757e0a1cdac3a6549c6306c Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:22:40 +0200
Subject: [PATCH 06/10] lxd/cluster: Populate FailureDomain field when listing
 cluster members

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 lxd/cluster/membership.go | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/lxd/cluster/membership.go b/lxd/cluster/membership.go
index 135f3417cb..3c475deff3 100644
--- a/lxd/cluster/membership.go
+++ b/lxd/cluster/membership.go
@@ -898,16 +898,34 @@ func List(state *state.State, gateway *Gateway) 
([]api.ClusterMember, error) {
        var err error
        var nodes []db.NodeInfo
        var offlineThreshold time.Duration
+       domains := map[string]string{}
 
        err = state.Cluster.Transaction(func(tx *db.ClusterTx) error {
                nodes, err = tx.GetNodes()
                if err != nil {
-                       return err
+                       return errors.Wrap(err, "Load nodes")
                }
 
                offlineThreshold, err = tx.GetNodeOfflineThreshold()
                if err != nil {
-                       return err
+                       return errors.Wrap(err, "Load offline threshold config")
+               }
+
+               nodesDomains, err := tx.GetNodesFailureDomains()
+               if err != nil {
+                       return errors.Wrap(err, "Load nodes failure domains")
+               }
+
+               domainsNames, err := tx.GetFailureDomainsNames()
+               if err != nil {
+                       return errors.Wrap(err, "Load failure domains names")
+               }
+
+               for _, node := range nodes {
+                       domainID := nodesDomains[node.Address]
+                       if domainID != 0 {
+                               domains[node.Address] = domainsNames[domainID]
+                       }
                }
 
                return nil
@@ -956,6 +974,7 @@ func List(state *state.State, gateway *Gateway) 
([]api.ClusterMember, error) {
                if err != nil {
                        return nil, err
                }
+               result[i].FailureDomain = domains[node.Address]
 
                if node.IsOffline(offlineThreshold) {
                        result[i].Status = "Offline"

From f44648765fb1271b3dc9955919473cf15af68e60 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:23:22 +0200
Subject: [PATCH 07/10] lxd: Support changing failure domain in PUT
 /1.0/cluster/<node>

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 client/lxd_cluster.go |  5 +++++
 lxd/api_cluster.go    | 21 ++++++++++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/client/lxd_cluster.go b/client/lxd_cluster.go
index 7922ce1149..f67940b64f 100644
--- a/client/lxd_cluster.go
+++ b/client/lxd_cluster.go
@@ -113,6 +113,11 @@ func (r *ProtocolLXD) UpdateClusterMember(name string, 
member api.ClusterMemberP
        if !r.HasExtension("clustering_edit_roles") {
                return fmt.Errorf("The server is missing the required 
\"clustering_edit_roles\" API extension")
        }
+       if member.FailureDomain != "" {
+               if !r.HasExtension("clustering_failure_domains") {
+                       return fmt.Errorf("The server is missing the required 
\"clustering_failure_domains\" API extension")
+               }
+       }
 
        // Send the request
        _, _, err := r.query("PUT", fmt.Sprintf("/cluster/members/%s", name), 
member, ETag)
diff --git a/lxd/api_cluster.go b/lxd/api_cluster.go
index 4632cb8dbc..d8cfb0a74c 100644
--- a/lxd/api_cluster.go
+++ b/lxd/api_cluster.go
@@ -887,11 +887,17 @@ func clusterNodePut(d *Daemon, r *http.Request) 
response.Response {
 
        // Find the requested one.
        var current db.NodeInfo
+       var currentFailureDomain string
        var err error
        err = d.cluster.Transaction(func(tx *db.ClusterTx) error {
                current, err = tx.GetNodeByName(name)
                if err != nil {
-                       return err
+                       return errors.Wrap(err, "Load current node state")
+               }
+
+               currentFailureDomain, err = tx.GetNodeFailureDomain(current.ID)
+               if err != nil {
+                       return errors.Wrap(err, "Load current failure domain")
                }
 
                return nil
@@ -901,7 +907,11 @@ func clusterNodePut(d *Daemon, r *http.Request) 
response.Response {
        }
 
        // Validate the request is fine
-       err = util.EtagCheck(r, current.Roles)
+       etag := []interface{}{
+               current.Roles,
+               currentFailureDomain,
+       }
+       err = util.EtagCheck(r, etag)
        if err != nil {
                return response.PreconditionFailed(err)
        }
@@ -932,7 +942,12 @@ func clusterNodePut(d *Daemon, r *http.Request) 
response.Response {
 
                err := tx.UpdateNodeRoles(current.ID, dbRoles)
                if err != nil {
-                       return err
+                       return errors.Wrap(err, "Update roles")
+               }
+
+               err = tx.UpdateNodeFailureDomain(current.ID, req.FailureDomain)
+               if err != nil {
+                       return errors.Wrap(err, "Update failure domain")
                }
 
                return nil

From ebd2459a5b8244546ef597496288b62b5efe0d5f Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:23:49 +0200
Subject: [PATCH 08/10] test: Add new clustering_failure_domains test case

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 test/main.sh              |  1 +
 test/suites/clustering.sh | 91 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/test/main.sh b/test/main.sh
index f51a0ee0eb..364b203af2 100755
--- a/test/main.sh
+++ b/test/main.sh
@@ -182,6 +182,7 @@ run_test test_clustering_recover "clustering recovery"
 run_test test_clustering_handover "clustering handover"
 run_test test_clustering_rebalance "clustering rebalance"
 run_test test_clustering_remove_raft_node "custering remove raft node"
+run_test test_clustering_failure_domains "failure domains"
 # run_test test_clustering_upgrade "clustering upgrade"
 run_test test_projects_default "default project"
 run_test test_projects_crud "projects CRUD operations"
diff --git a/test/suites/clustering.sh b/test/suites/clustering.sh
index 49631e8d8f..240eee66d9 100644
--- a/test/suites/clustering.sh
+++ b/test/suites/clustering.sh
@@ -1901,3 +1901,94 @@ test_clustering_remove_raft_node() {
   kill_lxd "${LXD_THREE_DIR}"
   kill_lxd "${LXD_FOUR_DIR}"
 }
+
+test_clustering_failure_domains() {
+  # shellcheck disable=2039
+  local LXD_DIR
+
+  setup_clustering_bridge
+  prefix="lxd$$"
+  bridge="${prefix}"
+
+  setup_clustering_netns 1
+  LXD_ONE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_ONE_DIR}"
+  ns1="${prefix}1"
+  spawn_lxd_and_bootstrap_cluster "${ns1}" "${bridge}" "${LXD_ONE_DIR}"
+
+  # Add a newline at the end of each line. YAML has weird rules...
+  cert=$(sed ':a;N;$!ba;s/\n/\n\n/g' "${LXD_ONE_DIR}/server.crt")
+
+  # Spawn a second node
+  setup_clustering_netns 2
+  LXD_TWO_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_TWO_DIR}"
+  ns2="${prefix}2"
+  spawn_lxd_and_join_cluster "${ns2}" "${bridge}" "${cert}" 2 1 
"${LXD_TWO_DIR}"
+
+  # Spawn a third node, using the non-leader node2 as join target.
+  setup_clustering_netns 3
+  LXD_THREE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_THREE_DIR}"
+  ns3="${prefix}3"
+  spawn_lxd_and_join_cluster "${ns3}" "${bridge}" "${cert}" 3 2 
"${LXD_THREE_DIR}"
+
+  # Spawn a fourth node, this will be a non-database node.
+  setup_clustering_netns 4
+  LXD_FOUR_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_FOUR_DIR}"
+  ns4="${prefix}4"
+  spawn_lxd_and_join_cluster "${ns4}" "${bridge}" "${cert}" 4 1 
"${LXD_FOUR_DIR}"
+
+  # Spawn a fifth node, using non-database node4 as join target.
+  setup_clustering_netns 5
+  LXD_FIVE_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_FIVE_DIR}"
+  ns5="${prefix}5"
+  spawn_lxd_and_join_cluster "${ns5}" "${bridge}" "${cert}" 5 4 
"${LXD_FIVE_DIR}"
+
+  # Spawn a sixth node, using non-database node4 as join target.
+  setup_clustering_netns 6
+  LXD_SIX_DIR=$(mktemp -d -p "${TEST_DIR}" XXX)
+  chmod +x "${LXD_SIX_DIR}"
+  ns6="${prefix}6"
+  spawn_lxd_and_join_cluster "${ns6}" "${bridge}" "${cert}" 6 4 
"${LXD_SIX_DIR}"
+
+  # Set failure domains
+  echo -e "roles: [\"database\"]\nfailure_domain: \"az1\"" | 
LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node1
+  echo -e "roles: [\"database\"]\nfailure_domain: \"az2\"" | 
LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node2
+  echo -e "roles: [\"database\"]\nfailure_domain: \"az3\"" | 
LXD_DIR="${LXD_THREE_DIR}" lxc cluster edit node3
+  echo -e "roles: []\nfailure_domain: \"az1\"" | LXD_DIR="${LXD_THREE_DIR}" 
lxc cluster edit node4
+  echo -e "roles: []\nfailure_domain: \"az2\"" | LXD_DIR="${LXD_THREE_DIR}" 
lxc cluster edit node5
+  echo -e "roles: []\nfailure_domain: \"az3\"" | LXD_DIR="${LXD_THREE_DIR}" 
lxc cluster edit node6
+
+  # Shutdown a node in az2, its replacement is picked from az2.
+  LXD_DIR="${LXD_TWO_DIR}" lxd shutdown
+  sleep 3
+
+  LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node2 | grep -q "database: false"
+  LXD_DIR="${LXD_ONE_DIR}" lxc cluster show node5 | grep -q "database: true"
+
+  LXD_DIR="${LXD_SIX_DIR}" lxd shutdown
+  LXD_DIR="${LXD_FIVE_DIR}" lxd shutdown
+  LXD_DIR="${LXD_FOUR_DIR}" lxd shutdown
+  LXD_DIR="${LXD_THREE_DIR}" lxd shutdown
+  LXD_DIR="${LXD_ONE_DIR}" lxd shutdown
+  sleep 0.5
+  rm -f "${LXD_SIX_DIR}/unix.socket"
+  rm -f "${LXD_FIVE_DIR}/unix.socket"
+  rm -f "${LXD_FOUR_DIR}/unix.socket"
+  rm -f "${LXD_THREE_DIR}/unix.socket"
+  rm -f "${LXD_TWO_DIR}/unix.socket"
+  rm -f "${LXD_ONE_DIR}/unix.socket"
+
+  teardown_clustering_netns
+  teardown_clustering_bridge
+
+  kill_lxd "${LXD_ONE_DIR}"
+  kill_lxd "${LXD_TWO_DIR}"
+  kill_lxd "${LXD_THREE_DIR}"
+  kill_lxd "${LXD_FOUR_DIR}"
+  kill_lxd "${LXD_FIVE_DIR}"
+  kill_lxd "${LXD_SIX_DIR}"
+}

From a0120ab3234bd80280356e8c6487df8b8dc026d1 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:27:34 +0200
Subject: [PATCH 09/10] doc: Add documentation about failure domains

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 doc/clustering.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/doc/clustering.md b/doc/clustering.md
index cb90f4ed55..e7137350a1 100644
--- a/doc/clustering.md
+++ b/doc/clustering.md
@@ -217,6 +217,17 @@ transition to the Blocked state, until you upgrade the 
very last
 one. At that point the blocked nodes will notice that there is no
 out-of-date node left and will become operational again.
 
+### Failure domains
+
+Failure domains can be used to indicate which nodes should be given preference
+when trying to assign roles to a cluster member that has been shut down or has
+crashed. For example, if a cluster member that currently has the database role
+gets shut down, LXD will try to assign its database role to another cluster
+member in the same failure domain, if one is available.
+
+To change the failure domain of a cluster member you can use the `lxc cluster
+edit <member>` command line tool, or the `PUT /1.0/cluster/<member>` REST API.
+
 ### Recover from quorum loss
 
 Every LXD cluster has up to 3 members that serve as database nodes. If you

From b6d795a3555960e8d2871480b72485dbfa3ffa33 Mon Sep 17 00:00:00 2001
From: Free Ekanayaka <free.ekanay...@canonical.com>
Date: Thu, 2 Jul 2020 12:33:42 +0200
Subject: [PATCH 10/10] lxc: Add failure domain column in "lxc cluster list"
 output

Signed-off-by: Free Ekanayaka <free.ekanay...@canonical.com>
---
 lxc/cluster.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lxc/cluster.go b/lxc/cluster.go
index eacd4ae36f..9d605990ce 100644
--- a/lxc/cluster.go
+++ b/lxc/cluster.go
@@ -123,7 +123,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args 
[]string) error {
                if member.Database {
                        database = "YES"
                }
-               line := []string{member.ServerName, member.URL, database, 
strings.ToUpper(member.Status), member.Message, member.Architecture}
+               line := []string{member.ServerName, member.URL, database, 
strings.ToUpper(member.Status), member.Message, member.Architecture, 
member.FailureDomain}
                data = append(data, line)
        }
        sort.Sort(byName(data))
@@ -135,6 +135,7 @@ func (c *cmdClusterList) Run(cmd *cobra.Command, args 
[]string) error {
                i18n.G("STATE"),
                i18n.G("MESSAGE"),
                i18n.G("ARCHITECTURE"),
+               i18n.G("FAILURE DOMAIN"),
        }
 
        return utils.RenderTable(c.flagFormat, header, data, members)
_______________________________________________
lxc-devel mailing list
lxc-devel@lists.linuxcontainers.org
http://lists.linuxcontainers.org/listinfo/lxc-devel

Reply via email to