The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/8242
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === - If a network was successfully "created" (using `n.Create()`) on a local node, but then failed to start (using `n.Start()`) then it was possible for setup done in `n.Create()` to be left behind because although `n.Delete()` was called on failure, the node status was still Pending and so the tear down was not performed. - To cope with this, and to better align with storage pool state management, I've moved the DB record deletion and cluster notification logic into the API route handler function, leaving the network package's `Delete()` function to always tear down local setup. - This allows the API route handler functions to decide for themselves (using `n.LocalStatus()`) whether it is appropriate to call `n.Delete()` depending on the scenario.
From bc7e0525ef8f585bbcda22d3a4bb160ae124b7e6 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:43:56 +0000 Subject: [PATCH 1/8] lxd/network/network/interface: Adds Project function Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/network_interface.go | 1 + 1 file changed, 1 insertion(+) diff --git a/lxd/network/network_interface.go b/lxd/network/network_interface.go index 021cd88198..af8c8afcbe 100644 --- a/lxd/network/network_interface.go +++ b/lxd/network/network_interface.go @@ -31,6 +31,7 @@ type Network interface { Validate(config map[string]string) error ID() int64 Name() string + Project() string Description() string Status() string LocalStatus() string From 4b48e17ab725d8bf598b1bcf75021e46ffeedb3e Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:44:14 +0000 Subject: [PATCH 2/8] lxd/network/driver/common: Adds Project function Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_common.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lxd/network/driver_common.go b/lxd/network/driver_common.go index 03f4b9ab7d..c17bf1d8d6 100644 --- a/lxd/network/driver_common.go +++ b/lxd/network/driver_common.go @@ -130,6 +130,11 @@ func (n *common) Name() string { return n.name } +// Project returns the network project. +func (n *common) Project() string { + return n.project +} + // Description returns the network description. func (n *common) Description() string { return n.description From 40defa5130a86dedd14ec111ccd6bf0667519fc8 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:44:35 +0000 Subject: [PATCH 3/8] lxd/network/driver/common: Remove cluster notification and DB record removal from delete() function We need more control over when we generate notifications and remove DB records, so this is being moved into the API route handler function (networkDelete()). This also aligns better with storage pools, where the notifications and DB record removal is also handled by API route handler function (storagePoolDelete()). Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_common.go | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/lxd/network/driver_common.go b/lxd/network/driver_common.go index c17bf1d8d6..2c9138f1c1 100644 --- a/lxd/network/driver_common.go +++ b/lxd/network/driver_common.go @@ -368,34 +368,16 @@ func (n *common) rename(newName string) error { // delete the network from the database if clusterNotification is false. func (n *common) delete(clientType request.ClientType) error { - // Only delete database record if not cluster notification. - if clientType != request.ClientTypeNotifier { - // Notify all other nodes. If any node is down, an error will be returned. - notifier, err := cluster.NewNotifier(n.state, n.state.Endpoints.NetworkCert(), cluster.NotifyAll) - if err != nil { - return err - } - err = notifier(func(client lxd.InstanceServer) error { - return client.UseProject(n.project).DeleteNetwork(n.name) - }) - if err != nil { - return err - } - - // Remove the network from the database. - err = n.state.Cluster.DeleteNetwork(n.project, n.name) - if err != nil { - return err - } - - n.lifecycle("deleted", nil) - } - // Cleanup storage. if shared.PathExists(shared.VarPath("networks", n.name)) { os.RemoveAll(shared.VarPath("networks", n.name)) } + // Generate lifecycle event if not notification. + if clientType != request.ClientTypeNotifier { + n.lifecycle("deleted", nil) + } + return nil } From 4872518ed2b8a21197392e4445d84707aaa65484 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:46:54 +0000 Subject: [PATCH 4/8] lxd/network/driver: Always delete when requested, ignore LocalStatus() pending This is now up to caller to decide whether it is appropriate to delete or not (using n.LocalStatus()). This is needed because if the network fails to start as part of a create operation on a local node we need to delete it, however it will still be in pending state and so would have previously been ignored. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/network/driver_bridge.go | 19 ++++---- lxd/network/driver_macvlan.go | 1 + lxd/network/driver_ovn.go | 88 +++++++++++++++++----------------- lxd/network/driver_physical.go | 8 ++-- lxd/network/driver_sriov.go | 1 + 5 files changed, 56 insertions(+), 61 deletions(-) diff --git a/lxd/network/driver_bridge.go b/lxd/network/driver_bridge.go index 6f469f85e8..a733e5afc1 100644 --- a/lxd/network/driver_bridge.go +++ b/lxd/network/driver_bridge.go @@ -436,22 +436,19 @@ func (n *bridge) isRunning() bool { func (n *bridge) Delete(clientType request.ClientType) error { n.logger.Debug("Delete", log.Ctx{"clientType": clientType}) - // Bring the local network down if created on this node. - if n.LocalStatus() == api.NetworkStatusCreated || n.LocalStatus() == api.NetworkStatusUnknown { - if n.isRunning() { - err := n.Stop() - if err != nil { - return err - } - } - - // Delete apparmor profiles. - err := apparmor.NetworkDelete(n.state, n) + if n.isRunning() { + err := n.Stop() if err != nil { return err } } + // Delete apparmor profiles. + err := apparmor.NetworkDelete(n.state, n) + if err != nil { + return err + } + return n.common.delete(clientType) } diff --git a/lxd/network/driver_macvlan.go b/lxd/network/driver_macvlan.go index 60eac1a092..bba6d06369 100644 --- a/lxd/network/driver_macvlan.go +++ b/lxd/network/driver_macvlan.go @@ -45,6 +45,7 @@ func (n *macvlan) Validate(config map[string]string) error { // Delete deletes a network. func (n *macvlan) Delete(clientType request.ClientType) error { n.logger.Debug("Delete", log.Ctx{"clientType": clientType}) + return n.common.delete(clientType) } diff --git a/lxd/network/driver_ovn.go b/lxd/network/driver_ovn.go index 7b64b778c7..4d7a6a3851 100644 --- a/lxd/network/driver_ovn.go +++ b/lxd/network/driver_ovn.go @@ -1808,63 +1808,61 @@ func (n *ovn) deleteChassisGroupEntry() error { func (n *ovn) Delete(clientType request.ClientType) error { n.logger.Debug("Delete", log.Ctx{"clientType": clientType}) - if n.LocalStatus() == api.NetworkStatusCreated || n.LocalStatus() == api.NetworkStatusUnknown { - err := n.Stop() + err := n.Stop() + if err != nil { + return err + } + + if clientType == request.ClientTypeNormal { + client, err := n.getClient() if err != nil { return err } - if clientType == request.ClientTypeNormal { - client, err := n.getClient() - if err != nil { - return err - } - - err = client.LogicalRouterDelete(n.getRouterName()) - if err != nil { - return err - } + err = client.LogicalRouterDelete(n.getRouterName()) + if err != nil { + return err + } - err = client.LogicalSwitchDelete(n.getExtSwitchName()) - if err != nil { - return err - } + err = client.LogicalSwitchDelete(n.getExtSwitchName()) + if err != nil { + return err + } - err = client.LogicalSwitchDelete(n.getIntSwitchName()) - if err != nil { - return err - } + err = client.LogicalSwitchDelete(n.getIntSwitchName()) + if err != nil { + return err + } - err = client.LogicalRouterPortDelete(n.getRouterExtPortName()) - if err != nil { - return err - } + err = client.LogicalRouterPortDelete(n.getRouterExtPortName()) + if err != nil { + return err + } - err = client.LogicalRouterPortDelete(n.getRouterIntPortName()) - if err != nil { - return err - } + err = client.LogicalRouterPortDelete(n.getRouterIntPortName()) + if err != nil { + return err + } - err = client.LogicalSwitchPortDelete(n.getExtSwitchRouterPortName()) - if err != nil { - return err - } + err = client.LogicalSwitchPortDelete(n.getExtSwitchRouterPortName()) + if err != nil { + return err + } - err = client.LogicalSwitchPortDelete(n.getExtSwitchProviderPortName()) - if err != nil { - return err - } + err = client.LogicalSwitchPortDelete(n.getExtSwitchProviderPortName()) + if err != nil { + return err + } - err = client.LogicalSwitchPortDelete(n.getIntSwitchRouterPortName()) - if err != nil { - return err - } + err = client.LogicalSwitchPortDelete(n.getIntSwitchRouterPortName()) + if err != nil { + return err + } - // Must be done after logical router removal. - err = client.ChassisGroupDelete(n.getChassisGroupName()) - if err != nil { - return err - } + // Must be done after logical router removal. + err = client.ChassisGroupDelete(n.getChassisGroupName()) + if err != nil { + return err } } diff --git a/lxd/network/driver_physical.go b/lxd/network/driver_physical.go index 6cf8bd31e1..18fcdc3c8f 100644 --- a/lxd/network/driver_physical.go +++ b/lxd/network/driver_physical.go @@ -123,11 +123,9 @@ func (n *physical) Create(clientType request.ClientType) error { func (n *physical) Delete(clientType request.ClientType) error { n.logger.Debug("Delete", log.Ctx{"clientType": clientType}) - if n.LocalStatus() == api.NetworkStatusCreated || n.LocalStatus() == api.NetworkStatusUnknown { - err := n.Stop() - if err != nil { - return err - } + err := n.Stop() + if err != nil { + return err } return n.common.delete(clientType) diff --git a/lxd/network/driver_sriov.go b/lxd/network/driver_sriov.go index 988e1ea1cc..2300a573c5 100644 --- a/lxd/network/driver_sriov.go +++ b/lxd/network/driver_sriov.go @@ -45,6 +45,7 @@ func (n *sriov) Validate(config map[string]string) error { // Delete deletes a network. func (n *sriov) Delete(clientType request.ClientType) error { n.logger.Debug("Delete", log.Ctx{"clientType": clientType}) + return n.common.delete(clientType) } From 8485fc2937ded3135e4272df869d4c7f3eb203db Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:49:28 +0000 Subject: [PATCH 5/8] lxc/networks: Remove revert removal on failure of clustered network in networksPost We need to leave network around in pending state so errors can be corrected. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/networks.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/lxd/networks.go b/lxd/networks.go index c4e2e82b66..2f4a0b92c1 100644 --- a/lxd/networks.go +++ b/lxd/networks.go @@ -207,9 +207,6 @@ func networksPost(d *Daemon, r *http.Request) response.Response { return resp } - revert := revert.New() - defer revert.Fail() - targetNode := queryParam(r, "target") if targetNode != "" { if !netTypeInfo.NodeSpecificConfig { @@ -246,10 +243,6 @@ func networksPost(d *Daemon, r *http.Request) response.Response { if count > 1 { // Simulate adding pending node network config when the driver doesn't support per-node config. if !netTypeInfo.NodeSpecificConfig && clientType != request.ClientTypeJoiner { - revert.Add(func() { - d.cluster.DeleteNetwork(projectName, req.Name) - }) - // Create pending entry for each node. err = d.cluster.Transaction(func(tx *db.ClusterTx) error { nodes, err := tx.GetNodes() @@ -276,11 +269,13 @@ func networksPost(d *Daemon, r *http.Request) response.Response { return response.SmartError(err) } - revert.Success() return resp } // Non-clustered network creation. + revert := revert.New() + defer revert.Fail() + networks, err := d.cluster.GetNetworks(projectName) if err != nil { return response.InternalError(err) @@ -301,9 +296,7 @@ func networksPost(d *Daemon, r *http.Request) response.Response { if err != nil { return response.SmartError(errors.Wrapf(err, "Error inserting %q into database", req.Name)) } - revert.Add(func() { - d.cluster.DeleteNetwork(projectName, req.Name) - }) + revert.Add(func() { d.cluster.DeleteNetwork(projectName, req.Name) }) err = doNetworksCreate(d, projectName, req, clientType) if err != nil { From 0de956cd8e7ace5af4b454d8210c62ad5d25d2e6 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:50:48 +0000 Subject: [PATCH 6/8] lxd/networks: Allow re-create of pending network when pending nodes already exist in networksPost This is needed as some network types (ovn) auto create the pending node records for each node without using the --target argument, Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/networks.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lxd/networks.go b/lxd/networks.go index 2f4a0b92c1..03b068fad8 100644 --- a/lxd/networks.go +++ b/lxd/networks.go @@ -252,7 +252,7 @@ func networksPost(d *Daemon, r *http.Request) response.Response { for _, node := range nodes { err = tx.CreatePendingNetwork(node.Name, projectName, req.Name, netType.DBType(), req.Config) - if err != nil { + if err != nil && errors.Cause(err) != db.ErrAlreadyDefined { return errors.Wrapf(err, "Failed creating pending network for node %q", node.Name) } } From 5b23efe39ab897bcbea474d3abbb5623d0f25236 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:51:42 +0000 Subject: [PATCH 7/8] lxd/networks: Adds revert to doNetworksCreate Brings into line with storagePoolCreateLocal. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/networks.go | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/lxd/networks.go b/lxd/networks.go index 03b068fad8..2426e00d2d 100644 --- a/lxd/networks.go +++ b/lxd/networks.go @@ -429,6 +429,9 @@ func networksPostCluster(d *Daemon, projectName string, req api.NetworksPost, cl // Create the network on the system. The clusterNotification flag is used to indicate whether creation request // is coming from a cluster notification (and if so we should not delete the database record on error). func doNetworksCreate(d *Daemon, projectName string, req api.NetworksPost, clientType request.ClientType) error { + revert := revert.New() + defer revert.Fail() + // Start the network. n, err := network.LoadByName(d.State(), projectName, req.Name) if err != nil { @@ -452,16 +455,13 @@ func doNetworksCreate(d *Daemon, projectName string, req api.NetworksPost, clien return err } + revert.Add(func() { n.Delete(clientType) }) + // Only start networks when not doing a cluster pre-join phase (this ensures that networks are only started // once the node has fully joined the clustered database and has consistent config with rest of the nodes). if clientType != request.ClientTypeJoiner { err = n.Start() if err != nil { - delErr := n.Delete(clientType) - if delErr != nil { - logger.Error("Failed clearing up network after failed create", log.Ctx{"project": projectName, "network": n.Name(), "err": delErr}) - } - return err } } @@ -471,14 +471,11 @@ func doNetworksCreate(d *Daemon, projectName string, req api.NetworksPost, clien return tx.NetworkNodeCreated(n.ID()) }) if err != nil { - delErr := n.Delete(clientType) - if delErr != nil { - logger.Error("Failed clearing up network after failed local status update", log.Ctx{"project": projectName, "network": n.Name(), "err": delErr}) - } return err } logger.Debug("Marked network local status as created", log.Ctx{"project": projectName, "network": req.Name}) + revert.Success() return nil } From 0d413c57563eb91ac002458dc2b536dd0d5cded3 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 11 Dec 2020 14:52:25 +0000 Subject: [PATCH 8/8] lxd/networks: Moves cluster notification an DB record removal into networkDelete This is so that only specific requests to delete the network trigger full removal, whereas using n.Delete() in a revert during create only deletes the local network setup, not the DB record. Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/networks.go | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/lxd/networks.go b/lxd/networks.go index 2426e00d2d..daf238facf 100644 --- a/lxd/networks.go +++ b/lxd/networks.go @@ -616,8 +616,39 @@ func networkDelete(d *Daemon, r *http.Request) response.Response { } } - // Delete the network from each member. - err = n.Delete(clientType) + if n.LocalStatus() != api.NetworkStatusPending { + err = n.Delete(clientType) + if err != nil { + return response.InternalError(err) + } + } + + // If this is a cluster notification, we're done, any database work will be done by the node that is + // originally serving the request. + if clusterNotification { + return response.EmptySyncResponse + } + + // If we are clustered, also notify all other nodes, if any. + clustered, err := cluster.Enabled(d.db) + if err != nil { + return response.SmartError(err) + } + if clustered { + notifier, err := cluster.NewNotifier(d.State(), d.endpoints.NetworkCert(), cluster.NotifyAll) + if err != nil { + return response.SmartError(err) + } + err = notifier(func(client lxd.InstanceServer) error { + return client.UseProject(n.Project()).DeleteNetwork(n.Name()) + }) + if err != nil { + return response.SmartError(err) + } + } + + // Remove the network from the database. + err = d.State().Cluster.DeleteNetwork(n.Project(), n.Name()) if err != nil { return response.SmartError(err) }
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel