The following pull request was submitted through GitHub.
It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/6782

This e-mail was sent by the LXC bot; direct replies will not reach the author
unless they happen to be subscribed to this list.

=== Description (from pull-request) ===
If VM start-up fails for some reason, clean up the devices added on the host so we don't leave stray devices around.

Also, if a failure occurs after the VM process has been launched, actively kill the VM process so it isn't left running when `Start()` fails to complete cleanly.
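
For reviewers unfamiliar with the pattern: the clean-up relies on a reverter that accumulates undo functions and only runs them if start-up does not complete. The sketch below is a minimal, standalone illustration of that idea; the Reverter type here is hypothetical and assumes nothing about LXD's actual revert package beyond the Add/Fail/Success calls visible in the diffs.

    // revertsketch: a minimal stand-in for the reverter idea used below.
    package main

    import "fmt"

    // Reverter collects undo functions and runs them in reverse order on failure.
    type Reverter struct {
        undo    []func()
        success bool
    }

    // Add registers a cleanup function to run if the operation fails.
    func (r *Reverter) Add(f func()) {
        r.undo = append(r.undo, f)
    }

    // Success marks the operation as complete, turning Fail() into a no-op.
    func (r *Reverter) Success() {
        r.success = true
    }

    // Fail runs the registered cleanup functions in reverse order,
    // unless Success() was called first.
    func (r *Reverter) Fail() {
        if r.success {
            return
        }
        for i := len(r.undo) - 1; i >= 0; i-- {
            r.undo[i]()
        }
    }

    func main() {
        r := &Reverter{}
        defer r.Fail() // Undo everything added so far if we return early.

        fmt.Println("add host device")
        r.Add(func() { fmt.Println("revert: remove host device") })

        fmt.Println("launch VM process")
        r.Add(func() { fmt.Println("revert: kill VM process") })

        // Reaching the end without error cancels the deferred cleanup.
        r.Success()
    }

In the second patch, each host device and the launched QEMU process get such an undo entry, so a failed Start() unwinds them in reverse order.
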
From b869287f958d4d53f1685a5f85a5d490dee98843 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 24 Jan 2020 17:31:30 +0000
Subject: [PATCH 1/2] lxd/instance/drivers/qmp/monitor: Prevent crashes with
 races closing closed channel

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/qmp/monitor.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lxd/instance/drivers/qmp/monitor.go b/lxd/instance/drivers/qmp/monitor.go
index 79a2692519..4b96312228 100644
--- a/lxd/instance/drivers/qmp/monitor.go
+++ b/lxd/instance/drivers/qmp/monitor.go
@@ -154,7 +154,9 @@ func (m *Monitor) Wait() (chan struct{}, error) {
 // Disconnect forces a disconnection from QEMU.
 func (m *Monitor) Disconnect() {
        // Stop all go routines and disconnect from socket.
-       close(m.chDisconnect)
+       if !m.disconnected {
+               close(m.chDisconnect)
+       }
        m.disconnected = true
        m.qmp.Disconnect()
 

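For context on the first patch: in Go, closing an already-closed channel panics with "close of closed channel", which is the crash being guarded against. The standalone sketch below shows one race-free way to ensure a channel is closed exactly once; it is illustrative only (it uses sync.Once rather than the boolean flag the patch uses) and is not the monitor code itself.

    package main

    import (
        "fmt"
        "sync"
    )

    type conn struct {
        chDisconnect chan struct{}
        closeOnce    sync.Once
    }

    // Disconnect may be called any number of times from any goroutine;
    // the channel is only ever closed once, so no panic is possible.
    func (c *conn) Disconnect() {
        c.closeOnce.Do(func() {
            close(c.chDisconnect)
        })
    }

    func main() {
        c := &conn{chDisconnect: make(chan struct{})}

        var wg sync.WaitGroup
        for i := 0; i < 3; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                c.Disconnect() // Concurrent calls are safe.
            }()
        }
        wg.Wait()

        _, open := <-c.chDisconnect
        fmt.Println("channel open:", open) // Prints "channel open: false".
    }
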
From a618982604575187f1a592783bfa5002fbbef817 Mon Sep 17 00:00:00 2001
From: Thomas Parrott <thomas.parr...@canonical.com>
Date: Fri, 24 Jan 2020 17:32:06 +0000
Subject: [PATCH 2/2] lxd/instance/drivers/driver/qemu: Improve clean up on
 start failure

Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com>
---
 lxd/instance/drivers/driver_qemu.go | 54 +++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go
index dd4196d9dc..ef0f5dc99b 100644
--- a/lxd/instance/drivers/driver_qemu.go
+++ b/lxd/instance/drivers/driver_qemu.go
@@ -378,7 +378,6 @@ func (vm *qemu) mount() (bool, error) {
 
 // unmount the instance's config volume if needed.
 func (vm *qemu) unmount() (bool, error) {
-       var pool storagePools.Pool
        pool, err := vm.getStoragePool()
        if err != nil {
                return false, err
@@ -591,15 +590,13 @@ func (vm *qemu) Start(stateful bool) error {
        defer revert.Fail()
 
        // Mount the instance's config volume.
-       ourMount, err := vm.mount()
+       _, err = vm.mount()
        if err != nil {
                op.Done(err)
                return err
        }
 
-       if ourMount {
-               revert.Add(func() { vm.unmount() })
-       }
+       revert.Add(func() { vm.unmount() })
 
        err = vm.generateConfigShare()
        if err != nil {
@@ -657,6 +654,16 @@ func (vm *qemu) Start(stateful bool) error {
                        continue
                }
 
+               // Use a local function argument to ensure the current device is added to the reverter.
+               func(localDev deviceConfig.DeviceNamed) {
+                       revert.Add(func() {
+                               err := vm.deviceStop(localDev.Name, localDev.Config)
+                               if err != nil {
+                                       logger.Errorf("Failed to cleanup device '%s': %v", localDev.Name, err)
+                               }
+                       })
+               }(dev)
+
                devConfs = append(devConfs, runConf)
        }
 
@@ -714,11 +721,13 @@ func (vm *qemu) Start(stateful bool) error {
                err := filepath.Walk(filepath.Join(vm.Path(), "config"),
                        func(path string, info os.FileInfo, err error) error {
                                if err != nil {
+                                       op.Done(err)
                                        return err
                                }
 
                                err = os.Chown(path, vm.state.OS.UnprivUID, -1)
                                if err != nil {
+                                       op.Done(err)
                                        return err
                                }
 
@@ -749,7 +758,9 @@ func (vm *qemu) Start(stateful bool) error {
        for _, file := range fdFiles {
                f, err := os.OpenFile(file, os.O_RDWR, 0)
                if err != nil {
-                       return errors.Wrapf(err, "Error opening exta file %q", file)
+                       err = errors.Wrapf(err, "Error opening exta file %q", file)
+                       op.Done(err)
+                       return err
                }
                defer f.Close() // Close file after qemu has started.
                cmd.ExtraFiles = append(cmd.ExtraFiles, f)
@@ -762,6 +773,25 @@ func (vm *qemu) Start(stateful bool) error {
                return err
        }
 
+       pid, err := vm.pid()
+       if err != nil {
+               logger.Errorf(`Failed to get VM process ID "%d"`, pid)
+               return err
+       }
+
+       revert.Add(func() {
+               proc, err := os.FindProcess(pid)
+               if err != nil {
+                       logger.Errorf(`Failed to find VM process "%d"`, pid)
+                       return
+               }
+
+               err = proc.Kill()
+               if err != nil {
+                       logger.Errorf(`Failed to kill VM process "%d"`, pid)
+               }
+       })
+
        // Start QMP monitoring.
        monitor, err := qmp.Connect(vm.getMonitorPath(), vm.getMonitorEventHandler())
        if err != nil {
@@ -781,13 +811,17 @@ func (vm *qemu) Start(stateful bool) error {
                // Record current state
                err = tx.ContainerSetState(vm.id, "RUNNING")
                if err != nil {
-                       return errors.Wrap(err, "Error updating instance state")
+                       err = errors.Wrap(err, "Error updating instance state")
+                       op.Done(err)
+                       return err
                }
 
                // Update time instance last started time
                err = tx.ContainerLastUsedUpdate(vm.id, time.Now().UTC())
                if err != nil {
-                       return errors.Wrap(err, "Error updating instance last used")
+                       err = errors.Wrap(err, "Error updating instance last used")
+                       op.Done(err)
+                       return err
                }
 
                return nil
@@ -797,9 +831,8 @@ func (vm *qemu) Start(stateful bool) error {
                return err
        }
 
-       vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
-
        revert.Success()
+       vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil)
        return nil
 }
 
@@ -938,7 +971,6 @@ func (vm *qemu) deviceStop(deviceName string, rawConfig deviceConfig.Device) err
 
        canHotPlug, _ := d.CanHotPlug()
 
-       // An empty netns path means we haven't been called from the LXC stop hook, so are running.
        if vm.IsRunning() && !canHotPlug {
                return fmt.Errorf("Device cannot be stopped when instance is running")
        }
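
For context on the `func(localDev deviceConfig.DeviceNamed)` wrapper in the second patch: before Go 1.22 the range variable is reused across iterations, so closures registered with the reverter would otherwise all capture the last device. The standalone sketch below (device names are made up) contrasts direct capture with the argument-copy pattern the patch uses.

    package main

    import "fmt"

    func main() {
        devices := []string{"eth0", "disk0", "gpu0"}

        var direct, copied []func()

        for _, dev := range devices {
            // Captures the shared loop variable; with Go versions before
            // 1.22 every one of these closures sees the final element.
            direct = append(direct, func() { fmt.Println("cleanup", dev) })

            // The pattern from the patch: pass the current value in as an
            // argument so each closure gets its own copy.
            func(localDev string) {
                copied = append(copied, func() { fmt.Println("cleanup", localDev) })
            }(dev)
        }

        fmt.Println("direct capture:")
        for _, f := range direct {
            f()
        }

        fmt.Println("argument copy (as in the patch):")
        for _, f := range copied {
            f()
        }
    }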