The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/6782
This e-mail was sent by the LXC bot, direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) === If the VM start up fails for some reason, clean up devices added on the host so we don't leave stray devices around. Also, if failure occurs after VM process is launched, then actively kill VM process if `Start()` still fails to complete cleanly.
From b869287f958d4d53f1685a5f85a5d490dee98843 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 17:31:30 +0000 Subject: [PATCH 1/2] lxd/instance/drivers/qmp/monitor: Prevent crashes with races closing closed channel Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/instance/drivers/qmp/monitor.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lxd/instance/drivers/qmp/monitor.go b/lxd/instance/drivers/qmp/monitor.go index 79a2692519..4b96312228 100644 --- a/lxd/instance/drivers/qmp/monitor.go +++ b/lxd/instance/drivers/qmp/monitor.go @@ -154,7 +154,9 @@ func (m *Monitor) Wait() (chan struct{}, error) { // Disconnect forces a disconnection from QEMU. func (m *Monitor) Disconnect() { // Stop all go routines and disconnect from socket. - close(m.chDisconnect) + if !m.disconnected { + close(m.chDisconnect) + } m.disconnected = true m.qmp.Disconnect() From a618982604575187f1a592783bfa5002fbbef817 Mon Sep 17 00:00:00 2001 From: Thomas Parrott <thomas.parr...@canonical.com> Date: Fri, 24 Jan 2020 17:32:06 +0000 Subject: [PATCH 2/2] lxd/instance/drivers/driver/qemu: Improve clean up on start failure Signed-off-by: Thomas Parrott <thomas.parr...@canonical.com> --- lxd/instance/drivers/driver_qemu.go | 54 +++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/lxd/instance/drivers/driver_qemu.go b/lxd/instance/drivers/driver_qemu.go index dd4196d9dc..ef0f5dc99b 100644 --- a/lxd/instance/drivers/driver_qemu.go +++ b/lxd/instance/drivers/driver_qemu.go @@ -378,7 +378,6 @@ func (vm *qemu) mount() (bool, error) { // unmount the instance's config volume if needed. func (vm *qemu) unmount() (bool, error) { - var pool storagePools.Pool pool, err := vm.getStoragePool() if err != nil { return false, err @@ -591,15 +590,13 @@ func (vm *qemu) Start(stateful bool) error { defer revert.Fail() // Mount the instance's config volume. - ourMount, err := vm.mount() + _, err = vm.mount() if err != nil { op.Done(err) return err } - if ourMount { - revert.Add(func() { vm.unmount() }) - } + revert.Add(func() { vm.unmount() }) err = vm.generateConfigShare() if err != nil { @@ -657,6 +654,16 @@ func (vm *qemu) Start(stateful bool) error { continue } + // Use a local function argument to ensure the current device is added to the reverter. + func(localDev deviceConfig.DeviceNamed) { + revert.Add(func() { + err := vm.deviceStop(localDev.Name, localDev.Config) + if err != nil { + logger.Errorf("Failed to cleanup device '%s': %v", localDev.Name, err) + } + }) + }(dev) + devConfs = append(devConfs, runConf) } @@ -714,11 +721,13 @@ func (vm *qemu) Start(stateful bool) error { err := filepath.Walk(filepath.Join(vm.Path(), "config"), func(path string, info os.FileInfo, err error) error { if err != nil { + op.Done(err) return err } err = os.Chown(path, vm.state.OS.UnprivUID, -1) if err != nil { + op.Done(err) return err } @@ -749,7 +758,9 @@ func (vm *qemu) Start(stateful bool) error { for _, file := range fdFiles { f, err := os.OpenFile(file, os.O_RDWR, 0) if err != nil { - return errors.Wrapf(err, "Error opening exta file %q", file) + err = errors.Wrapf(err, "Error opening exta file %q", file) + op.Done(err) + return err } defer f.Close() // Close file after qemu has started. cmd.ExtraFiles = append(cmd.ExtraFiles, f) @@ -762,6 +773,25 @@ func (vm *qemu) Start(stateful bool) error { return err } + pid, err := vm.pid() + if err != nil { + logger.Errorf(`Failed to get VM process ID "%d"`, pid) + return err + } + + revert.Add(func() { + proc, err := os.FindProcess(pid) + if err != nil { + logger.Errorf(`Failed to find VM process "%d"`, pid) + return + } + + proc.Kill() + if err != nil { + logger.Errorf(`Failed to kill VM process "%d"`, pid) + } + }) + // Start QMP monitoring. monitor, err := qmp.Connect(vm.getMonitorPath(), vm.getMonitorEventHandler()) if err != nil { @@ -781,13 +811,17 @@ func (vm *qemu) Start(stateful bool) error { // Record current state err = tx.ContainerSetState(vm.id, "RUNNING") if err != nil { - return errors.Wrap(err, "Error updating instance state") + err = errors.Wrap(err, "Error updating instance state") + op.Done(err) + return err } // Update time instance last started time err = tx.ContainerLastUsedUpdate(vm.id, time.Now().UTC()) if err != nil { - return errors.Wrap(err, "Error updating instance last used") + err = errors.Wrap(err, "Error updating instance last used") + op.Done(err) + return err } return nil @@ -797,9 +831,8 @@ func (vm *qemu) Start(stateful bool) error { return err } - vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil) - revert.Success() + vm.state.Events.SendLifecycle(vm.project, "virtual-machine-started", fmt.Sprintf("/1.0/virtual-machines/%s", vm.name), nil) return nil } @@ -938,7 +971,6 @@ func (vm *qemu) deviceStop(deviceName string, rawConfig deviceConfig.Device) err canHotPlug, _ := d.CanHotPlug() - // An empty netns path means we haven't been called from the LXC stop hook, so are running. if vm.IsRunning() && !canHotPlug { return fmt.Errorf("Device cannot be stopped when instance is running") }
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel