The following pull request was submitted through Github. It can be accessed and reviewed at: https://github.com/lxc/lxd/pull/1587
This e-mail was sent by the LXC bot; direct replies will not reach the author unless they happen to be subscribed to this list. === Description (from pull-request) ===
From 12197138f641d10ea02200f90207117ca2f0e517 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Fri, 5 Feb 2016 09:11:44 +0100 Subject: [PATCH 1/5] Make blkio limits more robust MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only apply I/O limits when set by the user, ignore failure to find a block device when clearing its limit, have everything else return errors. Closes #1568 Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/container_lxc.go | 141 +++++++++++++++++++++++++++++++++------------------ lxd/devices.go | 66 ++++++++++++++++-------- 2 files changed, 136 insertions(+), 71 deletions(-) diff --git a/lxd/container_lxc.go b/lxd/container_lxc.go index 325bb41..073b9b7 100644 --- a/lxd/container_lxc.go +++ b/lxd/container_lxc.go @@ -563,37 +563,51 @@ func (c *containerLXC) initLXC() error { } } - diskLimits, err := c.getDiskLimits() - if err != nil { - return err + hasDiskLimits := false + for _, m := range c.expandedDevices { + if m["type"] != "disk" { + continue + } + + if m["limits.read"] != "" || m["limits.write"] != "" || m["limits.max"] != "" { + hasDiskLimits = true + break + } } - for block, limit := range diskLimits { - if limit.readBps > 0 { - err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.read_bps_device", fmt.Sprintf("%s %d", block, limit.readBps)) - if err != nil { - return err - } + if hasDiskLimits { + diskLimits, err := c.getDiskLimits() + if err != nil { + return err } - if limit.readIops > 0 { - err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.read_iops_device", fmt.Sprintf("%s %d", block, limit.readIops)) - if err != nil { - return err + for block, limit := range diskLimits { + if limit.readBps > 0 { + err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.read_bps_device", fmt.Sprintf("%s %d", block, limit.readBps)) + if err != nil { + return err + } } - } - if limit.writeBps > 0 { - err = lxcSetConfigItem(cc, 
"lxc.cgroup.blkio.throttle.write_bps_device", fmt.Sprintf("%s %d", block, limit.writeBps)) - if err != nil { - return err + if limit.readIops > 0 { + err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.read_iops_device", fmt.Sprintf("%s %d", block, limit.readIops)) + if err != nil { + return err + } } - } - if limit.writeIops > 0 { - err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.write_iops_device", fmt.Sprintf("%s %d", block, limit.writeIops)) - if err != nil { - return err + if limit.writeBps > 0 { + err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.write_bps_device", fmt.Sprintf("%s %d", block, limit.writeBps)) + if err != nil { + return err + } + } + + if limit.writeIops > 0 { + err = lxcSetConfigItem(cc, "lxc.cgroup.blkio.throttle.write_iops_device", fmt.Sprintf("%s %d", block, limit.writeIops)) + if err != nil { + return err + } } } } @@ -3479,6 +3493,33 @@ func (c *containerLXC) removeDiskDevices() error { func (c *containerLXC) getDiskLimits() (map[string]deviceBlockLimit, error) { result := map[string]deviceBlockLimit{} + // Build a list of all valid block devices + validBlocks := []string{} + + dents, err := ioutil.ReadDir("/sys/class/block/") + if err != nil { + return nil, err + } + + for _, f := range dents { + fPath := filepath.Join("/sys/class/block/", f.Name()) + if shared.PathExists(fmt.Sprintf("%s/partition", fPath)) { + continue + } + + if !shared.PathExists(fmt.Sprintf("%s/dev", fPath)) { + continue + } + + block, err := ioutil.ReadFile(fmt.Sprintf("%s/dev", fPath)) + if err != nil { + return nil, err + } + + validBlocks = append(validBlocks, strings.TrimSuffix(string(block), "\n")) + } + + // Process all the limits blockLimits := map[string][]deviceBlockLimit{} for _, m := range c.expandedDevices { if m["type"] != "disk" { @@ -3491,48 +3532,49 @@ func (c *containerLXC) getDiskLimits() (map[string]deviceBlockLimit, error) { m["limits.write"] = m["limits.max"] } + // Parse the user input + readBps, readIops, writeBps, writeIops, err 
:= deviceParseDiskLimit(m["limits.read"], m["limits.write"]) + if err != nil { + return nil, err + } + + // Set the source path source := m["source"] if m["path"] == "" { source = c.RootfsPath() } + // Get the backing block devices (major:minor) blocks, err := deviceGetParentBlocks(source) if err != nil { - return nil, err + if readBps == 0 && readIops == 0 && writeBps == 0 && writeIops == 0 { + // If the device doesn't exist, there is no limit to clear so ignore the failure + continue + } else { + return nil, err + } } - readBps, readIops, writeBps, writeIops, err := deviceParseDiskLimit(m["limits.read"], m["limits.write"]) - if err != nil { - return nil, err - } device := deviceBlockLimit{readBps: readBps, readIops: readIops, writeBps: writeBps, writeIops: writeIops} - for _, block := range blocks { - dev := strings.TrimPrefix(block, "/dev/") - - if strings.Contains(dev, "/") { - continue - } + blockStr := "" - if !shared.PathExists(fmt.Sprintf("/sys/class/block/%s/dev", dev)) { - return nil, fmt.Errorf("Disk %s is missing /sys/class/block entry", dev) - } - - block, err := ioutil.ReadFile(fmt.Sprintf("/sys/class/block/%s/dev", dev)) - if err != nil { - return nil, err - } - - fields := strings.SplitN(strings.TrimSuffix(string(block), "\n"), ":", 2) - if len(fields) != 2 { - return nil, fmt.Errorf("Invalid major:minor: %s", block) + if shared.StringInSlice(block, validBlocks) { + // Straightforward entry (full block device) + blockStr = block + } else { + // Attempt to deal with a partition (guess its parent) + fields := strings.SplitN(block, ":", 2) + fields[1] = "0" + if shared.StringInSlice(fmt.Sprintf("%s:%s", fields[0], fields[1]), validBlocks) { + blockStr = fmt.Sprintf("%s:%s", fields[0], fields[1]) + } } - if shared.PathExists(fmt.Sprintf("/sys/class/block/%s/partition", dev)) { - fields[1] = "0" + if blockStr == "" { + return nil, fmt.Errorf("Block device doesn't support quotas: %s", block) } - blockStr := fmt.Sprintf("%s:%s", fields[0], fields[1]) if 
blockLimits[blockStr] == nil { blockLimits[blockStr] = []deviceBlockLimit{} } @@ -3540,6 +3582,7 @@ func (c *containerLXC) getDiskLimits() (map[string]deviceBlockLimit, error) { } } + // Average duplicate limits for block, limits := range blockLimits { var readBpsCount, readBpsTotal, readIopsCount, readIopsTotal, writeBpsCount, writeBpsTotal, writeIopsCount, writeIopsTotal int64 diff --git a/lxd/devices.go b/lxd/devices.go index 93b7a40..9c4bd64 100644 --- a/lxd/devices.go +++ b/lxd/devices.go @@ -737,6 +737,7 @@ func deviceTotalMemory() (int64, error) { func deviceGetParentBlocks(path string) ([]string, error) { var devices []string + var device []string // Expand the mount path absPath, err := filepath.Abs(path) @@ -757,7 +758,6 @@ func deviceGetParentBlocks(path string) ([]string, error) { defer file.Close() scanner := bufio.NewScanner(file) - device := "" match := "" for scanner.Scan() { line := scanner.Text() @@ -774,23 +774,29 @@ func deviceGetParentBlocks(path string) ([]string, error) { match = rows[4] // Go backward to avoid problems with optional fields - device = rows[len(rows)-2] + device = []string{rows[2], rows[len(rows)-2]} } - if device == "" { + if device == nil { return nil, fmt.Errorf("Couldn't find a match /proc/self/mountinfo entry") } + // Handle the most simple case + if !strings.HasPrefix(device[0], "0:") { + return []string{device[0]}, nil + } + // Deal with per-filesystem oddities. We don't care about failures here // because any non-special filesystem => directory backend. 
fs, _ := filesystemDetect(expPath) if fs == "zfs" && shared.PathExists("/dev/zfs") { - poolName := strings.Split(device, "/")[0] + // Accessible zfs filesystems + poolName := strings.Split(device[1], "/")[0] output, err := exec.Command("zpool", "status", poolName).CombinedOutput() if err != nil { - return nil, fmt.Errorf("Failed to query zfs filesystem information for %s: %s", device, output) + return nil, fmt.Errorf("Failed to query zfs filesystem information for %s: %s", device[1], output) } for _, line := range strings.Split(string(output), "\n") { @@ -803,9 +809,10 @@ func deviceGetParentBlocks(path string) ([]string, error) { continue } + var path string if shared.PathExists(fields[0]) { if shared.IsBlockdevPath(fields[0]) { - devices = append(devices, fields[0]) + path = fields[0] } else { subDevices, err := deviceGetParentBlocks(fields[0]) if err != nil { @@ -817,17 +824,27 @@ func deviceGetParentBlocks(path string) ([]string, error) { } } } else if shared.PathExists(fmt.Sprintf("/dev/%s", fields[0])) { - devices = append(devices, fmt.Sprintf("/dev/%s", fields[0])) + path = fmt.Sprintf("/dev/%s", fields[0]) } else if shared.PathExists(fmt.Sprintf("/dev/disk/by-id/%s", fields[0])) { - devices = append(devices, fmt.Sprintf("/dev/disk/by-id/%s", fields[0])) + path = fmt.Sprintf("/dev/disk/by-id/%s", fields[0]) } else { - continue + return nil, fmt.Errorf("Unsupported zfs backing device: %s", fields[0]) + } + + if path != "" { + _, major, minor, err := deviceGetAttributes(fields[len(fields)-1]) + if err != nil { + return nil, err + } + + devices = append(devices, fmt.Sprintf("%d:%d", major, minor)) } } - } else if fs == "btrfs" && shared.PathExists(device) { - output, err := exec.Command("btrfs", "filesystem", "show", device).CombinedOutput() + } else if fs == "btrfs" && shared.PathExists(device[1]) { + // Accessible btrfs filesystems + output, err := exec.Command("btrfs", "filesystem", "show", device[1]).CombinedOutput() if err != nil { - return nil, 
fmt.Errorf("Failed to query btrfs filesystem information for %s: %s", device, output) + return nil, fmt.Errorf("Failed to query btrfs filesystem information for %s: %s", device[1], output) } for _, line := range strings.Split(string(output), "\n") { @@ -836,18 +853,23 @@ func deviceGetParentBlocks(path string) ([]string, error) { continue } - devices = append(devices, fields[len(fields)-1]) - } - } else if shared.PathExists(device) { - devices = append(devices, device) - } + _, major, minor, err := deviceGetAttributes(fields[len(fields)-1]) + if err != nil { + return nil, err + } - // Expand the device paths - for i, dev := range devices { - target, err := filepath.EvalSymlinks(dev) - if err == nil { - devices[i] = target + devices = append(devices, fmt.Sprintf("%d:%d", major, minor)) } + } else if shared.PathExists(device[1]) { + // Anything else with a valid path + _, major, minor, err := deviceGetAttributes(device[1]) + if err != nil { + return nil, err + } + + devices = append(devices, fmt.Sprintf("%d:%d", major, minor)) + } else { + return nil, fmt.Errorf("Invalid block device: %s", device[1]) } return devices, nil From 4a15a0918a33ee0be5689e42f941d33e33c55550 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Tue, 9 Feb 2016 20:49:43 -0500 Subject: [PATCH 2/5] Change ShiftIfNecessary to shift on startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1585 Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/storage.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/lxd/storage.go b/lxd/storage.go index f3e4c9a..cb99710 100644 --- a/lxd/storage.go +++ b/lxd/storage.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "fmt" "os" "os/exec" @@ -535,11 +536,19 @@ func ShiftIfNecessary(container container, srcIdmap *shared.IdmapSet) error { } if !reflect.DeepEqual(srcIdmap, dstIdmap) { - if err := 
srcIdmap.UnshiftRootfs(container.Path()); err != nil { - return err + var jsonIdmap string + if srcIdmap != nil { + idmapBytes, err := json.Marshal(srcIdmap.Idmap) + if err != nil { + return err + } + jsonIdmap = string(idmapBytes) + } else { + jsonIdmap = "[]" } - if err := dstIdmap.ShiftRootfs(container.Path()); err != nil { + err := container.ConfigKeySet("volatile.last_state.idmap", jsonIdmap) + if err != nil { return err } } From cd92657a0ad30eec410944b427ec18f337f6629f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Tue, 9 Feb 2016 16:09:19 -0500 Subject: [PATCH 3/5] Add btrfs send/receive support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1176 Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/storage_btrfs.go | 231 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 4 deletions(-) diff --git a/lxd/storage_btrfs.go b/lxd/storage_btrfs.go index 7e2207d..0aa522d 100644 --- a/lxd/storage_btrfs.go +++ b/lxd/storage_btrfs.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "io/ioutil" "os" "os/exec" "path" @@ -10,6 +11,7 @@ import ( "syscall" "github.com/gorilla/websocket" + "github.com/pborman/uuid" "github.com/lxc/lxd/shared" @@ -766,14 +768,235 @@ func (s *storageBtrfs) getSubVolumes(path string) ([]string, error) { return result, nil } +type btrfsMigrationSource struct { + lxdName string + deleteAfterSending bool + btrfsPath string + btrfsParent string + + btrfs *storageBtrfs +} + +func (s btrfsMigrationSource) Name() string { + return s.lxdName +} + +func (s btrfsMigrationSource) IsSnapshot() bool { + return !s.deleteAfterSending +} + +func (s btrfsMigrationSource) Send(conn *websocket.Conn) error { + args := []string{"send", s.btrfsPath} + if s.btrfsParent != "" { + args = append(args, "-p", s.btrfsParent) + } + + cmd := exec.Command("btrfs", args...) 
+ + deleteAfterSending := func(path string) { + s.btrfs.subvolsDelete(path) + os.Remove(filepath.Dir(path)) + } + + stdout, err := cmd.StdoutPipe() + if err != nil { + if s.deleteAfterSending { + deleteAfterSending(s.btrfsPath) + } + return err + } + + stderr, err := cmd.StderrPipe() + if err != nil { + if s.deleteAfterSending { + deleteAfterSending(s.btrfsPath) + } + return err + } + + if err := cmd.Start(); err != nil { + if s.deleteAfterSending { + deleteAfterSending(s.btrfsPath) + } + return err + } + + <-shared.WebsocketSendStream(conn, stdout) + + output, err := ioutil.ReadAll(stderr) + if err != nil { + shared.Log.Error("problem reading btrfs send stderr", "err", err) + } + + err = cmd.Wait() + if err != nil { + shared.Log.Error("problem with btrfs send", "output", string(output)) + } + if s.deleteAfterSending { + deleteAfterSending(s.btrfsPath) + } + return err +} + func (s *storageBtrfs) MigrationType() MigrationFSType { - return MigrationFSType_RSYNC + if runningInUserns { + return MigrationFSType_RSYNC + } else { + return MigrationFSType_BTRFS + } } -func (s *storageBtrfs) MigrationSource(container container) ([]MigrationStorageSource, error) { - return rsyncMigrationSource(container) +func (s *storageBtrfs) MigrationSource(c container) ([]MigrationStorageSource, error) { + if runningInUserns { + return rsyncMigrationSource(c) + } + + sources := []MigrationStorageSource{} + + /* If the container is a snapshot, let's just send that; we don't need + * to send anything else, because that's all the user asked for. + */ + if c.IsSnapshot() { + sources = append(sources, btrfsMigrationSource{c.Name(), false, c.Path(), "", s}) + return sources, nil + } + + /* List all the snapshots in order of reverse creation. The idea here + * is that we send the oldest to newest snapshot, hopefully saving on + * xfer costs. Then, after all that, we send the container itself. 
+ */ + snapshots, err := c.Snapshots() + if err != nil { + return nil, err + } + + for i, snap := range snapshots { + var prev container + if i > 0 { + prev = snapshots[i-1] + } + + btrfsPath := snap.Path() + parentName := "" + if prev != nil { + parentName = prev.Path() + } + + sources = append(sources, btrfsMigrationSource{snap.Name(), false, btrfsPath, parentName, s}) + } + + /* We can't send running fses, so let's snapshot the fs and send + * the snapshot. + */ + + tmpPath := containerPath(fmt.Sprintf("%s/.migration-send-%s", c.Name(), uuid.NewRandom().String()), true) + err = os.MkdirAll(tmpPath, 0700) + if err != nil { + return nil, err + } + + btrfsPath := fmt.Sprintf("%s/.root", tmpPath) + if err := s.subvolSnapshot(c.Path(), btrfsPath, true); err != nil { + return nil, err + } + + btrfsParent := "" + if len(sources) > 0 { + btrfsParent = sources[len(sources)-1].(btrfsMigrationSource).btrfsPath + } + + sources = append(sources, btrfsMigrationSource{c.Name(), true, btrfsPath, btrfsParent, s}) + + return sources, nil } func (s *storageBtrfs) MigrationSink(container container, snapshots []container, conn *websocket.Conn) error { - return rsyncMigrationSink(container, snapshots, conn) + if runningInUserns { + return rsyncMigrationSink(container, snapshots, conn) + } + + cName := container.Name() + + snapshotsPath := shared.VarPath(fmt.Sprintf("snapshots/%s", cName)) + if !shared.PathExists(snapshotsPath) { + err := os.MkdirAll(shared.VarPath(fmt.Sprintf("snapshots/%s", cName)), 0700) + if err != nil { + return err + } + } + + btrfsRecv := func(btrfsPath string, targetPath string, issnapshot bool) error { + args := []string{"receive", "-e", btrfsPath} + cmd := exec.Command("btrfs", args...) 
+ + // Remove the existing pre-created subvolume + err := s.subvolsDelete(targetPath) + if err != nil { + return err + } + + stdin, err := cmd.StdinPipe() + if err != nil { + return err + } + + stderr, err := cmd.StderrPipe() + if err != nil { + return err + } + + if err := cmd.Start(); err != nil { + return err + } + + <-shared.WebsocketRecvStream(stdin, conn) + + output, err := ioutil.ReadAll(stderr) + if err != nil { + shared.Debugf("problem reading btrfs receive stderr %s", err) + } + + err = cmd.Wait() + if err != nil { + shared.Log.Error("problem with btrfs receive", log.Ctx{"output": string(output)}) + return err + } + + if !issnapshot { + err := s.subvolSnapshot(containerPath(fmt.Sprintf("%s/.root", cName), true), targetPath, false) + if err != nil { + shared.Log.Error("problem with btrfs snapshot", log.Ctx{"err": err}) + return err + } + + err = s.subvolsDelete(containerPath(fmt.Sprintf("%s/.root", cName), true)) + if err != nil { + shared.Log.Error("problem with btrfs delete", log.Ctx{"err": err}) + return err + } + } + + return nil + } + + for _, snap := range snapshots { + if err := btrfsRecv(containerPath(cName, true), snap.Path(), true); err != nil { + return err + } + } + + /* finally, do the real container */ + if err := btrfsRecv(containerPath(cName, true), container.Path(), false); err != nil { + return err + } + + // Cleanup + if ok, _ := shared.PathIsEmpty(snapshotsPath); ok { + err := os.Remove(snapshotsPath) + if err != nil { + return err + } + } + + return nil } From 498c5d441c31a089ba27ccc9dcf3f7ae9834123d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Tue, 9 Feb 2016 21:29:01 -0500 Subject: [PATCH 4/5] Implement migration fallback to rsync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/migrate.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git 
a/lxd/migrate.go b/lxd/migrate.go index 946d1be..b653bea 100644 --- a/lxd/migrate.go +++ b/lxd/migrate.go @@ -313,11 +313,11 @@ func (s *migrationSourceWs) Do(op *operation) error { return err } - // TODO: actually fall back on rsync. if *header.Fs != myType { - err := fmt.Errorf("mismatched storage types not supported yet") - s.sendControl(err) - return err + myType = MigrationFSType_RSYNC + header.Fs = &myType + + sources, _ = rsyncMigrationSource(s.container) } if s.live { @@ -490,15 +490,20 @@ func (c *migrationSink) do() error { if !c.live { criuType = nil } + + mySink := c.container.Storage().MigrationSink myType := c.container.Storage().MigrationType() resp := MigrationHeader{ Fs: &myType, Criu: criuType, } + // If the storage type the source has doesn't match what we have, then // we have to use rsync. if *header.Fs != *resp.Fs { - resp.Fs = MigrationFSType_RSYNC.Enum() + mySink = rsyncMigrationSink + myType = MigrationFSType_RSYNC + resp.Fs = &myType } if err := c.send(&resp); err != nil { @@ -593,7 +598,7 @@ func (c *migrationSink) do() error { srcIdmap.Idmap = shared.Extend(srcIdmap.Idmap, e) } - if err := c.container.Storage().MigrationSink(c.container, snapshots, c.fsConn); err != nil { + if err := mySink(c.container, snapshots, c.fsConn); err != nil { restore <- err c.sendControl(err) return From 8538acf138a5fd97e7fd9a83bebd381aa18e19a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgra...@ubuntu.com> Date: Tue, 9 Feb 2016 21:40:03 -0500 Subject: [PATCH 5/5] Fix migration of snapshots using rsync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Stéphane Graber <stgra...@ubuntu.com> --- lxd/storage.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lxd/storage.go b/lxd/storage.go index cb99710..a77bd6e 100644 --- a/lxd/storage.go +++ b/lxd/storage.go @@ -596,6 +596,13 @@ func rsyncMigrationSink(container container, snapshots []container, conn *websoc return err } + 
if len(snapshots) > 0 { + err := os.MkdirAll(shared.VarPath(fmt.Sprintf("snapshots/%s", container.Name())), 0700) + if err != nil { + return err + } + } + for _, snap := range snapshots { if err := RsyncRecv(shared.AddSlash(snap.Path()), conn); err != nil { return err
_______________________________________________ lxc-devel mailing list lxc-devel@lists.linuxcontainers.org http://lists.linuxcontainers.org/listinfo/lxc-devel