1. Commonize waitpid users onto a single helper (wait_for_pid). We frequently want to run something in a clean namespace, or fork off a script. This lets us keep the functions doing fork(2):exec(2):waitpid(2) simpler.
2. start a blockdev backend implementation. This will be used for mounting, copying, and snapshotting container filesystems. 3. implement lvm, directory, and overlayfs backends. 4. For overlayfs, support a new lxc.rootfs format of 'bdevtype:<extra>'. This means you can now use overlayfs-based containers without using lxc-start-ephemeral, by using lxc.rootfs = overlayfs:/readonly-dir:writeable-dir 5. add a set of simple clone testcases. The testcase shows how to use this. There are two types of clones: copy and snapshot. Right now you can create a copy clone from lvm->lvm and dir->dir, and a snapshot clone from lvm->lvm, dir->overlayfs and overlayfs->overlayfs. Note that this means you can now use the api to do incremental image development, as is done manually by docker: // create original container, directory based c1 = lxc_container_new("c1", NULL); c1->save_config(c1, NULL); c1->createl(c1, "ubuntu", NULL); c1->load_config(c1, NULL); // start it, log in and make some changes c1->want_daemonize(c1); c1->startl(c1, 0, NULL, NULL); // log in and do stuff, shut it down // create overlayfs clone c2 = c1->clone(c1, "c2", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0); lxc_container_put(c1); // start it, log in and make some changes c2->want_daemonize(c2); c2->startl(c2, 0, NULL, NULL); // log in and do stuff, shut it down c3 = c2->clone(c2, "c3", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0); lxc_container_put(c2); // etc c2 mounts c1's rootfs overlaid with /var/lib/lxc/c2/delta0. When c3 is created, c2's delta0 is rsync'ed to c3's, and c3 mounts c1's rootfs overlaid with its rsynced /var/lib/lxc/c3/delta0. Once Stéphane implements :) python bindings, lxc-clone can be switched to python using this implementation. Still to do (there's more, but this is off the top of my head): 1. support btrfs, zfs, aufs 2. have clone handle other mount entries (right now it only clones the rootfs) 3. bindings 4. re-write lxc-clone 5.
add lxc.numsnapshots - in the above case, c1 should not be destroyable until all its clones are gone 6. Move bdev to its own directory (src/bdev) with one backing store per file 7. Consider using fewer execs (for lvcreate etc) Signed-off-by: Serge Hallyn <serge.hal...@ubuntu.com> --- src/lxc/Makefile.am | 2 + src/lxc/bdev.c | 945 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lxc/bdev.h | 50 +++ src/lxc/conf.c | 11 +- src/lxc/lxccontainer.c | 551 +++++++++++++++++++++++++--- src/lxc/lxccontainer.h | 38 ++ src/lxc/utils.c | 20 ++ src/lxc/utils.h | 5 + src/tests/Makefile.am | 4 +- src/tests/clonetest.c | 178 ++++++++++ 10 files changed, 1756 insertions(+), 48 deletions(-) create mode 100644 src/lxc/bdev.c create mode 100644 src/lxc/bdev.h create mode 100644 src/tests/clonetest.c diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index ebeca466..cc2f163 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -16,6 +16,7 @@ pkginclude_HEADERS = \ attach.h \ lxccontainer.h \ lxclock.h \ + bdev.h \ version.h if IS_BIONIC @@ -36,6 +37,7 @@ so_PROGRAMS = liblxc.so liblxc_so_SOURCES = \ arguments.c arguments.h \ + bdev.c bdev.h \ commands.c commands.h \ start.c start.h \ stop.c \ diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c new file mode 100644 index 0000000..7f109af --- /dev/null +++ b/src/lxc/bdev.c @@ -0,0 +1,945 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano <daniel.lezcano at free.fr> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * this is all just a first shot for experiment. If we go this route, much + * shoudl change. bdev should be a directory with per-bdev file. Things which + * I'm doing by calling out to userspace should sometimes be done through + * libraries like liblvm2 + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sched.h> +#include <sys/mount.h> +#include <sys/wait.h> +#include "lxc.h" +#include "config.h" +#include "conf.h" +#include "bdev.h" +#include "log.h" +#include "error.h" +#include "utils.h" +#include "namespace.h" +#include "parse.h" + +lxc_log_define(bdev, lxc); + +/* Define unshare() if missing from the C library */ +/* this is also in attach.c and lxccontainer.c: commonize it in utils.c */ +#ifndef HAVE_UNSHARE +static int unshare(int flags) +{ +#ifdef __NR_unshare +return syscall(__NR_unshare, flags); +#else +errno = ENOSYS; +return -1; +#endif +} +#endif + +static int do_rsync(const char *src, const char *dest) +{ + // call out to rsync + pid_t pid; + char *s; + size_t l; + + pid = fork(); + if (pid < 0) + return -1; + if (pid > 0) + return wait_for_pid(pid); + l = strlen(src) + 2; + s = malloc(l); + if (!s) + exit(1); + strcpy(s, src); + s[l-2] = '/'; + s[l-1] = '\0'; + + return execlp("rsync", "rsync", "-a", s, dest, (char *)NULL); +} + +static int blk_getsize(const char *path, unsigned long *size) +{ + int fd, ret; + + fd = open(path, O_RDONLY); + if (!fd) + return -1; + ret = ioctl(fd, BLKGETSIZE64, size); + close(fd); + return ret; +} + +/* + * These are copied from conf.c. 
However as conf.c will be moved to using + * the callback system, they can be pulled from there eventually, so we + * don't need to pollute utils.c with these low level functions + */ +static int find_fstype_cb(char* buffer, void *data) +{ + struct cbarg { + const char *rootfs; + const char *target; + int mntopt; + } *cbarg = data; + + char *fstype; + + /* we don't try 'nodev' entries */ + if (strstr(buffer, "nodev")) + return 0; + + fstype = buffer; + fstype += lxc_char_left_gc(fstype, strlen(fstype)); + fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; + + DEBUG("trying to mount '%s'->'%s' with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); + + if (mount(cbarg->rootfs, cbarg->target, fstype, cbarg->mntopt, NULL)) { + DEBUG("mount failed with error: %s", strerror(errno)); + return 0; + } + + INFO("mounted '%s' on '%s', with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); + + return 1; +} + +static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt) +{ + int i; + + struct cbarg { + const char *rootfs; + const char *target; + int mntopt; + } cbarg = { + .rootfs = rootfs, + .target = target, + .mntopt = mntopt, + }; + + /* + * find the filesystem type with brute force: + * first we check with /etc/filesystems, in case the modules + * are auto-loaded and fall back to the supported kernel fs + */ + char *fsfile[] = { + "/etc/filesystems", + "/proc/filesystems", + }; + + for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) { + + int ret; + + if (access(fsfile[i], F_OK)) + continue; + + ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); + if (ret < 0) { + ERROR("failed to parse '%s'", fsfile[i]); + return -1; + } + + if (ret) + return 0; + } + + ERROR("failed to determine fs type for '%s'", rootfs); + return -1; +} + +static int do_mkfs(const char *path, const char *fstype) +{ + pid_t pid; + + if ((pid = fork()) < 0) { + ERROR("error forking"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + + 
return execlp("mkfs", "mkfs", "-t", fstype, path, NULL); +} + +static char *linkderef(char *path, char *dest) +{ + struct stat sbuf; + ssize_t ret; + + ret = stat(path, &sbuf); + if (ret < 0) + return NULL; + if (!S_ISLNK(sbuf.st_mode)) + return path; + ret = readlink(path, dest, MAXPATHLEN); + if (ret < 0) { + SYSERROR("error reading link %s", path); + return NULL; + } else if (ret >= MAXPATHLEN) { + ERROR("link in %s too long", path); + return NULL; + } + dest[ret] = '\0'; + return dest; +} + +/* + * Given a bdev (presumably blockdev-based), detect the fstype + * by trying mounting (in a private mntns) it. + * @bdev: bdev to investigate + * @type: preallocated char* in which to write the fstype + * @len: length of passed in char* + * Returns length of fstype, of -1 on error + */ +static int detect_fs(struct bdev *bdev, char *type, int len) +{ + int p[2], ret; + size_t linelen; + pid_t pid; + FILE *f; + char *sp1, *sp2, *sp3, *line = NULL; + + if (!bdev || !bdev->src || !bdev->dest) + return -1; + + if (pipe(p) < 0) + return -1; + if ((pid = fork()) < 0) + return -1; + if (pid > 0) { + int status; + close(p[1]); + memset(type, 0, len); + ret = read(p[0], type, len-1); + close(p[0]); + if (ret < 0) { + SYSERROR("error reading from pipe"); + wait(&status); + return -1; + } else if (ret == 0) { + ERROR("child exited early - fstype not found"); + wait(&status); + return -1; + } + wait(&status); + type[len-1] = '\0'; + INFO("detected fstype %s for %s", type, bdev->src); + return ret; + } + + if (unshare(CLONE_NEWNS) < 0) + exit(1); + + ret = mount_unknow_fs(bdev->src, bdev->dest, 0); + if (ret < 0) { + ERROR("failed mounting %s onto %s to detect fstype", bdev->src, bdev->dest); + exit(1); + } + // if symlink, get the real dev name + char devpath[MAXPATHLEN]; + char *l = linkderef(bdev->src, devpath); + if (!l) + exit(1); + f = fopen("/proc/self/mounts", "r"); + if (!f) + exit(1); + while (getline(&line, &linelen, f) != -1) { + sp1 = index(line, ' '); + if (!sp1) + 
exit(1); + *sp1 = '\0'; + if (strcmp(line, l)) + continue; + sp2 = index(sp1+1, ' '); + if (!sp2) + exit(1); + *sp2 = '\0'; + sp3 = index(sp2+1, ' '); + if (!sp3) + exit(1); + *sp3 = '\0'; + sp2++; + if (write(p[1], sp2, strlen(sp2)) != strlen(sp2)) + exit(1); + exit(0); + } + exit(1); +} + +struct bdev_type { + char *name; + struct bdev_ops *ops; +}; + +static int is_dir(const char *path) +{ + struct stat statbuf; + int ret = stat(path, &statbuf); + if (ret == 0 && S_ISDIR(statbuf.st_mode)) + return 1; + return 0; +} + +static int dir_detect(const char *path) +{ + if (strncmp(path, "dir:", 4) == 0) + return 1; // take their word for it + if (is_dir(path)) + return 1; + return 0; +} + +// +// XXXXXXX plain directory bind mount ops +// +int dir_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "dir")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return mount(bdev->src, bdev->dest, "bind", MS_BIND | MS_REC, NULL); +} + +int dir_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "dir")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +/* the bulk of this needs to become a common helper */ +static char *dir_new_path(char *src, const char *oldname, const char *name, + const char *oldpath, const char *lxcpath) +{ + char *ret, *p, *p2; + int l1, l2, nlen; + + nlen = strlen(src) + 1; + l1 = strlen(oldpath); + p = src; + /* if src starts with oldpath, look for oldname only after + * that path */ + if (strncmp(src, oldpath, l1) == 0) { + p += l1; + nlen += (strlen(lxcpath) - l1); + } + l2 = strlen(oldname); + while ((p = strstr(p, oldname)) != NULL) { + p += l2; + nlen += strlen(name) - l2; + } + + ret = malloc(nlen); + if (!ret) + return NULL; + + p = ret; + if (strncmp(src, oldpath, l1) == 0) { + p += sprintf(p, "%s", lxcpath); + src += l1; + } + + while ((p2 = strstr(src, oldname)) != NULL) { + strncpy(p, src, p2-src); // copy text up to oldname + p += p2-src; // move target pointer (p) + p += 
sprintf(p, "%s", name); // print new name in place of oldname + src = p2 + l2; // move src to end of oldname + } + sprintf(p, "%s", src); // copy the rest of src + return ret; +} + +/* + * for a simple directory bind mount, we substitute the old container + * name and paths for the new + */ +static int dir_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + if (snap) { + ERROR("directories cannot be snapshotted. Try overlayfs."); + return -1; + } + if (!orig->dest || !orig->src) + return -1; + if (orig->data) { + new->data = strdup(orig->data); + if (!new->data) + return -1; + } + + new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); + if (!new->dest) + return -1; + new->src = dir_new_path(orig->src, oldname, cname, oldpath, lxcpath); + if (!new->src) + return -1; + + return 0; +} + +struct bdev_ops dir_ops = { + .detect = &dir_detect, + .mount = &dir_mount, + .umount = &dir_umount, + .clone_paths = &dir_clonepaths, +}; + +// +// LVM ops +// + +/* + * Look at /sys/dev/block/maj:min/dm/uuid. 
If it contains the hardcoded LVM + * prefix "LVM-", then this is an lvm2 LV + */ +static int lvm_detect(const char *path) +{ + char devp[MAXPATHLEN], buf[4]; + FILE *fout; + int ret; + struct stat statbuf; + + if (strncmp(path, "lvm:", 4) == 0) + return 1; // take their word for it + + ret = stat(path, &statbuf); + if (ret != 0) + return 0; + if (!S_ISBLK(statbuf.st_mode)) + return 0; + + ret = snprintf(devp, MAXPATHLEN, "/sys/dev/block/%d:%d/dm/uuid", + major(statbuf.st_rdev), minor(statbuf.st_rdev)); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("lvm uuid pathname too long"); + return 0; + } + fout = fopen(devp, "r"); + if (!fout) + return 0; + ret = fread(buf, 1, 4, fout); + fclose(fout); + if (ret != 4 || strncmp(buf, "LVM-", 4) != 0) + return 0; + return 1; +} + +static int lvm_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "lvm")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + /* if we might pass in data sometime, then we'll have to enrich + * mount_unknow_fs */ + return mount_unknow_fs(bdev->src, bdev->dest, 0); +} + +static int lvm_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "lvm")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +/* + * path must be '/dev/$vg/$lv', $vg must be an existing VG, and $lv must + * not yet exist. This function will attempt to create /dev/$vg/$lv of + * size $size. + */ +static int lvm_create(const char *path, unsigned long size) +{ + int ret, pid; + char sz[24], *pathdup, *vg, *lv; + + if ((pid = fork()) < 0) { + SYSERROR("failed fork"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + + // lvcreate default size is in M, not bytes. 
+ ret = snprintf(sz, 24, "%lu", size/1000000); + if (ret < 0 || ret >= 24) + exit(1); + + pathdup = strdup(path); + if (!pathdup) + exit(1); + lv = rindex(pathdup, '/'); + if (!lv) { + free(pathdup); + exit(1); + } + *lv = '\0'; + lv++; + vg = rindex(pathdup, '/'); + if (!vg) + exit(1); + vg++; + ret = execlp("lvcreate", "lvcreate", "-L", sz, vg, "-n", lv, (char *)NULL); + free(pathdup); + return ret; +} + +static int lvm_snapshot(const char *orig, const char *path, unsigned long size) +{ + int ret, pid; + char sz[24], *pathdup, *lv; + + if ((pid = fork()) < 0) { + SYSERROR("failed fork"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + // lvcreate default size is in M, not bytes. + ret = snprintf(sz, 24, "%lu", size/1000000); + if (ret < 0 || ret >= 24) + exit(1); + + pathdup = strdup(path); + if (!pathdup) + exit(1); + lv = rindex(pathdup, '/'); + if (!lv) { + free(pathdup); + exit(1); + } + *lv = '\0'; + lv++; + + ret = execlp("lvcreate", "lvcreate", "-s", "-L", sz, "-n", lv, orig, (char *)NULL); + free(pathdup); + return ret; +} + +static int lvm_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + char fstype[100]; + unsigned long size = newsize; + + if (!orig->src || !orig->dest) + return -1; + + if (orig->data) { + new->data = strdup(orig->data); + if (!new->data) + return -1; + } + new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); + if (!new->dest) + return -1; + if (mkdir_p(new->dest, 0755) < 0) + return -1; + + + new->src = dir_new_path(orig->src, oldname, cname, oldpath, lxcpath); + if (!new->src) + return -1; + + if (!newsize && blk_getsize(orig->src, &size) < 0) { + ERROR("Error getting size of %s", orig->src); + return -1; + } + if (snap) { + if (lvm_snapshot(orig->src, new->src, size) < 0) { + ERROR("could not create %s snapshot of %s", new->src, orig->src); + return -1; + } + } else { + if 
(lvm_create(new->src, size) < 0) { + ERROR("Error creating new lvm blockdev"); + return -1; + } + if (detect_fs(orig, fstype, 100) < 0) { + ERROR("could not find fstype for %s", orig->src); + return -1; + } + if (do_mkfs(new->src, fstype) < 0) { + ERROR("Error creating filesystem type %s on %s", fstype, + new->src); + return -1; + } + } + + return 0; +} + +struct bdev_ops lvm_ops = { + .detect = &lvm_detect, + .mount = &lvm_mount, + .umount = &lvm_umount, + .clone_paths = &lvm_clonepaths, +}; + +// +// overlayfs ops +// + +static int overlayfs_detect(const char *path) +{ + if (strncmp(path, "overlayfs:", 10) == 0) + return 1; // take their word for it + return 0; +} + +// +// XXXXXXX plain directory bind mount ops +// +int overlayfs_mount(struct bdev *bdev) +{ + char *options, *dup, *lower, *upper; + int len; + int ret; + + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + + // separately mount it first + // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest + dup = strdupa(bdev->src); + if (!(lower = index(dup, ':'))) + return -22; + if (!(upper = index(++lower, ':'))) + return -22; + *upper = '\0'; + upper++; + + // TODO We should check whether bdev->src is a blockdev, and if so + // but for now, only support overlays of a basic directory + + len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=") + 1; + options = alloca(len); + ret = snprintf(options, len, "upperdir=%s,lowerdir=%s", upper, lower); + if (ret < 0 || ret >= len) + return -1; + ret = mount(lower, bdev->dest, "overlayfs", MS_MGC_VAL, options); + if (ret < 0) + SYSERROR("overlayfs: error mounting %s onto %s options %s", + lower, bdev->dest, options); + else + INFO("overlayfs: mounted %s onto %s options %s", + lower, bdev->dest, options); + return ret; +} + +int overlayfs_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return 
umount(bdev->dest); +} + +static int overlayfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + if (!snap) { + ERROR("overlayfs is only for snapshot clones"); + return -22; + } + + if (!orig->src || !orig->dest) + return -1; + + new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); + if (!new->dest) + return -1; + if (mkdir_p(new->dest, 0755) < 0) + return -1; + + if (strcmp(orig->type, "dir") == 0) { + char *delta; + int ret, len; + if (!snap) + return -1; + // if we have /var/lib/lxc/c2/rootfs, then delta will be + // /var/lib/lxc/c2/delta0 + delta = strdup(new->dest); + if (!delta) { + return -1; + } + if (strlen(delta) < 6) { + free(delta); + return -22; + } + strcpy(&delta[strlen(delta)-6], "delta0"); + if ((ret = mkdir(delta, 0755)) < 0) { + SYSERROR("error: mkdir %s", delta); + free(delta); + return -1; + } + + // the src will be 'overlayfs:lowerdir:upperdir' + len = strlen(delta) + strlen(orig->src) + 12; + new->src = malloc(len); + if (!new->src) { + free(delta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", orig->src, delta); + free(delta); + if (ret < 0 || ret >= len) + return -ENOMEM; + } else if (strcmp(orig->type, "lvm") == 0) { + ERROR("overlayfs clone of lvm container is not yet supported"); + return -1; + } else if (strcmp(orig->type, "overlayfs") == 0) { + // What exactly do we want to do here? 
+ // I think we want to use the original lowerdir, with a + // private delta which is originally rsynced from the + // original delta + char *osrc, *odelta, *nsrc, *ndelta; + int len, ret; + if (!(osrc = strdup(orig->src))) + return -22; + nsrc = index(osrc, ':') + 1; + if (nsrc != osrc + 10 || (odelta = index(nsrc, ':')) == NULL) { + free(osrc); + return -22; + } + *odelta = '\0'; + odelta++; + ndelta = dir_new_path(odelta, oldname, cname, oldpath, lxcpath); + if (!ndelta) { + free(osrc); + return -ENOMEM; + } + if (do_rsync(odelta, ndelta) < 0) { + ERROR("copying overlayfs delta"); + return -1; + } + len = strlen(nsrc) + strlen(ndelta) + 12; + new->src = malloc(len); + if (!new->src) { + free(osrc); + free(ndelta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", nsrc, ndelta); + free(osrc); + free(ndelta); + if (ret < 0 || ret >= len) + return -ENOMEM; + } + + return 0; +} +struct bdev_ops overlayfs_ops = { + .detect = &overlayfs_detect, + .mount = &overlayfs_mount, + .umount = &overlayfs_umount, + .clone_paths = &overlayfs_clonepaths, +}; + +struct bdev_type bdevs[] = { + {.name = "lvm", .ops = &lvm_ops,}, + {.name = "dir", .ops = &dir_ops,}, + {.name = "overlayfs", .ops = &overlayfs_ops,}, +}; + +static const size_t numbdevs = sizeof(bdevs) / sizeof(struct bdev_type); + +void bdev_put(struct bdev *bdev) +{ + if (bdev->data) + free(bdev->data); + if (bdev->src) + free(bdev->src); + if (bdev->dest) + free(bdev->dest); + free(bdev); +} + +struct bdev *bdev_get(const char *type) +{ + int i; + struct bdev *bdev; + + for (i=0; i<numbdevs; i++) { + if (strcmp(bdevs[i].name, type) == 0) + break; + } + if (i == numbdevs) + return NULL; + bdev = malloc(sizeof(struct bdev)); + if (!bdev) + return NULL; + memset(bdev, 0, sizeof(struct bdev)); + bdev->ops = bdevs[i].ops; + bdev->type = bdevs[i].name; + return bdev; +} + +struct bdev *bdev_init(const char *src, const char *dst, const char *data) +{ + int i; + struct bdev *bdev; + + for (i=0; 
i<numbdevs; i++) { + int r; + r = bdevs[i].ops->detect(src); + if (r) + break; + } + if (i == numbdevs) + return NULL; + bdev = malloc(sizeof(struct bdev)); + if (!bdev) + return NULL; + memset(bdev, 0, sizeof(struct bdev)); + bdev->ops = bdevs[i].ops; + bdev->type = bdevs[i].name; + if (data) + bdev->data = strdup(data); + if (src) + bdev->src = strdup(src); + if (dst) + bdev->dest = strdup(dst); + + return bdev; +} + +/* + * If we're not snaphotting, then bdev_copy becomes a simple case of mount + * the original, mount the new, and rsync the contents. + */ +struct bdev *bdev_copy(const char *src, const char *oldname, const char *cname, + const char *oldpath, const char *lxcpath, const char *bdevtype, + int snap, const char *bdevdata, unsigned long newsize) +{ + struct bdev *orig, *new; + pid_t pid; + + /* if the container name doesn't show up in the rootfs path, then + * we don't know how to come up with a new name + */ + if (strstr(src, oldname) == NULL) { + ERROR("original rootfs path %s doesn't include container name %s", + src, oldname); + return NULL; + } + + orig = bdev_init(src, NULL, NULL); + if (!orig) { + ERROR("failed to detect blockdev type for %s\n", src); + return NULL; + } + + if (!orig->dest) { + int ret; + orig->dest = malloc(MAXPATHLEN); + if (!orig->dest) { + ERROR("out of memory"); + bdev_put(orig); + return NULL; + } + ret = snprintf(orig->dest, MAXPATHLEN, "%s/%s/rootfs", oldpath, oldname); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("rootfs path too long"); + bdev_put(orig); + return NULL; + } + } + + new = bdev_get(bdevtype ? bdevtype : orig->type); + if (!new) { + ERROR("no such block device type: %s", bdevtype ? 
bdevtype : orig->type); + bdev_put(orig); + return NULL; + } + + if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath, snap, newsize) < 0) { + ERROR("failed getting pathnames for cloned storage: %s\n", src); + bdev_put(orig); + bdev_put(new); + return NULL; + } + + pid = fork(); + if (pid < 0) { + SYSERROR("fork"); + bdev_put(orig); + bdev_put(new); + return NULL; + } + + if (pid > 0) { + int ret = wait_for_pid(pid); + bdev_put(orig); + if (ret < 0) { + bdev_put(new); + return NULL; + } + return new; + } + + if (unshare(CLONE_NEWNS) < 0) { + SYSERROR("unshare CLONE_NEWNS"); + exit(1); + } + if (snap) + exit(0); + + // If not a snapshot, copy the fs. + if (orig->ops->mount(orig) < 0) { + ERROR("failed mounting %s onto %s\n", src, orig->dest); + exit(1); + } + if (new->ops->mount(new) < 0) { + ERROR("failed mounting %s onto %s\n", new->src, new->dest); + exit(1); + } + if (do_rsync(orig->dest, new->dest) < 0) { + ERROR("rsyncing %s to %s\n", orig->src, new->src); + exit(1); + } + // don't bother umounting, ns exit will do that + + exit(0); +} diff --git a/src/lxc/bdev.h b/src/lxc/bdev.h new file mode 100644 index 0000000..131f158 --- /dev/null +++ b/src/lxc/bdev.h @@ -0,0 +1,50 @@ +#ifndef __LXC_BDEV_H +#define __LXC_BDEV_H +/* blockdev operations for: + * dir, raw, btrfs, overlayfs, aufs, lvm, loop, zfs, btrfs + * someday: qemu-nbd, qcow2, qed + */ + +#include "config.h" +#include "lxccontainer.h" + +struct bdev; + +struct bdev_ops { + /* detect whether path is of this bdev type */ + int (*detect)(const char *path); + // mount requires src and dest to be set. 
+ int (*mount)(struct bdev *bdev); + int (*umount)(struct bdev *bdev); + /* given original mount, rename the paths for cloned container */ + int (*clone_paths)(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, + int snap, unsigned long newsize); +}; + +struct bdev { + struct bdev_ops *ops; + char *type; + char *src; + char *dest; + char *data; +}; + +/* + * Instantiate a bdev object. The src is used to determine which blockdev + * type this should be. The dst and data are optional, and will be used + * in case of mount/umount. + * + * Optionally, src can be 'dir:/var/lib/lxc/c1' or 'lvm:/dev/lxc/c1'. For + * other backing stores, this will allow additional options. In particular, + * "overlayfs:/var/lib/lxc/canonical/rootfs:/var/lib/lxc/c1/delta" will mean + * use /var/lib/lxc/canonical/rootfs as lower dir, and /var/lib/lxc/c1/delta + * as the upper, writeable layer. + */ +struct bdev *bdev_init(const char *src, const char *dst, const char *data); + +struct bdev *bdev_copy(const char *src, const char *oldname, const char *cname, + const char *oldpath, const char *lxcpath, const char *bdevtype, + int snap, const char *bdevdata, unsigned long newsize); +void bdev_put(struct bdev *bdev); +#endif diff --git a/src/lxc/conf.c b/src/lxc/conf.c index cf97eef..67b1c7f 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -64,6 +64,7 @@ #include "log.h" #include "lxc.h" /* for lxc_cgroup_set() */ #include "caps.h" /* for lxc_caps_last_cap() */ +#include "bdev.h" #if HAVE_APPARMOR #include <apparmor.h> @@ -590,8 +591,8 @@ int pin_rootfs(const char *rootfs) return -2; if (!realpath(rootfs, absrootfs)) { - SYSERROR("failed to get real path for '%s'", rootfs); - return -1; + INFO("failed to get real path for '%s', not pinning", rootfs); + return -2; } if (access(absrootfs, F_OK)) { @@ -1163,6 +1164,12 @@ static int setup_rootfs(struct lxc_conf *conf) } } + // First try mounting rootfs using a bdev + struct bdev 
*bdev = bdev_init(rootfs->path, rootfs->mount, NULL); + if (bdev && bdev->ops->mount(bdev) == 0) { + DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount); + return 0; + } if (mount_rootfs(rootfs->path, rootfs->mount)) { ERROR("failed to mount rootfs"); return -1; diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index 1df6a98..40c1c3c 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -17,22 +17,41 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <errno.h> +#include <sched.h> +#include "config.h" #include "lxc.h" #include "state.h" #include "lxccontainer.h" #include "conf.h" -#include "config.h" #include "confile.h" #include "cgroup.h" #include "commands.h" #include "version.h" #include "log.h" -#include <unistd.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <errno.h> +#include "bdev.h" #include <lxc/utils.h> +/* Define unshare() if missing from the C library */ +/* this is also in attach.c and lxccontainer.c: commonize it in utils.c */ +#ifndef HAVE_UNSHARE +static int unshare(int flags) +{ +#ifdef __NR_unshare + return syscall(__NR_unshare, flags); +#else + errno = ENOSYS; + return -1; +#endif +} +#else +int unshare(int); +#endif + lxc_log_define(lxc_container, lxc); /* LOCKING @@ -534,10 +553,8 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) { bool bret = false; pid_t pid; - int ret, status; - char *tpath = NULL; - int len, nargs = 0; - char **newargv; + char *tpath = NULL, **newargv; + int ret, len, nargs = 0; if (!c) return false; @@ -564,7 +581,7 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) goto out; /* we're going to fork. 
but since we'll wait for our child, we - don't need to lxc_container_get */ + * don't need to lxc_container_get */ if (lxclock(c->slock, 0)) { ERROR("failed to grab global container lock for %s\n", c->name); @@ -635,26 +652,8 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) exit(1); } -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == -EINTR) - goto again; - SYSERROR("waitpid failed"); - goto out_unlock; - } - if (ret != pid) - goto again; - if (!WIFEXITED(status)) { // did not exit normally - // we could set an error code and string inside the - // container_struct here if we like - ERROR("container creation template exited abnormally\n"); - goto out_unlock; - } - - if (WEXITSTATUS(status) != 0) { - ERROR("container creation template for %s exited with %d\n", - c->name, WEXITSTATUS(status)); + if (wait_for_pid(pid) != 0) { + ERROR("container creation template for %s failed\n", c->name); goto out_unlock; } @@ -820,7 +819,6 @@ static bool lxcapi_save_config(struct lxc_container *c, const char *alt_file) static bool lxcapi_destroy(struct lxc_container *c) { pid_t pid; - int ret, status; if (!c) return false; @@ -838,23 +836,12 @@ static bool lxcapi_destroy(struct lxc_container *c) exit(1); } -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == -EINTR) - goto again; - perror("waitpid"); - return false; - } - if (ret != pid) - goto again; - if (!WIFEXITED(status)) { // did not exit normally - // we could set an error code and string inside the - // container_struct here if we like + if (wait_for_pid(pid) < 0) { + ERROR("Error destroying container %s", c->name); return false; } - return WEXITSTATUS(status) == 0; + return true; } static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v) @@ -1025,6 +1012,479 @@ const char *lxc_get_version(void) return lxc_version(); } +static int copy_file(char *old, char *new) +{ + int in, out; + ssize_t len, ret; + char 
buf[8096]; + struct stat sbuf; + + if (file_exists(new)) { + ERROR("copy destination %s exists", new); + return -1; + } + ret = stat(old, &sbuf); + if (ret < 0) { + SYSERROR("stat'ing %s", old); + return -1; + } + + in = open(old, O_RDONLY); + if (in < 0) { + SYSERROR("opening original file %s", old); + return -1; + } + out = open(new, O_CREAT | O_EXCL | O_WRONLY, 0644); + if (out < 0) { + SYSERROR("opening new file %s", new); + close(in); + return -1; + } + + while (1) { + len = read(in, buf, 8096); + if (len < 0) { + SYSERROR("reading old file %s", old); + goto err; + } + if (len == 0) + break; + ret = write(out, buf, len); + if (ret < len) { // should we retry? + SYSERROR("write to new file %s was interrupted", new); + goto err; + } + } + close(in); + close(out); + + // we set mode, but not owner/group + ret = chmod(new, sbuf.st_mode); + if (ret) { + SYSERROR("setting mode on %s", new); + return -1; + } + + return 0; + +err: + close(in); + close(out); + return -1; +} + +/* + * we're being passed result of two strstrs(x, y). We want to write + * all data up to the first found string, or to end of the string if + * neither string was found. + * This function will return the earliest found string if any, or else + * NULL + */ +static const char *lowest_nonnull(const char *p1, const char *p2) +{ + if (!p1) + return p2; + if (!p2) + return p1; + return p1 < p2 ? 
p1 : p2; +} + +/* + * Rewrite the file at @path in place: every occurrence of the old container's + * config path is replaced by @newpath and every occurrence of its name by + * @newname. Returns 0 on success, -1 on any I/O or allocation failure. + */ +static int update_name_and_paths(const char *path, struct lxc_container *oldc, + const char *newname, const char *newpath) +{ + FILE *f; + long flen; /* signed, so the ftell() error return can be detected */ + char *contents; + const char *p0, *p1, *p2, *end; + const char *oldpath = oldc->get_config_path(oldc); + const char *oldname = oldc->name; + + f = fopen(path, "r"); + if (!f) { + SYSERROR("opening old config"); + return -1; + } + if (fseek(f, 0, SEEK_END) < 0) { + SYSERROR("seeking to end of old config"); + fclose(f); + return -1; + } + flen = ftell(f); + if (flen < 0) { + fclose(f); + SYSERROR("telling size of old config"); + return -1; + } + if (fseek(f, 0, SEEK_SET) < 0) { + fclose(f); + SYSERROR("rewinding old config"); + return -1; + } + /* one extra byte so the buffer can be NUL-terminated for strstr() */ + contents = malloc(flen + 1); + if (!contents) { + SYSERROR("out of memory"); + fclose(f); + return -1; + } + if (fread(contents, 1, flen, f) != (size_t)flen) { + free(contents); + fclose(f); + SYSERROR("reading old config"); + return -1; + } + contents[flen] = '\0'; + if (fclose(f) < 0) { + free(contents); + SYSERROR("closing old config"); + return -1; + } + + f = fopen(path, "w"); + if (!f) { + SYSERROR("reopening config"); + free(contents); + return -1; + } + + p0 = contents; + end = contents + flen; + while (1) { + p1 = strstr(p0, oldpath); + p2 = strstr(p0, oldname); + if (!p1 && !p2) { + /* write the rest and be done */ + if (fwrite(p0, 1, (end-p0), f) != (end-p0)) { + SYSERROR("writing new config"); + free(contents); + fclose(f); + return -1; + } + free(contents); + fclose(f); + /* success */ + return 0; + } else { + const char *p = lowest_nonnull(p1, p2); + const char *new = (p == p2) ? newname : newpath; + if (fwrite(p0, 1, (p-p0), f) != (p-p0)) { + SYSERROR("writing new config"); + free(contents); + fclose(f); + return -1; + } + p0 = p; + /* now write the newpath or newname */ + if (fwrite(new, 1, strlen(new), f) != strlen(new)) { + SYSERROR("writing new name or path in new config"); + free(contents); + fclose(f); + return -1; + } + p0 += (p == p2) ?
strlen(oldname) : strlen(oldpath); + } + } +} + +/* + * Copy every hook script listed in @c's config into @c's container directory, + * point the config entry at the copy, and rewrite the old name/path inside the + * copied script. Returns 0 on success, -1 on the first failure. + */ +static int copyhooks(struct lxc_container *oldc, struct lxc_container *c) +{ + int i; + int ret; + struct lxc_list *it; + + for (i=0; i<NUM_LXC_HOOKS; i++) { + lxc_list_for_each(it, &c->lxc_conf->hooks[i]) { + char *hookname = it->elem; + char *fname = rindex(hookname, '/'); + char tmppath[MAXPATHLEN]; + if (!fname) /* relative path - we don't support, but maybe we should; skip it and keep going */ + continue; + /* copy the script, and change the entry in confile */ + ret = snprintf(tmppath, MAXPATHLEN, "%s/%s/%s", + c->config_path, c->name, fname+1); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + ret = copy_file(it->elem, tmppath); + if (ret < 0) + return -1; + free(it->elem); + it->elem = strdup(tmppath); + if (!it->elem) { + ERROR("out of memory copying hook path"); + return -1; + } + /* the rewrite truncates the copy first, so a failure must not go unnoticed */ + ret = update_name_and_paths(it->elem, oldc, c->name, c->get_config_path(c)); + if (ret < 0) { + ERROR("error updating name and paths in hook %s", tmppath); + return -1; + } + } + } + + c->save_config(c, NULL); + return 0; +} + +/* + * Overwrite @hwaddr with a random address under the 00:16:3e prefix. + * Writes up to 18 bytes ("xx:xx:xx:xx:xx:xx" plus NUL); assumes the caller's + * buffer is at least that large - TODO confirm against the config parser. + */ +static void new_hwaddr(char *hwaddr) +{ + snprintf(hwaddr, 18, "00:16:3e:%02x:%02x:%02x", + rand() % 256, rand() % 256, rand() % 256); +} + +static void network_new_hwaddrs(struct lxc_container *c) +{ + struct lxc_list *it; + lxc_list_for_each(it, &c->lxc_conf->network) { + struct lxc_netdev *n = it->elem; + if (n->hwaddr) + new_hwaddr(n->hwaddr); + } +} + +static int copy_fstab(struct lxc_container *oldc, struct lxc_container *c) +{ + char newpath[MAXPATHLEN]; + char *oldpath = oldc->lxc_conf->fstab; + int ret; + + if (!oldpath) + return 0; + + char *p = rindex(oldpath, '/'); + if (!p) + return -1; + ret = snprintf(newpath, MAXPATHLEN, "%s/%s%s", + c->config_path, c->name, p); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("error printing new path for %s", oldpath); + return -1; + } + if (file_exists(newpath)) { + ERROR("error: fstab file %s exists", newpath); + return -1; + } + + if (copy_file(oldpath, newpath) < 0) { + ERROR("error: copying %s to %s", oldpath, newpath); + return -1; + } + free(c->lxc_conf->fstab); +
c->lxc_conf->fstab = strdup(newpath); + if (!c->lxc_conf->fstab) { + ERROR("error: allocating pathname"); + return -1; + } + + return 0; +} + +static int copy_storage(struct lxc_container *c0, struct lxc_container *c, + const char *newtype, int flags, const char *bdevdata, unsigned long newsize) +{ + struct bdev *bdev; + + bdev = bdev_copy(c0->lxc_conf->rootfs.path, c0->name, c->name, + c0->config_path, c->config_path, newtype, !!(flags & LXC_CLONE_SNAPSHOT), + bdevdata, newsize); + if (!bdev) { + ERROR("error copying storage"); + return -1; + } + free(c->lxc_conf->rootfs.path); + c->lxc_conf->rootfs.path = strdup(bdev->src); + bdev_put(bdev); + if (!c->lxc_conf->rootfs.path) + return -1; + // here we could also update all lxc.mount.entries or even + // items in the lxc.mount fstab list. As discussed on m-l, + // we could do either any source paths starting with the + // lxcpath/oldname, or simply anything which is not a virtual + // fs or a bind mount. + return 0; +} + +static int clone_update_rootfs(struct lxc_container *c, int flags) +{ + int ret = -1; + char path[MAXPATHLEN]; + struct bdev *bdev; + FILE *fout; + pid_t pid; + + if (flags & LXC_CLONE_KEEPNAME) + return 0; + + /* update hostname in rootfs */ + /* we're going to mount, so run in a clean namespace to simplify cleanup: the forked child unshares CLONE_NEWNS before mounting, the parent only waits for it */ + + pid = fork(); + if (pid < 0) + return -1; + if (pid > 0) + return wait_for_pid(pid); + + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL); + if (!bdev) + exit(1); + if (bdev->ops->mount(bdev) < 0) + exit(1); + ret = snprintf(path, MAXPATHLEN, "%s/etc/hostname", bdev->dest); + if (ret < 0 || ret >= MAXPATHLEN) + exit(1); + if (!(fout = fopen(path, "w"))) { + SYSERROR("unable to open %s: ignoring\n", path); + exit(0); + } + if (fprintf(fout, "%s", c->name) < 0) + exit(1); + if (fclose(fout) < 0) + exit(1); + exit(0); +} + +/* + * We want to support: +sudo lxc-clone -o o1
-n n1 -s -L|-fssize fssize -v|--vgname vgname \ + -p|--lvprefix lvprefix -t|--fstype fstype -B backingstore + +-s [ implies overlayfs] +-s -B overlayfs +-s -B aufs + +only rootfs gets converted (copied/snapshotted) on clone. +*/ + +/* + * Create the directory that will hold the file @path (its parent component). + * @path is cut at its last '/' for the mkdir() call and restored afterwards. + * Returns 0 if the directory was created or already exists, -1 otherwise. + */ +static int create_file_dirname(char *path) +{ + char *p = rindex(path, '/'); + int ret; + + if (!p) + return -1; + *p = '\0'; + ret = mkdir(path, 0755); + if (ret < 0) { + if (errno == EEXIST) + ret = 0; /* already there: not an error for the caller either */ + else + SYSERROR("creating container path %s\n", path); + } + *p = '/'; + return ret; +} + +struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *newname, + const char *lxcpath, enum lxc_clone_flags flags, + const char *bdevtype, const char *bdevdata, unsigned long newsize) +{ + struct lxc_container *c2 = NULL; + char newpath[MAXPATHLEN]; + int ret; + const char *n, *l; + FILE *fout; + + if (!c || !c->is_defined(c)) + return NULL; + + if (lxclock(c->privlock, 0)) + return NULL; + + if (c->is_running(c)) { + ERROR("error: Original container (%s) is running", c->name); + goto out; + } + + // Make sure the container doesn't yet exist. + n = newname ? newname : c->name; + l = lxcpath ?
lxcpath : c->get_config_path(c); + ret = snprintf(newpath, MAXPATHLEN, "%s/%s/config", l, n); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("clone: failed making config pathname"); + goto out; + } + if (file_exists(newpath)) { + ERROR("error: clone: %s exists", newpath); + goto out; + } + + if (create_file_dirname(newpath) < 0) { + ERROR("Error creating container dir for %s", newpath); + goto out; + } + + /* copy the configuration, tweak it as needed */ + fout = fopen(newpath, "w"); + if (!fout) { + SYSERROR("open %s", newpath); + goto out; + } + write_config(fout, c->lxc_conf); + fclose(fout); + + if (update_name_and_paths(newpath, c, n, l) < 0) { + ERROR("Error updating name in cloned config"); + goto out; + } + + ret = snprintf(newpath, MAXPATHLEN, "%s/%s/rootfs", l, n); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("clone: failed making rootfs pathname"); + goto out; + } + if (mkdir(newpath, 0755) < 0) { + SYSERROR("error creating %s", newpath); + goto out; + } + + c2 = lxc_container_new(n, l); + if (!c2) { + ERROR("clone: failed to create new container (%s %s)", n, l); + goto out; + } + + /* + * From here on every failure path jumps to "out", which drops the single + * reference we hold on c2; the paths themselves must not put it as well. + */ + + /* copy hooks if requested */ + if (flags & LXC_CLONE_COPYHOOKS) { + ret = copyhooks(c, c2); + if (ret < 0) { + ERROR("error copying hooks"); + c2->destroy(c2); + goto out; + } + } + + if (copy_fstab(c, c2) < 0) { + ERROR("error copying fstab"); + c2->destroy(c2); + goto out; + } + + /* update macaddrs */ + if (!(flags & LXC_CLONE_KEEPMACADDR)) + network_new_hwaddrs(c2); + + /* copy/snapshot rootfs's */ + ret = copy_storage(c, c2, bdevtype, flags, bdevdata, newsize); + if (ret < 0) { + c2->destroy(c2); + goto out; + } + + if (!c2->save_config(c2, NULL)) { + c2->destroy(c2); + goto out; + } + + if (clone_update_rootfs(c2, flags) < 0) { + /* the clone is deliberately not destroyed here */ + goto out; + } + + /* TODO: update c's lxc.snapshot = count */ + lxcunlock(c->privlock); + return c2; + +out: + lxcunlock(c->privlock); + if (c2) + lxc_container_put(c2); + + return NULL; +} + +struct lxc_container
*lxc_container_new(const char *name, const char *configpath) { struct lxc_container *c; @@ -1101,6 +1561,7 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath c->set_cgroup_item = lxcapi_set_cgroup_item; c->get_config_path = lxcapi_get_config_path; c->set_config_path = lxcapi_set_config_path; + c->clone = lxcapi_clone; /* we'll allow the caller to update these later */ if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0)) { diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h index de9854c..3bebdf3 100644 --- a/src/lxc/lxccontainer.h +++ b/src/lxc/lxccontainer.h @@ -1,9 +1,19 @@ +#ifndef __LXC_CONTAINER_H +#define __LXC_CONTAINER_H #include "lxclock.h" #include <stdlib.h> #include <malloc.h> #include <stdbool.h> +enum lxc_clone_flags { /* bit flags: the clone code tests them with '&', so each needs its own bit */ + LXC_CLONE_KEEPNAME = (1 << 0), + LXC_CLONE_COPYHOOKS = (1 << 1), + LXC_CLONE_KEEPMACADDR = (1 << 2), + LXC_CLONE_SNAPSHOT = (1 << 3), + LXC_CLONE_MAXFLAGS = (1 << 4), /* first unused bit */ +}; + struct lxc_container { // private fields char *name; @@ -72,6 +82,33 @@ struct lxc_container { const char *(*get_config_path)(struct lxc_container *c); bool (*set_config_path)(struct lxc_container *c, const char *path); + /* + * @c: the original container + * @newname: new name for the container. If NULL, the same name is used, and + * a new lxcpath MUST be specified. + * @lxcpath: lxcpath in which to create the new container. If NULL, then the + * original container's lxcpath will be used. (Should we use the default + * instead?) + * @flags: additional flags to modify cloning behavior. + * LXC_CLONE_KEEPNAME: don't edit the rootfs to change the hostname. + * LXC_CLONE_COPYHOOKS: copy all hooks into the container dir + * LXC_CLONE_KEEPMACADDR: don't change the mac address on network interfaces. + * LXC_CLONE_SNAPSHOT: snapshot the original filesystem(s). If @bdevtype was not + * specified, then do so with the native bdevtype if possible, else use an + * overlayfs. + * @bdevtype: optionally force the cloned bdevtype to a specified plugin.
By + * default the original is used (subject to snapshot requirements). + * @bdevdata: information about how to create the new storage (i.e. fstype and + * fsdata) + * @newsize: in case of a block device backing store, an optional size. If 0, + * then the original backing store's size will be used if possible. Note this + * only applies to the rootfs. For any other filesystems, the original size + * will be duplicated. + */ + struct lxc_container *(*clone)(struct lxc_container *c, const char *newname, + const char *lxcpath, enum lxc_clone_flags flags, const char *bdevtype, + const char *bdevdata, unsigned long newsize); + #if 0 bool (*commit_cgroups)(struct lxc_container *c); bool (*reread_cgroups)(struct lxc_container *c); @@ -93,3 +130,4 @@ const char *lxc_get_version(void); char ** lxc_get_valid_keys(); char ** lxc_get_valid_values(char *key); #endif +#endif diff --git a/src/lxc/utils.c b/src/lxc/utils.c index e07ca7b..c4cd6a2 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -34,6 +34,8 @@ #include <dirent.h> #include <fcntl.h> #include <libgen.h> +#include <sys/types.h> +#include <sys/wait.h> #include "log.h" @@ -188,3 +190,21 @@ out: fclose(fin); return default_lxcpath; } + +int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) /* errno codes are positive; the old -EINTR test never matched */ + goto again; + return -1; + } + if (ret != pid) + goto again; + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + return 0; +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index 8954503..0a27903 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -32,4 +32,9 @@ extern int mkdir_p(const char *dir, mode_t mode); */ extern const char *default_lxc_path(void); +/* + * wait on a child we forked: returns 0 if it exited normally with status 0, -1 otherwise + */ +extern int wait_for_pid(pid_t pid); + #endif diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 4cbeeb3..c0ce648 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -13,6 +13,7 @@ lxc_test_get_item_SOURCES =
get_item.c lxc_test_getkeys_SOURCES = getkeys.c lxc_test_lxcpath_SOURCES = lxcpath.c lxc_test_cgpath_SOURCES = cgpath.c +lxc_test_clonetest_SOURCES = clonetest.c AM_CFLAGS=-I$(top_srcdir)/src \ -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ @@ -23,7 +24,7 @@ AM_CFLAGS=-I$(top_srcdir)/src \ bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \ lxc-test-destroytest lxc-test-saveconfig lxc-test-createtest \ lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath \ - lxc-test-cgpath + lxc-test-cgpath lxc-test-clonetest endif @@ -38,4 +39,5 @@ EXTRA_DIST = \ lxcpath.c \ saveconfig.c \ shutdowntest.c \ + clonetest.c \ startone.c diff --git a/src/tests/clonetest.c b/src/tests/clonetest.c new file mode 100644 index 0000000..fcb5ea6 --- /dev/null +++ b/src/tests/clonetest.c @@ -0,0 +1,178 @@ +/* liblxcapi + * + * Copyright © 2012 Serge Hallyn <serge.hal...@ubuntu.com>. + * Copyright © 2012 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#include "../lxc/lxccontainer.h" + +#include <unistd.h> +#include <signal.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <errno.h> + +#define MYNAME "clonetest1" +#define MYNAME2 "clonetest2" + +int main(int argc, char *argv[]) +{ + struct lxc_container *c = NULL, *c2 = NULL, *c3 = NULL; + int ret = 1; + + /* start from a clean slate: remove leftovers of a previous run */ + c = lxc_container_new(MYNAME, NULL); + c2 = lxc_container_new(MYNAME2, NULL); + if (c) { + c->destroy(c); + lxc_container_put(c); + c = NULL; + } + if (c2) { + c2->destroy(c2); + lxc_container_put(c2); + c2 = NULL; + } + + if ((c = lxc_container_new(MYNAME, NULL)) == NULL) { + fprintf(stderr, "%d: error opening lxc_container %s\n", __LINE__, MYNAME); + ret = 1; + goto out; + } + c->save_config(c, NULL); + if (!c->createl(c, "ubuntu", NULL)) { + fprintf(stderr, "%d: failed to create a container\n", __LINE__); + goto out; + } + c->load_config(c, NULL); + + if (!c->is_defined(c)) { + fprintf(stderr, "%d: %s thought it was not defined\n", __LINE__, MYNAME); + goto out; + } + + c2 = c->clone(c, MYNAME2, NULL, 0, NULL, NULL, 0); + if (!c2) { + fprintf(stderr, "%d: %s clone returned NULL\n", __LINE__, MYNAME2); + goto out; + } + + /* check the clone itself, not the original it was made from */ + if (!c2->is_defined(c2)) { + fprintf(stderr, "%d: %s not defined after clone\n", __LINE__, MYNAME2); + goto out; + } + + fprintf(stderr, "directory backing store tests passed\n"); + + // now test with lvm + // Only do this if clonetestlvm1 exists - user has to set this up + // in advance + //c2->destroy(c2); + lxc_container_put(c2); + //c->destroy(c); + lxc_container_put(c); + c = NULL; + + c2 = lxc_container_new("clonetestlvm2", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } + c2 = lxc_container_new("clonetest-o1", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } + c2 = lxc_container_new("clonetest-o2", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } +
c2 = NULL; + + // lvm-copied + c = lxc_container_new("clonetestlvm1", NULL); + if (!c) { + fprintf(stderr, "failed loading clonetestlvm1\n"); + goto out; + } + if (!c->is_defined(c)) { + fprintf(stderr, "clonetestlvm1 does not exist, skipping lvm tests\n"); + ret = 0; + goto out; + } + + if ((c2 = c->clone(c, "clonetestlvm2", NULL, 0, NULL, NULL, 0)) == NULL) { + fprintf(stderr, "lvm clone failed\n"); + goto out; + } + + lxc_container_put(c2); + + // lvm-snapshot + c2 = lxc_container_new("clonetestlvm3", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + c2 = NULL; + } + + if ((c2 = c->clone(c, "clonetestlvm3", NULL, LXC_CLONE_SNAPSHOT, NULL, NULL, 0)) == NULL) { + fprintf(stderr, "lvm clone failed\n"); + goto out; + } + lxc_container_put(c2); + lxc_container_put(c); + c = c2 = NULL; + + if ((c = lxc_container_new(MYNAME, NULL)) == NULL) { + fprintf(stderr, "error opening original container for overlay test\n"); + goto out; + } + + // Now create an overlayfs clone of a dir-backed container + if ((c2 = c->clone(c, "clonetest-o1", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0)) == NULL) { + fprintf(stderr, "overlayfs clone of dir failed\n"); + goto out; + } + + // Now create an overlayfs clone of the overlayfs clone + if ((c3 = c2->clone(c2, "clonetest-o2", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0)) == NULL) { + fprintf(stderr, "overlayfs clone of overlayfs failed\n"); + goto out; + } + + fprintf(stderr, "all clone tests passed for %s\n", c->name); + ret = 0; + +out: + if (c3) { + lxc_container_put(c3); + } + if (c2) { + //c2->destroy(c2); // keep around to verify manuall + lxc_container_put(c2); + } + if (c) { + //c->destroy(c); + lxc_container_put(c); + } + exit(ret); +} -- 1.8.1.2 ------------------------------------------------------------------------------ Precog is a next-generation analytics platform capable of advanced analytics on semi-structured data. 
The platform includes APIs for building apps and a phenomenal toolset for data science. Developers can use our toolset for easy data analysis & visualization. Get a free account! http://www2.precog.com/precogplatform/slashdotnewsletter _______________________________________________ Lxc-devel mailing list Lxc-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/lxc-devel