The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-123.1.2.vz7.4.10 ------> commit b41a8db9cdf3a598c9abe35cb968b0ab476e8eeb Author: Cyrill Gorcunov <gorcu...@odin.com> Date: Wed May 6 20:34:38 2015 +0400
ve/cgroup: devices -- Modify exception list for docker sake When docker starts up it modifies nested device cgroups. The devices it needs to operate with are almost the same as those we already have in our exception list, except: 1) Add ACC_MKNOD for every device we have: This is a harmless operation, done simply to make docker happy. 2) Add setting up ACC_MKNOD for devices created for a container via set_device_perms_ve. At the moment this is important for VT use inside a container. 3) Add MISC_MAJOR:200 for the tun device: Tun/tap is safe to use inside a container as far as I know. p.s. khorenko@ approved this kind of change in pcs7. 4) For some reason docker requires write access to /dev/random, so grant it (since we're prohibiting writing to /dev/random from inside of ve on kernel level, it's safe to do). v2: - Use ns_capable(CAP_VE_SYS_ADMIN) instead of plain capable(CAP_SYS_ADMIN) for docker sake. Note the vanilla kernel no longer has any can_attach helper, but to make the patch smaller let's keep it. ns_capable should be enough for security; after all, the user in a container may attach only its own tasks. v3: - Use nsown_capable. v4: - Switch back to plain capable test. It turned out that the vanilla kernel has no cap test in devcgroup_can_attach (nor does it have this helper), while nsown_capable looks to be too relaxed. So I think we could use plain capable() as we do in the PCS6 kernel, while at the same time requiring CAP_VE_SYS_ADMIN to be present inside the container. 
Signed-off-by: Cyrill Gorcunov <gorcu...@odin.com> Acked-by: Konstantin Khorenko <khore...@odin.com> CC: Vladimir Davydov <vdavy...@odin.com> CC: Pavel Emelyanov <xe...@odin.com> CC: Andrey Vagin <ava...@odin.com> --- security/device_cgroup.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 53adb00..31024f7 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -16,6 +16,7 @@ #include <uapi/linux/vzcalluser.h> #include <linux/major.h> #include <linux/module.h> +#include <linux/capability.h> #define ACC_MKNOD 1 #define ACC_READ 2 @@ -80,7 +81,7 @@ static int devcgroup_can_attach(struct cgroup *new_cgrp, { struct task_struct *task = cgroup_taskset_first(set); - if (current != task && !capable(CAP_SYS_ADMIN)) + if (current != task && !capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN)) return -EPERM; return 0; } @@ -662,7 +663,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, struct cgroup *p = devcgroup->css.cgroup; struct dev_cgroup *parent = NULL; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_VE_SYS_ADMIN)) return -EPERM; if (p->parent) @@ -984,21 +985,22 @@ int devcgroup_inode_mknod(int mode, dev_t dev) #ifdef CONFIG_VE static struct dev_exception_item default_whitelist_items[] = { - { ~0, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD }, - { ~0, ~0, DEV_BLOCK, ACC_HIDDEN | ACC_MKNOD }, - { UNIX98_PTY_MASTER_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { UNIX98_PTY_SLAVE_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { PTY_MASTER_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { PTY_SLAVE_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { MEM_MAJOR, /* null */ 3, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { MEM_MAJOR, /* zero */ 5, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { MEM_MAJOR, /* full */ 7, DEV_CHAR, ACC_HIDDEN | 
ACC_READ | ACC_WRITE }, - { TTYAUX_MAJOR, /* tty */ 0, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { TTYAUX_MAJOR, /* console */ 1, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { TTYAUX_MAJOR, /* ptmx */ 2, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { MEM_MAJOR, /* random */ 8, DEV_CHAR, ACC_HIDDEN | ACC_READ }, - { MEM_MAJOR, /* urandom */ 9, DEV_CHAR, ACC_HIDDEN | ACC_READ | ACC_WRITE }, - { MEM_MAJOR, /* kmsg */ 11, DEV_CHAR, ACC_HIDDEN | ACC_WRITE }, + { ~0, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD }, + { ~0, ~0, DEV_BLOCK, ACC_HIDDEN | ACC_MKNOD }, + { UNIX98_PTY_MASTER_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, + { UNIX98_PTY_SLAVE_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, + { PTY_MASTER_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, + { PTY_SLAVE_MAJOR, ~0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, + { MEM_MAJOR, 3, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* null */ + { MEM_MAJOR, 5, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* zero */ + { MEM_MAJOR, 7, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* full */ + { TTYAUX_MAJOR, 0, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* tty */ + { TTYAUX_MAJOR, 1, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* console */ + { TTYAUX_MAJOR, 2, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* ptmx */ + { MEM_MAJOR, 8, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* random */ + { MEM_MAJOR, 9, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* urandom */ + { MEM_MAJOR, 11, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_WRITE }, /* kmsg */ + { MISC_MAJOR, 200, DEV_CHAR, ACC_HIDDEN | ACC_MKNOD | ACC_READ | ACC_WRITE }, /* tun */ }; static LIST_HEAD(default_whitelist); @@ -1069,7 +1071,7 @@ int devcgroup_set_perms_ve(struct cgroup *cgroup, else return -EINVAL; - new.access = decode_ve_perms(mask); + new.access = 
decode_ve_perms(mask) | (mask ? ACC_MKNOD : 0); new.major = new.minor = ~0; switch (type & VE_USE_MASK) { _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel