13 нояб. 2015 г. 14:58 пользователь Evgenii Shatokhin <eshatok...@odin.com> написал: > > 13.11.2015 16:40, Stanislav Kinsburskiy пишет: > > Acked-by: Stanislav Kinsburskiy <skinsbur...@odin.com> > > > > BTW, have you checked that this patch works as expected? > > Just to make sure we didn't miss anything. > > I have built the kernel RPMs with it and re-checked that this "mount bomb" > https://github.com/avagin/ctb/blob/master/mounts/run.sh > ends with "cannot allocate memory" and does no harm. > > I did not test the !CONFIG_VE case though. BTW, no one needs our kernel without CONFIG_VE. I suspect that it's not even compilable without this option.
> > > > > 13 нояб. 2015 г. 14:01 пользователь Evgenii Shatokhin <eshatok...@odin.com> > > написал: > >> > >> https://jira.sw.ru/browse/PSBM-34438 > >> > >> (This fix was adapted from PCS6.) > >> > >> It is possible for a container to create lots of mount points, which may > >> make operations with them slower. As some of these operations take > >> global locks (namespace_sem, vfsmount_lock), it might affect other > >> containers as well. > >> > >> Let us limit the maximum number of mount points a VE may create. The > >> limit can be customized via /proc/sys/fs/ve-mount-nr knob. > >> > >> Changes in v.3: > >> > >> * Revisited VE-specific parts of the patch to reduce the impact on the > >> generic code. > >> > >> Changes in v.2: > >> > >> * The situations where VE0 mounts something and another VE unmounts it > >> seem to be of no concern. If so, it is OK not to alter struct mount: > >> the mount counter for a VE may become unbalanced but this is > >> acceptable here. > >> > >> * The sysctl knob is now defined alongside other VE sysctls. 
> >> > >> Signed-off-by: Evgenii Shatokhin <eshatok...@odin.com> > >> --- > >> fs/namespace.c | 9 ++++++++- > >> include/linux/ve.h | 27 +++++++++++++++++++++++++++ > >> kernel/ve/ve.c | 2 ++ > >> kernel/ve/veowner.c | 15 +++++++++++++++ > >> 4 files changed, 52 insertions(+), 1 deletion(-) > >> > >> diff --git a/fs/namespace.c b/fs/namespace.c > >> index 8909c13..b4ea5a5 100644 > >> --- a/fs/namespace.c > >> +++ b/fs/namespace.c > >> @@ -165,7 +165,12 @@ unsigned int mnt_get_count(struct mount *mnt) > >> > >> static struct mount *alloc_vfsmnt(const char *name) > >> { > >> - struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); > >> + struct mount *mnt; > >> + > >> + if (!ve_mount_allowed()) > >> + return NULL; > >> + > >> + mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); > >> if (mnt) { > >> int err; > >> > >> @@ -202,6 +207,7 @@ static struct mount *alloc_vfsmnt(const char *name) > >> INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); > >> #endif > >> } > >> + ve_mount_nr_inc(); > >> return mnt; > >> > >> #ifdef CONFIG_SMP > >> @@ -542,6 +548,7 @@ int sb_prepare_remount_readonly(struct super_block > >> *sb) > >> > >> static void free_vfsmnt(struct mount *mnt) > >> { > >> + ve_mount_nr_dec(); > >> kfree(mnt->mnt_devname); > >> mnt_free_id(mnt); > >> #ifdef CONFIG_SMP > >> diff --git a/include/linux/ve.h b/include/linux/ve.h > >> index 86b95c3..1249102 100644 > >> --- a/include/linux/ve.h > >> +++ b/include/linux/ve.h > >> @@ -128,6 +128,10 @@ struct ve_struct { > >> unsigned long aio_nr; > >> unsigned long aio_max_nr; > >> #endif > >> + /* Number of mounts. May become unbalanced if VE0 mounts something > >> + * and the VE unmounts it. This is acceptable. 
> >> + */ > >> + int mnt_nr; > >> }; > >> > >> struct ve_devmnt { > >> @@ -145,6 +149,8 @@ extern int nr_ve; > >> extern struct proc_dir_entry *proc_vz_dir; > >> extern struct cgroup_subsys ve_subsys; > >> > >> +extern unsigned int sysctl_ve_mount_nr; > >> + > >> #ifdef CONFIG_VE_IPTABLES > >> extern __u64 ve_setup_iptables_mask(__u64 init_mask); > >> #endif > >> @@ -222,6 +228,23 @@ extern struct tty_driver *vtty_console_driver(int > >> *index); > >> extern int vtty_open_master(envid_t veid, int idx); > >> #endif /* CONFIG_TTY */ > >> > >> +static inline int ve_mount_allowed(void) > >> +{ > >> + struct ve_struct *ve = get_exec_env(); > >> + > >> + return ve_is_super(ve) || ve->mnt_nr < sysctl_ve_mount_nr; > >> +} > >> + > >> +static inline void ve_mount_nr_inc(void) > >> +{ > >> + get_exec_env()->mnt_nr++; > >> +} > >> + > >> +static inline void ve_mount_nr_dec(void) > >> +{ > >> + get_exec_env()->mnt_nr--; > >> +} > >> + > >> #else /* CONFIG_VE */ > >> > >> #define ve_uevent_seqnum uevent_seqnum > >> @@ -253,6 +276,10 @@ static inline void monotonic_abs_to_ve(clockid_t > >> which_clock, > >> struct timespec *tp) { } > >> static inline void monotonic_ve_to_abs(clockid_t which_clock, > >> struct timepsec *tp) { } > >> + > >> +static inline int ve_mount_allowed(void) { return 1; } > >> +static inline void ve_mount_nr_inc(void) { } > >> +static inline void ve_mount_nr_dec(void) { } > >> #endif /* CONFIG_VE */ > >> > >> #endif /* _LINUX_VE_H */ > >> diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c > >> index e9219e6..ac2babb 100644 > >> --- a/kernel/ve/ve.c > >> +++ b/kernel/ve/ve.c > >> @@ -82,6 +82,7 @@ struct ve_struct ve0 = { > >> #endif > >> .sched_lat_ve.cur = &ve0_lat_stats, > >> .init_cred = &init_cred, > >> + .mnt_nr = 0, > >> }; > >> EXPORT_SYMBOL(ve0); > >> > >> @@ -653,6 +654,7 @@ do_init: > >> ve->aio_nr = 0; > >> ve->aio_max_nr = AIO_MAX_NR_DEFAULT; > >> #endif > >> + ve->mnt_nr = 0; > >> > >> return &ve->css; > >> > >> diff --git a/kernel/ve/veowner.c 
b/kernel/ve/veowner.c > >> index 316e4d0..1a7e735 100644 > >> --- a/kernel/ve/veowner.c > >> +++ b/kernel/ve/veowner.c > >> @@ -55,6 +55,14 @@ static void prepare_proc(void) > >> int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT; > >> static int ve_area_access_check; > >> > >> +/* > >> + * Operations with a big amount of mount points can require a lot of > >> time. > >> + * These operations take the global lock namespace_sem, so they can > >> affect > >> + * other containers. Let us allow no more than sysctl_ve_mount_nr mount > >> + * points for a VE. > >> + */ > >> +unsigned int sysctl_ve_mount_nr = 4096; > >> + > >> static struct ctl_table vz_fs_table[] = { > >> { > >> .procname = "ve-area-access-check", > >> @@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = { > >> .mode = 0644 | S_ISVTX, > >> .proc_handler = &proc_dointvec_virtual, > >> }, > >> + { > >> + .procname = "ve-mount-nr", > >> + .data = &sysctl_ve_mount_nr, > >> + .maxlen = sizeof(sysctl_ve_mount_nr), > >> + .mode = 0644, > >> + .proc_handler = proc_dointvec, > >> + }, > >> { 0 } > >> }; > >> > >> -- > >> 2.3.2 > >> > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel