On 6/2/26 6:44 PM, Vladimir Riabchun wrote:
In case of !CONFIG_VE we will not return page we will return just data so we still need to check in the paths were page is used and free conditionally otherwise we will have double free. So if we have to check for it I prefer to allocate conditionally too. Its better to minimize memory use for something that is unused.On 6/2/26 16:44, Vasileios Almpanis wrote:In legacy mount callpaths, userspace might pass mount options as flags. These flags escape our checks in ve_devmnt_process allowing devices to be mounted inside containers with options not specified in the allowed field. Introduce helpers that take these flags and already existing tables of flag -> string representation to construct a comma separated value string from them, and append them to userspace provided data. Then pass this string to parse_monolithic_mount_data enforcing the same checks symmetrically in both mount and fsconfig syscalls. In the remount path, run legacy_merge_mount_data() before ve_devmnt_process() so container device mount policy sees MS_* flags from the legacy mount(2) API, not only the user-supplied option string. Keep ve_prepare_mount_options() for legacy parsers that do not use generic_parse_monolithic(). https://virtuozzo.atlassian.net/browse/VSTOR-132330 Signed-off-by: Vasileios Almpanis <[email protected]> Feature: ve: ve generic structures --- Changes since v1: - Replace open-coded flag loops with append_entry() helper (pointer- advancing style) that unifies the comma-insert-copy pattern across all three call sites- Rework legacy_merge_mount_data() to allocate the page upfront, write user data first then append sb flags via vfs_format_sb_flags(); this eliminates the intermediate flags_buf[128], the total size calculation,and the +1/+2 arithmetic - Fix inconsistent error code: replace -EINVAL with -ENOSPC for the buffer-too-small case - Add comment on FS_BINARY_MOUNTDATA explaining why those filesystems are skipped - Add blank line before return in legacy_merge_mount_data() - Remove excess blank line after parse_monolithic_mount_data() in do_remount() fs/fs_context.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 8 ++++ fs/namespace.c | 32 +++++++++++--- 3 files changed, 147 insertions(+), 7 deletions(-) diff --git a/fs/fs_context.c b/fs/fs_context.c index 76f34f3d468e..ae6ea684d596 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c@@ -81,6 +81,120 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)return -ENOPARAM; } +#ifdef CONFIG_VE +/* + * Append s to the buffer at dst, preceded by a comma if len > 0.+ * cap is the total buffer capacity (excluding the null terminator slot). + * Updates *len and returns the new write position, or ERR_PTR(-ENOSPC).+ */+static char *append_entry(char *dst, size_t *len, size_t cap, const char *s)+{ + size_t s_len = strlen(s); + size_t write_chars = (*len > 0) + s_len; + + if (*len + write_chars > cap) + return ERR_PTR(-ENOSPC); + if (*len) + *dst++ = ','; + memcpy(dst, s, s_len); + *len += write_chars; + return dst + s_len; +} + +/*+ * Append fc->sb_flags as comma-separated option names into [*dstp, cap).+ * Updates *dstp and *lenp on success. + * Matches vfs_parse_sb_flag() / common_{set,clear}_sb_flag tables. + */ +static int vfs_format_sb_flags(struct fs_context *fc, + char **dstp, size_t *lenp, size_t cap) +{ + const struct constant_table *p; + unsigned int flags = fc->sb_flags; + unsigned int mask = fc->sb_flags_mask; + char *dst = *dstp; + + for (p = common_set_sb_flag; p->name; p++) { + if (!(flags & p->value)) + continue; + dst = append_entry(dst, lenp, cap, p->name); + if (IS_ERR(dst)) + return PTR_ERR(dst); + } + + for (p = common_clear_sb_flag; p->name; p++) { + if (!(mask & p->value) || (flags & p->value)) + continue; + dst = append_entry(dst, lenp, cap, p->name); + if (IS_ERR(dst)) + return PTR_ERR(dst); + } + + *dstp = dst; + return 0; +} + +/*+ * For legacy mount(2), MS_* mount flags are folded into fc->sb_flags and are + * not present in the monolithic data string. Build a page with user data + * followed by those flags for ve_devmnt checks in vfs_parse_monolithic_sep.+ *+ * Returns @data when nothing needs to be added, a new page otherwise, or + * ERR_PTR() on failure. The caller must free_page() when the result != @data.+ */ +void *legacy_merge_mount_data(struct fs_context *fc, void *data) +{ + struct ve_struct *ve = get_exec_env(); + char *page, *dst; + size_t len = 0; + int err; +My idea here was to allocate page and copy everything from data here. Something like: page = (char *)__get_free_page(GFP_KERNEL); dst = page; if (!page) return ERR_PTR(-ENOMEM); if (data) dst = append_entry(dst, &len, PAGE_SIZE - 1, (char *)data); and then always return new page.I think it won't be a big overhead - only one temporal page for a big event - mount.
This needs to stay for !CONFIG_VE which just returns data otherwise we will have double-free+ if (ve_is_super(ve)) + return data; + + if (!fc->fs_type || !(fc->fs_type->fs_flags & FS_REQUIRES_DEV)) + return data; + + /* + * Filesystems with binary mount data (e.g. btrfs) bypass option + * string parsing entirely, so our checks cannot apply here. + */ + if (fc->fs_type->fs_flags & FS_BINARY_MOUNTDATA) + return data; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return ERR_PTR(-ENOMEM); + + dst = page; + + if (data && *(char *)data) { + dst = append_entry(dst, &len, PAGE_SIZE - 1, (char *)data); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + goto err_free; + } + } + + err = vfs_format_sb_flags(fc, &dst, &len, PAGE_SIZE - 1); + if (err) + goto err_free; + + if (!len) {We can just return page, it's fine that it is empty, we already did the hardest part - allocation.+ free_page((unsigned long)page); + return data; + } + + *dst = '\0'; + + return page; + +err_free: + free_page((unsigned long)page); + return ERR_PTR(err); +} +#endif /* CONFIG_VE */ + /** * vfs_parse_fs_param_source - Handle setting "source" via parameter * @fc: The filesystem context to modify diff --git a/fs/internal.h b/fs/internal.h index 0064345cb9d0..120c239df394 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -46,6 +46,14 @@ extern void __init chrdev_init(void); */ extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); +#ifdef CONFIG_VE +void *legacy_merge_mount_data(struct fs_context *fc, void *data); +#else+static inline void *legacy_merge_mount_data(struct fs_context *fc, void *data)+{ + return data; +} +#endif extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); diff --git a/fs/namespace.c b/fs/namespace.c index acd4507e1247..b2defe2006ca 100644 --- a/fs/namespace.c +++ b/fs/namespace.c@@ -3295,6 +3295,7 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,struct super_block *sb = path->mnt->mnt_sb; struct mount *mnt = real_mount(path->mnt); struct fs_context *fc; + void *mnt_data = NULL; if (!check_mnt(mnt)) return -EINVAL;@@ -3315,13 +3316,17 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,*/ fc->oldapi = true; - err = ve_prepare_mount_options(fc, data); - if (err) { - put_fs_context(fc); - return err; + mnt_data = legacy_merge_mount_data(fc, data); + if (IS_ERR(mnt_data)) { + err = PTR_ERR(mnt_data); + goto err; } - err = parse_monolithic_mount_data(fc, data); + err = ve_prepare_mount_options(fc, mnt_data); + if (err) + goto free_mnt_data; + + err = parse_monolithic_mount_data(fc, mnt_data); if (!err) { down_write(&sb->s_umount); err = -EPERM;@@ -3338,6 +3343,10 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,mnt_warn_timestamp_expiry(path, &mnt->mnt); +free_mnt_data:If we always return new page, we can just do free_page without any checks.
+ if (mnt_data && mnt_data != data) + free_page((unsigned long)mnt_data); +err: put_fs_context(fc); return err; }@@ -3774,8 +3783,17 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,subtype, strlen(subtype)); if (!err && name) err = vfs_parse_fs_string(fc, "source", name, strlen(name)); - if (!err) - err = parse_monolithic_mount_data(fc, data); + if (!err) { + void *mnt_data = legacy_merge_mount_data(fc, data); + + if (IS_ERR(mnt_data)) { + err = PTR_ERR(mnt_data); + } else { + err = parse_monolithic_mount_data(fc, mnt_data); + if (mnt_data != data) + free_page((unsigned long)mnt_data); + } + } if (!err && !mount_capable(fc)) err = -EPERM; if (!err)
-- Best regards, Vasileios Almpanis Software Developer, Virtuozzo. _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
