On 06.08.2021 17:29, Alexander Mikhalitsyn wrote:
After rebase to RHEL 8.4 binfmt_misc fs uses new fscontext API,
our implementation is incorrect. We have to use get_tree_keyed()
helper instead of get_tree_single() which allows only one
superblock per HN.

Fixes: 90fb0e274 ("ve/fs/binfmt: virtualization")

https://jira.sw.ru/browse/PSBM-132709

Signed-off-by: Alexander Mikhalitsyn <[email protected]>
---
  fs/binfmt_misc.c | 42 ++++++++++++++++++++++++++++++++++++------
  1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c21d03fd3f15..ad1d9d603034 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -833,10 +833,8 @@ static const struct file_operations bm_status_operations = 
{
  static void bm_put_super(struct super_block *sb)
  {
        struct binfmt_misc *bm_data = BINFMT_MISC(sb);
-       struct ve_struct *ve = sb->s_fs_info;
bm_data->enabled = 0;
-       put_ve(ve);
  }
static const struct super_operations s_ops = {
@@ -875,8 +873,6 @@ static int bm_fill_super(struct super_block *sb, struct 
fs_context *fc)
        }
sb->s_op = &s_ops;
-       sb->s_fs_info = ve;
-       get_ve(ve);
bm_data->enabled = 1; @@ -885,10 +881,36 @@ static int bm_fill_super(struct super_block *sb, struct fs_context *fc) static int bm_get_tree(struct fs_context *fc)
  {
-       return get_tree_single(fc, bm_fill_super);
+       struct ve_struct *ve = get_exec_env();
+
+       /*
+        * We need one binfmt_misc superblock per VE,
+        * use get_tree_keyed() helper to get vfs_tree.
+        *
+        * It allows us to find sb by key (in our case ve is the key),
+        * and if it doesn't exists creates new.
+        *
+        * Important: we take ve refcnt here. It will be put
+        * in one of two places:
+        * 1. bm_free_fc() on error path (wrong mnt opt provided for instance)
+        * 2. bm_umount() when sb refcnt becomes zero (last mount umounted)
+        */
+       return get_tree_keyed(fc, bm_fill_super, get_ve(ve));

I can imagine that we can get here as much times as we want (N), each mount syscall would get us here. So in case of no error paths we take N references on same VE but ->kill_sb( would be called only once and we release only one reference. This way VE would likely become stuck forever...

+}
+
+static void bm_free_fc(struct fs_context *fc)
+{
+       /*
+        * fc->s_fs_info will be NULL if *no* error was occured
+        * see fs/super.c sget_fc() helper
+        */
+       if (fc->s_fs_info)
+               put_ve(fc->s_fs_info);
  }
+
  static const struct fs_context_operations bm_context_ops = {
+       .free           = bm_free_fc,
        .get_tree       = bm_get_tree,
  };
@@ -898,6 +920,14 @@ static int bm_init_fs_context(struct fs_context *fc)
        return 0;
  }
+static void bm_umount(struct super_block *sb)
+{
+       struct ve_struct *ve = sb->s_fs_info;
+
+       kill_litter_super(sb);
+       put_ve(ve);
+}
+
  static struct linux_binfmt misc_format = {
        .module = THIS_MODULE,
        .load_binary = load_misc_binary,
@@ -907,7 +937,7 @@ static struct file_system_type bm_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "binfmt_misc",
        .init_fs_context = bm_init_fs_context,
-       .kill_sb        = kill_litter_super,
+       .kill_sb        = bm_umount,
        .fs_flags       = FS_VIRTUALIZED | FS_VE_MOUNT,
  };
  MODULE_ALIAS_FS("binfmt_misc");


--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to