Currently sync reads the 've->root_path.mnt' mount and iterates over its children.
This doesn't work, because
 a) not all in-container mounts are in the child list of the root mount.
 b) ve->root_path.mnt points to an incorrect 'struct mount *'.

This patch slightly reworks the mount traversal. Now sync iterates over all
mounts of the mount namespaces in the ve. The list of the ve's mount namespaces
is maintained via mntns_list.

https://jira.sw.ru/browse/PSBM-44125

Signed-off-by: Andrey Ryabinin <[email protected]>
---
 fs/mount.h         |  1 +
 fs/namespace.c     |  7 +++++++
 fs/sync.c          | 40 +++++++++++++++++++++-------------------
 include/linux/ve.h |  1 +
 kernel/ve/ve.c     |  4 ++++
 5 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index b496064..285484c 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,6 +7,7 @@ struct mnt_namespace {
        unsigned int            proc_inum;
        struct mount *  root;
        struct list_head        list;
+       struct list_head        mntns_list;
        struct user_namespace   *user_ns;
        u64                     seq;    /* Sequence number to prevent loops */
        wait_queue_head_t poll;
diff --git a/fs/namespace.c b/fs/namespace.c
index fa9ee9e..8c02d0c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2569,6 +2569,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
        atomic_set(&new_ns->count, 1);
        new_ns->root = NULL;
        INIT_LIST_HEAD(&new_ns->list);
+       INIT_LIST_HEAD(&new_ns->mntns_list);
        init_waitqueue_head(&new_ns->poll);
        new_ns->event = 0;
        new_ns->user_ns = get_user_ns(user_ns);
@@ -2609,6 +2610,8 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
        list_add_tail(&new_ns->list, &new->mnt_list);
        br_write_unlock(&vfsmount_lock);
 
+       list_add(&new_ns->mntns_list, &get_exec_env()->mntns_list);
+
        /*
         * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
         * as belonging to new namespace.  We have already acquired a private
@@ -2673,7 +2676,10 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
                struct mount *mnt = real_mount(m);
                mnt->mnt_ns = new_ns;
                new_ns->root = mnt;
+               down_write(&namespace_sem);
                list_add(&mnt->mnt_list, &new_ns->list);
+               list_add(&new_ns->mntns_list, &get_exec_env()->mntns_list);
+               up_write(&namespace_sem);
        } else {
                mntput(m);
        }
@@ -2954,6 +2960,7 @@ void put_mnt_ns(struct mnt_namespace *ns)
        namespace_lock();
        br_write_lock(&vfsmount_lock);
        umount_tree(ns->root, 0);
+       list_del(&ns->mntns_list);
        br_write_unlock(&vfsmount_lock);
        namespace_unlock();
        free_mnt_ns(ns);
diff --git a/fs/sync.c b/fs/sync.c
index 0eb621f..abb52f5 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -129,33 +129,35 @@ static int sync_filesystem_collected(struct list_head *sync_list, struct super_b
 
 static int sync_collect_filesystems(struct ve_struct *ve, struct list_head 
*sync_list)
 {
-       struct mount *root = real_mount(ve->root_path.mnt);
        struct mount *mnt;
+       struct mnt_namespace *mnt_ns;
        struct sync_sb *ss;
        int ret = 0;
 
        BUG_ON(!list_empty(sync_list));
 
        down_read(&namespace_sem);
-       for (mnt = root; mnt; mnt = next_mnt(mnt, root)) {
-               if (sync_filesystem_collected(sync_list, mnt->mnt.mnt_sb))
-                       continue;
-
-               ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-               if (ss == NULL) {
-                       ret = -ENOMEM;
-                       break;
+       list_for_each_entry(mnt_ns, &ve->mntns_list, mntns_list) {
+               list_for_each_entry(mnt, &mnt_ns->list, mnt_list) {
+                       if (sync_filesystem_collected(sync_list, 
mnt->mnt.mnt_sb))
+                               continue;
+
+                       ss = kmalloc(sizeof(*ss), GFP_KERNEL);
+                       if (ss == NULL) {
+                               ret = -ENOMEM;
+                               break;
+                       }
+                       ss->sb = mnt->mnt.mnt_sb;
+                       /*
+                        * We hold mount point and thus can be sure, that 
superblock is
+                        * alive. And it means, that we can safely increase 
it's usage
+                        * counter.
+                        */
+                       spin_lock(&sb_lock);
+                       ss->sb->s_count++;
+                       spin_unlock(&sb_lock);
+                       list_add_tail(&ss->list, sync_list);
                }
-               ss->sb = mnt->mnt.mnt_sb;
-               /*
-                * We hold mount point and thus can be sure, that superblock is
-                * alive. And it means, that we can safely increase it's usage
-                * counter.
-                */
-               spin_lock(&sb_lock);
-               ss->sb->s_count++;
-               spin_unlock(&sb_lock);
-               list_add_tail(&ss->list, sync_list);
        }
        up_read(&namespace_sem);
        return ret;
diff --git a/include/linux/ve.h b/include/linux/ve.h
index e603d9e..243bea1 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -115,6 +115,7 @@ struct ve_struct {
        struct list_head        devmnt_list;
        struct mutex            devmnt_mutex;
 
+       struct list_head        mntns_list;
        struct kmapset_key      ve_sysfs_perms;
 
 #ifdef CONFIG_AIO
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 231f398..ffd55e4 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -80,6 +80,7 @@ struct ve_struct ve0 = {
        .sched_lat_ve.cur       = &ve0_lat_stats,
        .init_cred              = &init_cred,
        .mnt_nr                 = 0,
+       .mntns_list             = LIST_HEAD_INIT(ve0.mntns_list),
 };
 EXPORT_SYMBOL(ve0);
 
@@ -652,6 +653,7 @@ do_init:
        INIT_LIST_HEAD(&ve->devices);
        INIT_LIST_HEAD(&ve->ve_list);
        INIT_LIST_HEAD(&ve->devmnt_list);
+       INIT_LIST_HEAD(&ve->mntns_list);
        mutex_init(&ve->devmnt_mutex);
        kmapset_init_key(&ve->ve_sysfs_perms);
 
@@ -706,6 +708,8 @@ static void ve_destroy(struct cgroup *cg)
        kmapset_unlink(&ve->ve_sysfs_perms, &ve_sysfs_perms);
        free_ve_devmnts(ve);
 
+       WARN_ON(!list_empty(&ve->mntns_list));
+
        ve_log_destroy(ve);
 #if IS_ENABLED(CONFIG_BINFMT_MISC)
        kfree(ve->binfmt_misc);
-- 
2.4.10

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to