When freeing a deeply nested user namespace free_user_ns calls
put_user_ns on it's parent which may in turn call free_user_ns again.
When -fno-optimize-sibling-calls is passed to gcc one stack frame per
user namespace is left on the stack, potentially overflowing the
kernel stack.  CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls
so we can't count on gcc to optimize this code.

Remove struct kref and use a plain atomic_t.  Making the code more
flexible and easier to comprehend.  Make the loop in free_user_ns
explict to guarantee that the stack does not overflow with
CONFIG_FRAME_POINTER enabled.

I have tested this fix with a simple program that uses unshare to
create a deeply nested user namespace structure and then calls exit.
With 1000 nesteuser namespaces before this change running my test
program causes the kernel to die a horrible death.  With 10,000,000
nested user namespaces after this change my test program runs to
completion and causes no harm.

Pointed-out-by: Vasily Kulikov <seg...@openwall.com>
Signed-off-by: "Eric W. Biederman" <ebied...@xmission.com>
---
 include/linux/user_namespace.h |   10 +++++-----
 kernel/user.c                  |    4 +---
 kernel/user_namespace.c        |   17 +++++++++--------
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b9bd2e6..4ce0093 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -21,7 +21,7 @@ struct user_namespace {
        struct uid_gid_map      uid_map;
        struct uid_gid_map      gid_map;
        struct uid_gid_map      projid_map;
-       struct kref             kref;
+       atomic_t                count;
        struct user_namespace   *parent;
        kuid_t                  owner;
        kgid_t                  group;
@@ -35,18 +35,18 @@ extern struct user_namespace init_user_ns;
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
        if (ns)
-               kref_get(&ns->kref);
+               atomic_inc(&ns->count);
        return ns;
 }
 
 extern int create_user_ns(struct cred *new);
 extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred);
-extern void free_user_ns(struct kref *kref);
+extern void free_user_ns(struct user_namespace *ns);
 
 static inline void put_user_ns(struct user_namespace *ns)
 {
-       if (ns)
-               kref_put(&ns->kref, free_user_ns);
+       if (ns && atomic_dec_and_test(&ns->count))
+               free_user_ns(ns);
 }
 
 struct seq_operations;
diff --git a/kernel/user.c b/kernel/user.c
index 33acb5e..57ebfd4 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -47,9 +47,7 @@ struct user_namespace init_user_ns = {
                        .count = 4294967295U,
                },
        },
-       .kref = {
-               .refcount       = ATOMIC_INIT(3),
-       },
+       .count = ATOMIC_INIT(3),
        .owner = GLOBAL_ROOT_UID,
        .group = GLOBAL_ROOT_GID,
        .proc_inum = PROC_USER_INIT_INO,
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 2b042c4..24f8ec3 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -78,7 +78,7 @@ int create_user_ns(struct cred *new)
                return ret;
        }
 
-       kref_init(&ns->kref);
+       atomic_set(&ns->count, 1);
        /* Leave the new->user_ns reference with the new user namespace. */
        ns->parent = parent_ns;
        ns->owner = owner;
@@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct 
cred **new_cred)
        return create_user_ns(cred);
 }
 
-void free_user_ns(struct kref *kref)
+void free_user_ns(struct user_namespace *ns)
 {
-       struct user_namespace *parent, *ns =
-               container_of(kref, struct user_namespace, kref);
+       struct user_namespace *parent;
 
-       parent = ns->parent;
-       proc_free_inum(ns->proc_inum);
-       kmem_cache_free(user_ns_cachep, ns);
-       put_user_ns(parent);
+       do {
+               parent = ns->parent;
+               proc_free_inum(ns->proc_inum);
+               kmem_cache_free(user_ns_cachep, ns);
+               ns = parent;
+       } while (atomic_dec_and_test(&parent->count));
 }
 EXPORT_SYMBOL(free_user_ns);
 
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to