Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-07 Thread Serge Hallyn
Quoting Tejun Heo (t...@kernel.org):
> Hello, Serge.
> 
> On Thu, Dec 03, 2015 at 04:47:06PM -0600, Serge E. Hallyn wrote:
> ...
> > +   dentry = dget(sb->s_root);
> > +   if (!kn->parent) // this is the root
> > +   return dentry;
> > +
> > +   knparent = find_kn_ancestor_below(kn, NULL);
> > +   BUG_ON(!knparent);
> 
> Doing WARN_ON() and returning failure is better, I think.  Failing ns
> mount is an okay failure mode and a lot better than crashing the
> system.

Ok - this shouldn't be user-triggerable, so if it happens it really
is a bug in our code, but I'll change it,

> Also, how about find_next_ancestor() for the name of the
> function?

Yeah it's static anyway :)

will change, squash, and resend the set.

> > +   do {
> > +   struct dentry *dtmp;
> > +   struct kernfs_node *kntmp;
> > +
> > +   if (kn == knparent)
> > +   return dentry;
> > +   kntmp = find_kn_ancestor_below(kn, knparent);
> > +   BUG_ON(!kntmp);
> > +   dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
> > +   dput(dentry);
> > +   if (IS_ERR(dtmp))
> > +   return dtmp;
> > +   knparent = kntmp;
> > +   dentry = dtmp;
> > +   } while (1);
> 
> Other than the nitpicks, looks good to me.
> 
> Thanks.
> 
> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-07 Thread Tejun Heo
Hello, Serge.

On Thu, Dec 03, 2015 at 04:47:06PM -0600, Serge E. Hallyn wrote:
...
> + dentry = dget(sb->s_root);
> + if (!kn->parent) // this is the root
> + return dentry;
> +
> + knparent = find_kn_ancestor_below(kn, NULL);
> + BUG_ON(!knparent);

Doing WARN_ON() and returning failure is better, I think.  Failing ns
mount is an okay failure mode and a lot better than crashing the
system.  Also, how about find_next_ancestor() for the name of the
function?

> + do {
> + struct dentry *dtmp;
> + struct kernfs_node *kntmp;
> +
> + if (kn == knparent)
> + return dentry;
> + kntmp = find_kn_ancestor_below(kn, knparent);
> + BUG_ON(!kntmp);
> + dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
> + dput(dentry);
> + if (IS_ERR(dtmp))
> + return dtmp;
> + knparent = kntmp;
> + dentry = dtmp;
> + } while (1);

Other than the nitpicks, looks good to me.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-07 Thread Serge Hallyn
Quoting Tejun Heo (t...@kernel.org):
> Hello, Serge.
> 
> On Thu, Dec 03, 2015 at 04:47:06PM -0600, Serge E. Hallyn wrote:
> ...
> > +   dentry = dget(sb->s_root);
> > +   if (!kn->parent) // this is the root
> > +   return dentry;
> > +
> > +   knparent = find_kn_ancestor_below(kn, NULL);
> > +   BUG_ON(!knparent);
> 
> Doing WARN_ON() and returning failure is better, I think.  Failing ns
> mount is an okay failure mode and a lot better than crashing the
> system.

Ok - this shouldn't be user-triggerable, so if it happens it really
is a bug in our code, but I'll change it,

> Also, how about find_next_ancestor() for the name of the
> function?

Yeah it's static anyway :)

will change, squash, and resend the set.

> > +   do {
> > +   struct dentry *dtmp;
> > +   struct kernfs_node *kntmp;
> > +
> > +   if (kn == knparent)
> > +   return dentry;
> > +   kntmp = find_kn_ancestor_below(kn, knparent);
> > +   BUG_ON(!kntmp);
> > +   dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
> > +   dput(dentry);
> > +   if (IS_ERR(dtmp))
> > +   return dtmp;
> > +   knparent = kntmp;
> > +   dentry = dtmp;
> > +   } while (1);
> 
> Other than the nitpicks, looks good to me.
> 
> Thanks.
> 
> -- 
> tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-07 Thread Tejun Heo
Hello, Serge.

On Thu, Dec 03, 2015 at 04:47:06PM -0600, Serge E. Hallyn wrote:
...
> + dentry = dget(sb->s_root);
> + if (!kn->parent) // this is the root
> + return dentry;
> +
> + knparent = find_kn_ancestor_below(kn, NULL);
> + BUG_ON(!knparent);

Doing WARN_ON() and returning failure is better, I think.  Failing ns
mount is an okay failure mode and a lot better than crashing the
system.  Also, how about find_next_ancestor() for the name of the
function?

> + do {
> + struct dentry *dtmp;
> + struct kernfs_node *kntmp;
> +
> + if (kn == knparent)
> + return dentry;
> + kntmp = find_kn_ancestor_below(kn, knparent);
> + BUG_ON(!kntmp);
> + dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
> + dput(dentry);
> + if (IS_ERR(dtmp))
> + return dtmp;
> + knparent = kntmp;
> + dentry = dtmp;
> + } while (1);

Other than the nitpicks, looks good to me.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-03 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 12:05:51PM -0500, Tejun Heo wrote:
> On Wed, Dec 02, 2015 at 11:02:39AM -0600, Serge E. Hallyn wrote:
> > On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> > > On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > > > Can it be flushed when we know that the cgroup is being pinned by
> > > > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > > > or we wouldn't get here)
> > > 
> > > Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> > > vfs objects.
> > 
> > Ok, thanks.  Still seems to me to be more work to actually walk the
> > path ourselves, but I'll go that route and see what it looks like :)
> 
> I just dislike having two separate paths instantiating the same
> objects and would prefer doing it the same way userland would do if
> that isn't too complex but yeah it might turn out to be a lot more
> work.
> 
> Thanks a lot!

Here's a patch to make that change.  Seems to be working for me.  If it
looks ok I can fold it into the prevoius patches and resend the new set.

PATCH 1/1] kernfs_obtain_root: switch to walking the path [fold up]

Signed-off-by: Serge Hallyn 
---
 fs/kernfs/mount.c | 80 ---
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index cc41fe1..027f4ca 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "kernfs-internal.h"
 
@@ -62,6 +63,27 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block 
*sb)
return NULL;
 }
 
+/*
+ * find the next ancestor in the path down to @child, where @parent was the
+ * parent whose child we want to find.
+ *
+ * Say the path is /a/b/c/d.  @child is d, @parent is NULL.  We return the root
+ * node.  If @parent is b, then we return the node for c.
+ * Passing in d as @parent is not ok.
+ */
+static struct kernfs_node *
+find_kn_ancestor_below(struct kernfs_node *child, struct kernfs_node *parent)
+{
+   BUG_ON(child == parent);
+
+   while (child->parent != parent) {
+   BUG_ON(!child->parent);
+   child = child->parent;
+   }
+
+   return child;
+}
+
 /**
  * kernfs_obtain_root - get a dentry for the given kernfs_node
  * @sb: the kernfs super_block
@@ -74,42 +96,34 @@ struct dentry *kernfs_obtain_root(struct super_block *sb,
  struct kernfs_node *kn)
 {
struct dentry *dentry;
-   struct inode *inode;
+   struct kernfs_node *knparent = NULL;
 
BUG_ON(sb->s_op != _sops);
 
-   /* inode for the given kernfs_node should already exist. */
-   inode = kernfs_get_inode(sb, kn);
-   if (!inode) {
-   pr_debug("kernfs: could not get inode for '");
-   pr_cont_kernfs_path(kn);
-   pr_cont("'.\n");
-   return ERR_PTR(-EINVAL);
-   }
-
-   /* instantiate and link root dentry */
-   dentry = d_obtain_root(inode);
-   if (!dentry) {
-   pr_debug("kernfs: could not get dentry for '");
-   pr_cont_kernfs_path(kn);
-   pr_cont("'.\n");
-   return ERR_PTR(-ENOMEM);
-   }
-
-   /*
-* If this is a new dentry, set it up. We need kernfs_mutex because
-* this may be called by callers other than kernfs_fill_super.
-*/
-   mutex_lock(_mutex);
-   if (!dentry->d_fsdata) {
-   kernfs_get(kn);
-   dentry->d_fsdata = kn;
-   } else {
-   WARN_ON(dentry->d_fsdata != kn);
-   }
-   mutex_unlock(_mutex);
-
-   return dentry;
+   dentry = dget(sb->s_root);
+   if (!kn->parent) // this is the root
+   return dentry;
+
+   knparent = find_kn_ancestor_below(kn, NULL);
+   BUG_ON(!knparent);
+
+   do {
+   struct dentry *dtmp;
+   struct kernfs_node *kntmp;
+
+   if (kn == knparent)
+   return dentry;
+   kntmp = find_kn_ancestor_below(kn, knparent);
+   BUG_ON(!kntmp);
+   dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
+   dput(dentry);
+   if (IS_ERR(dtmp))
+   return dtmp;
+   knparent = kntmp;
+   dentry = dtmp;
+   } while (1);
+
+   // notreached
 }
 
 static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-03 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 12:05:51PM -0500, Tejun Heo wrote:
> On Wed, Dec 02, 2015 at 11:02:39AM -0600, Serge E. Hallyn wrote:
> > On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> > > On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > > > Can it be flushed when we know that the cgroup is being pinned by
> > > > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > > > or we wouldn't get here)
> > > 
> > > Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> > > vfs objects.
> > 
> > Ok, thanks.  Still seems to me to be more work to actually walk the
> > path ourselves, but I'll go that route and see what it looks like :)
> 
> I just dislike having two separate paths instantiating the same
> objects and would prefer doing it the same way userland would do if
> that isn't too complex but yeah it might turn out to be a lot more
> work.
> 
> Thanks a lot!

Here's a patch to make that change.  Seems to be working for me.  If it
looks ok I can fold it into the prevoius patches and resend the new set.

PATCH 1/1] kernfs_obtain_root: switch to walking the path [fold up]

Signed-off-by: Serge Hallyn 
---
 fs/kernfs/mount.c | 80 ---
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index cc41fe1..027f4ca 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "kernfs-internal.h"
 
@@ -62,6 +63,27 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block 
*sb)
return NULL;
 }
 
+/*
+ * find the next ancestor in the path down to @child, where @parent was the
+ * parent whose child we want to find.
+ *
+ * Say the path is /a/b/c/d.  @child is d, @parent is NULL.  We return the root
+ * node.  If @parent is b, then we return the node for c.
+ * Passing in d as @parent is not ok.
+ */
+static struct kernfs_node *
+find_kn_ancestor_below(struct kernfs_node *child, struct kernfs_node *parent)
+{
+   BUG_ON(child == parent);
+
+   while (child->parent != parent) {
+   BUG_ON(!child->parent);
+   child = child->parent;
+   }
+
+   return child;
+}
+
 /**
  * kernfs_obtain_root - get a dentry for the given kernfs_node
  * @sb: the kernfs super_block
@@ -74,42 +96,34 @@ struct dentry *kernfs_obtain_root(struct super_block *sb,
  struct kernfs_node *kn)
 {
struct dentry *dentry;
-   struct inode *inode;
+   struct kernfs_node *knparent = NULL;
 
BUG_ON(sb->s_op != _sops);
 
-   /* inode for the given kernfs_node should already exist. */
-   inode = kernfs_get_inode(sb, kn);
-   if (!inode) {
-   pr_debug("kernfs: could not get inode for '");
-   pr_cont_kernfs_path(kn);
-   pr_cont("'.\n");
-   return ERR_PTR(-EINVAL);
-   }
-
-   /* instantiate and link root dentry */
-   dentry = d_obtain_root(inode);
-   if (!dentry) {
-   pr_debug("kernfs: could not get dentry for '");
-   pr_cont_kernfs_path(kn);
-   pr_cont("'.\n");
-   return ERR_PTR(-ENOMEM);
-   }
-
-   /*
-* If this is a new dentry, set it up. We need kernfs_mutex because
-* this may be called by callers other than kernfs_fill_super.
-*/
-   mutex_lock(_mutex);
-   if (!dentry->d_fsdata) {
-   kernfs_get(kn);
-   dentry->d_fsdata = kn;
-   } else {
-   WARN_ON(dentry->d_fsdata != kn);
-   }
-   mutex_unlock(_mutex);
-
-   return dentry;
+   dentry = dget(sb->s_root);
+   if (!kn->parent) // this is the root
+   return dentry;
+
+   knparent = find_kn_ancestor_below(kn, NULL);
+   BUG_ON(!knparent);
+
+   do {
+   struct dentry *dtmp;
+   struct kernfs_node *kntmp;
+
+   if (kn == knparent)
+   return dentry;
+   kntmp = find_kn_ancestor_below(kn, knparent);
+   BUG_ON(!kntmp);
+   dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
+   dput(dentry);
+   if (IS_ERR(dtmp))
+   return dtmp;
+   knparent = kntmp;
+   dentry = dtmp;
+   } while (1);
+
+   // notreached
 }
 
 static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
On Wed, Dec 02, 2015 at 11:02:39AM -0600, Serge E. Hallyn wrote:
> On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> > On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > > Can it be flushed when we know that the cgroup is being pinned by
> > > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > > or we wouldn't get here)
> > 
> > Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> > vfs objects.
> 
> Ok, thanks.  Still seems to me to be more work to actually walk the
> path ourselves, but I'll go that route and see what it looks like :)

I just dislike having two separate paths instantiating the same
objects and would prefer doing it the same way userland would do if
that isn't too complex but yeah it might turn out to be a lot more
work.

Thanks a lot!

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > Can it be flushed when we know that the cgroup is being pinned by
> > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > or we wouldn't get here)
> 
> Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> vfs objects.

Ok, thanks.  Still seems to me to be more work to actually walk the
path ourselves, but I'll go that route and see what it looks like :)

thanks
-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> Can it be flushed when we know that the cgroup is being pinned by
> a css_set?  (There's either a task or a cgroup_namespace pinning it
> or we wouldn't get here)

Yeap, it can be flushed.  There's no ref coming out of cgroup to the
vfs objects.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 11:53:12AM -0500, Tejun Heo wrote:
> Hello, Serge.
> 
> On Tue, Dec 01, 2015 at 03:58:53PM -0600, Serge E. Hallyn wrote:
> > I mispoke before though - it's not the hierarchy's root dentry,
> > but rather a dentry for a descendent cgroup which will become the
> > root dentry for the new superblock.  We do know that there must be
> > a css_set with a cgroup.  I'm still trying to track down whether
> > that cgrou's inode's dentry can ever be flushed.  I would think
> > not but am not sure.
> 
> Hmmm... I'm not really following.  The inode can be flushed and that's
> why it needs to be walked down from root.  What am I missing here?

Can it be flushed when we know that the cgroup is being pinned by
a css_set?  (There's either a task or a cgroup_namespace pinning it
or we wouldn't get here)

-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
Hello, Serge.

On Tue, Dec 01, 2015 at 03:58:53PM -0600, Serge E. Hallyn wrote:
> I mispoke before though - it's not the hierarchy's root dentry,
> but rather a dentry for a descendent cgroup which will become the
> root dentry for the new superblock.  We do know that there must be
> a css_set with a cgroup.  I'm still trying to track down whether
> that cgrou's inode's dentry can ever be flushed.  I would think
> not but am not sure.

Hmmm... I'm not really following.  The inode can be flushed and that's
why it needs to be walked down from root.  What am I missing here?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
Hello, Serge.

On Tue, Dec 01, 2015 at 03:58:53PM -0600, Serge E. Hallyn wrote:
> I mispoke before though - it's not the hierarchy's root dentry,
> but rather a dentry for a descendent cgroup which will become the
> root dentry for the new superblock.  We do know that there must be
> a css_set with a cgroup.  I'm still trying to track down whether
> that cgrou's inode's dentry can ever be flushed.  I would think
> not but am not sure.

Hmmm... I'm not really following.  The inode can be flushed and that's
why it needs to be walked down from root.  What am I missing here?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> Can it be flushed when we know that the cgroup is being pinned by
> a css_set?  (There's either a task or a cgroup_namespace pinning it
> or we wouldn't get here)

Yeap, it can be flushed.  There's no ref coming out of cgroup to the
vfs objects.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > Can it be flushed when we know that the cgroup is being pinned by
> > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > or we wouldn't get here)
> 
> Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> vfs objects.

Ok, thanks.  Still seems to me to be more work to actually walk the
path ourselves, but I'll go that route and see what it looks like :)

thanks
-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Tejun Heo
On Wed, Dec 02, 2015 at 11:02:39AM -0600, Serge E. Hallyn wrote:
> On Wed, Dec 02, 2015 at 11:58:39AM -0500, Tejun Heo wrote:
> > On Wed, Dec 02, 2015 at 10:56:37AM -0600, Serge E. Hallyn wrote:
> > > Can it be flushed when we know that the cgroup is being pinned by
> > > a css_set?  (There's either a task or a cgroup_namespace pinning it
> > > or we wouldn't get here)
> > 
> > Yeap, it can be flushed.  There's no ref coming out of cgroup to the
> > vfs objects.
> 
> Ok, thanks.  Still seems to me to be more work to actually walk the
> path ourselves, but I'll go that route and see what it looks like :)

I just dislike having two separate paths instantiating the same
objects and would prefer doing it the same way userland would do if
that isn't too complex but yeah it might turn out to be a lot more
work.

Thanks a lot!

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-02 Thread Serge E. Hallyn
On Wed, Dec 02, 2015 at 11:53:12AM -0500, Tejun Heo wrote:
> Hello, Serge.
> 
> On Tue, Dec 01, 2015 at 03:58:53PM -0600, Serge E. Hallyn wrote:
> > I mispoke before though - it's not the hierarchy's root dentry,
> > but rather a dentry for a descendent cgroup which will become the
> > root dentry for the new superblock.  We do know that there must be
> > a css_set with a cgroup.  I'm still trying to track down whether
> > that cgrou's inode's dentry can ever be flushed.  I would think
> > not but am not sure.
> 
> Hmmm... I'm not really following.  The inode can be flushed and that's
> why it needs to be walked down from root.  What am I missing here?

Can it be flushed when we know that the cgroup is being pinned by
a css_set?  (There's either a task or a cgroup_namespace pinning it
or we wouldn't get here)

-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-01 Thread Serge E. Hallyn
On Tue, Dec 01, 2015 at 11:46:49AM -0500, Tejun Heo wrote:
> Hey, Serge.
> 
> On Mon, Nov 30, 2015 at 10:07:04PM -0600, Serge E. Hallyn wrote:
> > So actually the way the code is now, the first mount cannot
> > be done from a non-init user namespace; and kernfs_obtain_root()
> > is only called from non-init user namespace.  So can we assume
> > that the root dentry will be instantiated?  (or can it get
> > evicted?)
> > 
> > If we can assume that then most of that fn can go away.
> 
> The v2 hierarchy is always mounted and non-init ns shouldn't be able
> to create new v1 hierarchies, so the root dentry should always be
> there.

I mispoke before though - it's not the hierarchy's root dentry,
but rather a dentry for a descendent cgroup which will become the
root dentry for the new superblock.  We do know that there must be
a css_set with a cgroup.  I'm still trying to track down whether
that cgrou's inode's dentry can ever be flushed.  I would think
not but am not sure.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-01 Thread Tejun Heo
Hey, Serge.

On Mon, Nov 30, 2015 at 10:07:04PM -0600, Serge E. Hallyn wrote:
> So actually the way the code is now, the first mount cannot
> be done from a non-init user namespace; and kernfs_obtain_root()
> is only called from non-init user namespace.  So can we assume
> that the root dentry will be instantiated?  (or can it get
> evicted?)
> 
> If we can assume that then most of that fn can go away.

The v2 hierarchy is always mounted and non-init ns shouldn't be able
to create new v1 hierarchies, so the root dentry should always be
there.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-01 Thread Serge E. Hallyn
On Tue, Dec 01, 2015 at 11:46:49AM -0500, Tejun Heo wrote:
> Hey, Serge.
> 
> On Mon, Nov 30, 2015 at 10:07:04PM -0600, Serge E. Hallyn wrote:
> > So actually the way the code is now, the first mount cannot
> > be done from a non-init user namespace; and kernfs_obtain_root()
> > is only called from non-init user namespace.  So can we assume
> > that the root dentry will be instantiated?  (or can it get
> > evicted?)
> > 
> > If we can assume that then most of that fn can go away.
> 
> The v2 hierarchy is always mounted and non-init ns shouldn't be able
> to create new v1 hierarchies, so the root dentry should always be
> there.

I mispoke before though - it's not the hierarchy's root dentry,
but rather a dentry for a descendent cgroup which will become the
root dentry for the new superblock.  We do know that there must be
a css_set with a cgroup.  I'm still trying to track down whether
that cgrou's inode's dentry can ever be flushed.  I would think
not but am not sure.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-12-01 Thread Tejun Heo
Hey, Serge.

On Mon, Nov 30, 2015 at 10:07:04PM -0600, Serge E. Hallyn wrote:
> So actually the way the code is now, the first mount cannot
> be done from a non-init user namespace; and kernfs_obtain_root()
> is only called from non-init user namespace.  So can we assume
> that the root dentry will be instantiated?  (or can it get
> evicted?)
> 
> If we can assume that then most of that fn can go away.

The v2 hierarchy is always mounted and non-init ns shouldn't be able
to create new v1 hierarchies, so the root dentry should always be
there.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-30 Thread Serge E. Hallyn
On Mon, Nov 30, 2015 at 10:09:38AM -0500, Tejun Heo wrote:
> Hello, Serge.
> 
> On Thu, Nov 26, 2015 at 11:17:45PM -0600, Serge E. Hallyn wrote:
> > > Wouldn't it be simpler to walk dentry from kernfs root than
> > > duplicating dentry instantiation?
> > 
> > Sorry I don't think I'm following.  Are you suggesting walking the
> > kn->parent chain backward and doing d_lookup() at each point starting
> > with sb->s_root?
> 
> Yeah, something like that.  I wonder whether there are already code
> paths doing that.  What we need is a straight path walk.  I could be
> wrong but it shouldn't be that complex and if it works out we can
> avoid introducing another instantiation / lookup path.
> 
> Thanks.

So actually the way the code is now, the first mount cannot
be done from a non-init user namespace; and kernfs_obtain_root()
is only called from non-init user namespace.  So can we assume
that the root dentry will be instantiated?  (or can it get
evicted?)

If we can assume that then most of that fn can go away.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-30 Thread Tejun Heo
Hello, Serge.

On Thu, Nov 26, 2015 at 11:17:45PM -0600, Serge E. Hallyn wrote:
> > Wouldn't it be simpler to walk dentry from kernfs root than
> > duplicating dentry instantiation?
> 
> Sorry I don't think I'm following.  Are you suggesting walking the
> kn->parent chain backward and doing d_lookup() at each point starting
> with sb->s_root?

Yeah, something like that.  I wonder whether there are already code
paths doing that.  What we need is a straight path walk.  I could be
wrong but it shouldn't be that complex and if it works out we can
avoid introducing another instantiation / lookup path.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-30 Thread Tejun Heo
Hello, Serge.

On Thu, Nov 26, 2015 at 11:17:45PM -0600, Serge E. Hallyn wrote:
> > Wouldn't it be simpler to walk dentry from kernfs root than
> > duplicating dentry instantiation?
> 
> Sorry I don't think I'm following.  Are you suggesting walking the
> kn->parent chain backward and doing d_lookup() at each point starting
> with sb->s_root?

Yeah, something like that.  I wonder whether there are already code
paths doing that.  What we need is a straight path walk.  I could be
wrong but it shouldn't be that complex and if it works out we can
avoid introducing another instantiation / lookup path.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-30 Thread Serge E. Hallyn
On Mon, Nov 30, 2015 at 10:09:38AM -0500, Tejun Heo wrote:
> Hello, Serge.
> 
> On Thu, Nov 26, 2015 at 11:17:45PM -0600, Serge E. Hallyn wrote:
> > > Wouldn't it be simpler to walk dentry from kernfs root than
> > > duplicating dentry instantiation?
> > 
> > Sorry I don't think I'm following.  Are you suggesting walking the
> > kn->parent chain backward and doing d_lookup() at each point starting
> > with sb->s_root?
> 
> Yeah, something like that.  I wonder whether there are already code
> paths doing that.  What we need is a straight path walk.  I could be
> wrong but it shouldn't be that complex and if it works out we can
> avoid introducing another instantiation / lookup path.
> 
> Thanks.

So actually the way the code is now, the first mount cannot
be done from a non-init user namespace; and kernfs_obtain_root()
is only called from non-init user namespace.  So can we assume
that the root dentry will be instantiated?  (or can it get
evicted?)

If we can assume that then most of that fn can go away.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-26 Thread Serge E. Hallyn
On Tue, Nov 24, 2015 at 12:16:10PM -0500, Tejun Heo wrote:
> Hello,
> 
> On Mon, Nov 16, 2015 at 01:51:44PM -0600, se...@hallyn.com wrote:
> > +struct dentry *kernfs_obtain_root(struct super_block *sb,
> > + struct kernfs_node *kn)
> > +{
> > +   struct dentry *dentry;
> > +   struct inode *inode;
> > +
> > +   BUG_ON(sb->s_op != _sops);
> > +
> > +   /* inode for the given kernfs_node should already exist. */
> > +   inode = ilookup(sb, kn->ino);
> > +   if (!inode) {
> > +   pr_debug("kernfs: could not get inode for '");
> > +   pr_cont_kernfs_path(kn);
> > +   pr_cont("'.\n");
> > +   return ERR_PTR(-EINVAL);
> > +   }
> 
> Hmmm... but inode might not have been instantiated yet.  Why not use
> kernfs_get_inode()?
> 
> > +   /* instantiate and link root dentry */
> > +   dentry = d_obtain_root(inode);
> > +   if (!dentry) {
> > +   pr_debug("kernfs: could not get dentry for '");
> > +   pr_cont_kernfs_path(kn);
> > +   pr_cont("'.\n");
> > +   return ERR_PTR(-ENOMEM);
> > +   }
> > +
> > +   /* If this is a new dentry, set it up. We need kernfs_mutex because this
> > +* may be called by callers other than kernfs_fill_super. */
> 
> Formatting.
> 
> > +   mutex_lock(_mutex);
> > +   if (!dentry->d_fsdata) {
> > +   kernfs_get(kn);
> > +   dentry->d_fsdata = kn;
> > +   } else {
> > +   WARN_ON(dentry->d_fsdata != kn);
> > +   }
> > +   mutex_unlock(_mutex);
> > +
> > +   return dentry;
> > +}
> 
> Wouldn't it be simpler to walk dentry from kernfs root than
> duplicating dentry instantiation?

Sorry I don't think I'm following.  Are you suggesting walking the
kn->parent chain backward and doing d_lookup() at each point starting
with sb->s_root?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-26 Thread Serge E. Hallyn
On Tue, Nov 24, 2015 at 12:16:10PM -0500, Tejun Heo wrote:
> Hello,
> 
> On Mon, Nov 16, 2015 at 01:51:44PM -0600, se...@hallyn.com wrote:
> > +struct dentry *kernfs_obtain_root(struct super_block *sb,
> > + struct kernfs_node *kn)
> > +{
> > +   struct dentry *dentry;
> > +   struct inode *inode;
> > +
> > +   BUG_ON(sb->s_op != _sops);
> > +
> > +   /* inode for the given kernfs_node should already exist. */
> > +   inode = ilookup(sb, kn->ino);
> > +   if (!inode) {
> > +   pr_debug("kernfs: could not get inode for '");
> > +   pr_cont_kernfs_path(kn);
> > +   pr_cont("'.\n");
> > +   return ERR_PTR(-EINVAL);
> > +   }
> 
> Hmmm... but inode might not have been instantiated yet.  Why not use
> kernfs_get_inode()?
> 
> > +   /* instantiate and link root dentry */
> > +   dentry = d_obtain_root(inode);
> > +   if (!dentry) {
> > +   pr_debug("kernfs: could not get dentry for '");
> > +   pr_cont_kernfs_path(kn);
> > +   pr_cont("'.\n");
> > +   return ERR_PTR(-ENOMEM);
> > +   }
> > +
> > +   /* If this is a new dentry, set it up. We need kernfs_mutex because this
> > +* may be called by callers other than kernfs_fill_super. */
> 
> Formatting.
> 
> > +   mutex_lock(_mutex);
> > +   if (!dentry->d_fsdata) {
> > +   kernfs_get(kn);
> > +   dentry->d_fsdata = kn;
> > +   } else {
> > +   WARN_ON(dentry->d_fsdata != kn);
> > +   }
> > +   mutex_unlock(_mutex);
> > +
> > +   return dentry;
> > +}
> 
> Wouldn't it be simpler to walk dentry from kernfs root than
> duplicating dentry instantiation?

Sorry I don't think I'm following.  Are you suggesting walking the
kn->parent chain backward and doing d_lookup() at each point starting
with sb->s_root?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Tejun Heo
On Wed, Nov 25, 2015 at 07:55:53PM +, Serge Hallyn wrote:
> Quoting Tejun Heo (t...@kernel.org):
> > Hello, Serge.
> > 
> > On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> > > that was my goal with 
> > > https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> > > (which was sent inline earlier in this thread in response to Eric)  Does
> > > that look sufficient?
> > 
> > Hmmm... but that wouldn't work with non-root and user ns.  I think
> 
> Are you sure?  IIUC that code block is only hit when we didn't find
> an already-mounted subsystem.

Heh, you're right.  This should work.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Serge Hallyn
Quoting Tejun Heo (t...@kernel.org):
> Hello, Serge.
> 
> On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> > that was my goal with 
> > https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> > (which was sent inline earlier in this thread in response to Eric)  Does
> > that look sufficient?
> 
> Hmmm... but that wouldn't work with non-root and user ns.  I think

Are you sure?  IIUC that code block is only hit when we didn't find
an already-mounted subsystem.

> what's necessary is ensuring that namespace scoped mount never creates
> a new hierarchy but always reuses an existing one.
> 
> Thanks.
> 
> -- 
> tejun
> ___
> Containers mailing list
> contain...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Tejun Heo
Hello, Serge.

On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> that was my goal with 
> https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> (which was sent inline earlier in this thread in response to Eric)  Does
> that look sufficient?

Hmmm... but that wouldn't work with non-root and user ns.  I think
what's necessary is ensuring that namespace scoped mount never creates
a new hierarchy but always reuses an existing one.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Tejun Heo
Hello, Serge.

On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> that was my goal with 
> https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> (which was sent inline earlier in this thread in response to Eric)  Does
> that look sufficient?

Hmmm... but that wouldn't work with non-root and user ns.  I think
what's necessary is ensuring that namespace scoped mount never creates
a new hierarchy but always reuses an existing one.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Serge Hallyn
Quoting Tejun Heo (t...@kernel.org):
> Hello, Serge.
> 
> On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> > that was my goal with 
> > https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> > (which was sent inline earlier in this thread in response to Eric)  Does
> > that look sufficient?
> 
> Hmmm... but that wouldn't work with non-root and user ns.  I think

Are you sure?  IIUC that code block is only hit when we didn't find
an already-mounted subsystem.

> what's necessary is ensuring that namespace scoped mount never creates
> a new hierarchy but always reuses an existing one.
> 
> Thanks.
> 
> -- 
> tejun
> ___
> Containers mailing list
> contain...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/containers
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-25 Thread Tejun Heo
On Wed, Nov 25, 2015 at 07:55:53PM +, Serge Hallyn wrote:
> Quoting Tejun Heo (t...@kernel.org):
> > Hello, Serge.
> > 
> > On Wed, Nov 25, 2015 at 12:01:56AM -0600, Serge E. Hallyn wrote:
> > > that was my goal with 
> > > https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
> > > (which was sent inline earlier in this thread in response to Eric)  Does
> > > that look sufficient?
> > 
> > Hmmm... but that wouldn't work with non-root and user ns.  I think
> 
> Are you sure?  IIUC that code block is only hit when we didn't find
> an already-mounted subsystem.

Heh, you're right.  This should work.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-24 Thread Serge E. Hallyn
On Tue, Nov 24, 2015 at 12:16:10PM -0500, Tejun Heo wrote:
...
> > +   if (ns != _cgroup_ns) {
> > +   struct dentry *nsdentry;
> > +   struct cgroup *cgrp;
> > +
> > +   cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
> > +   nsdentry = kernfs_obtain_root(dentry->d_sb,
> > +   cgrp->kn);
> > +   dput(dentry);
> > +   dentry = nsdentry;
> > +   }
> > +   }
> 
> So, this would effectively allow namespace mounts to claim controllers
> which aren't configured otherwise which doesn't seem like a good idea.
> I think the right thing to do for namespace mounts is to always
> require an existing superblock.

that was my goal with 
https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
(which was sent inline earlier in this thread in response to Eric)  Does
that look sufficient?

thanks,
-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-24 Thread Tejun Heo
Hello,

On Mon, Nov 16, 2015 at 01:51:44PM -0600, se...@hallyn.com wrote:
> +struct dentry *kernfs_obtain_root(struct super_block *sb,
> +   struct kernfs_node *kn)
> +{
> + struct dentry *dentry;
> + struct inode *inode;
> +
> + BUG_ON(sb->s_op != _sops);
> +
> + /* inode for the given kernfs_node should already exist. */
> + inode = ilookup(sb, kn->ino);
> + if (!inode) {
> + pr_debug("kernfs: could not get inode for '");
> + pr_cont_kernfs_path(kn);
> + pr_cont("'.\n");
> + return ERR_PTR(-EINVAL);
> + }

Hmmm... but inode might not have been instantiated yet.  Why not use
kernfs_get_inode()?

> + /* instantiate and link root dentry */
> + dentry = d_obtain_root(inode);
> + if (!dentry) {
> + pr_debug("kernfs: could not get dentry for '");
> + pr_cont_kernfs_path(kn);
> + pr_cont("'.\n");
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + /* If this is a new dentry, set it up. We need kernfs_mutex because this
> +  * may be called by callers other than kernfs_fill_super. */

Formatting.

> + mutex_lock(_mutex);
> + if (!dentry->d_fsdata) {
> + kernfs_get(kn);
> + dentry->d_fsdata = kn;
> + } else {
> + WARN_ON(dentry->d_fsdata != kn);
> + }
> + mutex_unlock(_mutex);
> +
> + return dentry;
> +}

Wouldn't it be simpler to walk dentry from kernfs root than
duplicating dentry instantiation?

> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 1d696de..0a3e893 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -2112,11 +2120,31 @@ out_free:
>   kfree(opts.release_agent);
>   kfree(opts.name);
>  
> - if (ret)
> + if (ret) {
> + put_cgroup_ns(ns);
>   return ERR_PTR(ret);
> + }
>  
>   dentry = kernfs_mount(fs_type, flags, root->kf_root,
>   CGROUP_SUPER_MAGIC, _sb);
> +
> + if (!IS_ERR(dentry)) {
> + /* In non-init cgroup namespace, instead of root cgroup's
> +  * dentry, we return the dentry corresponding to the
> +  * cgroupns->root_cgrp.
> +  */

Formatting.

> + if (ns != _cgroup_ns) {
> + struct dentry *nsdentry;
> + struct cgroup *cgrp;
> +
> + cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
> + nsdentry = kernfs_obtain_root(dentry->d_sb,
> + cgrp->kn);
> + dput(dentry);
> + dentry = nsdentry;
> + }
> + }

So, this would effectively allow namespace mounts to claim controllers
which aren't configured otherwise which doesn't seem like a good idea.
I think the right thing to do for namespace mounts is to always
require an existing superblock.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-24 Thread Tejun Heo
Hello,

On Mon, Nov 16, 2015 at 01:51:44PM -0600, se...@hallyn.com wrote:
> +struct dentry *kernfs_obtain_root(struct super_block *sb,
> +   struct kernfs_node *kn)
> +{
> + struct dentry *dentry;
> + struct inode *inode;
> +
> + BUG_ON(sb->s_op != _sops);
> +
> + /* inode for the given kernfs_node should already exist. */
> + inode = ilookup(sb, kn->ino);
> + if (!inode) {
> + pr_debug("kernfs: could not get inode for '");
> + pr_cont_kernfs_path(kn);
> + pr_cont("'.\n");
> + return ERR_PTR(-EINVAL);
> + }

Hmmm... but inode might not have been instantiated yet.  Why not use
kernfs_get_inode()?

> + /* instantiate and link root dentry */
> + dentry = d_obtain_root(inode);
> + if (!dentry) {
> + pr_debug("kernfs: could not get dentry for '");
> + pr_cont_kernfs_path(kn);
> + pr_cont("'.\n");
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + /* If this is a new dentry, set it up. We need kernfs_mutex because this
> +  * may be called by callers other than kernfs_fill_super. */

Formatting.

> + mutex_lock(_mutex);
> + if (!dentry->d_fsdata) {
> + kernfs_get(kn);
> + dentry->d_fsdata = kn;
> + } else {
> + WARN_ON(dentry->d_fsdata != kn);
> + }
> + mutex_unlock(_mutex);
> +
> + return dentry;
> +}

Wouldn't it be simpler to walk dentry from kernfs root than
duplicating dentry instantiation?

> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index 1d696de..0a3e893 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -2112,11 +2120,31 @@ out_free:
>   kfree(opts.release_agent);
>   kfree(opts.name);
>  
> - if (ret)
> + if (ret) {
> + put_cgroup_ns(ns);
>   return ERR_PTR(ret);
> + }
>  
>   dentry = kernfs_mount(fs_type, flags, root->kf_root,
>   CGROUP_SUPER_MAGIC, _sb);
> +
> + if (!IS_ERR(dentry)) {
> + /* In non-init cgroup namespace, instead of root cgroup's
> +  * dentry, we return the dentry corresponding to the
> +  * cgroupns->root_cgrp.
> +  */

Formatting.

> + if (ns != _cgroup_ns) {
> + struct dentry *nsdentry;
> + struct cgroup *cgrp;
> +
> + cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
> + nsdentry = kernfs_obtain_root(dentry->d_sb,
> + cgrp->kn);
> + dput(dentry);
> + dentry = nsdentry;
> + }
> + }

So, this would effectively allow namespace mounts to claim controllers
which aren't configured otherwise which doesn't seem like a good idea.
I think the right thing to do for namespace mounts is to always
require an existing superblock.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-24 Thread Serge E. Hallyn
On Tue, Nov 24, 2015 at 12:16:10PM -0500, Tejun Heo wrote:
...
> > +   if (ns != _cgroup_ns) {
> > +   struct dentry *nsdentry;
> > +   struct cgroup *cgrp;
> > +
> > +   cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
> > +   nsdentry = kernfs_obtain_root(dentry->d_sb,
> > +   cgrp->kn);
> > +   dput(dentry);
> > +   dentry = nsdentry;
> > +   }
> > +   }
> 
> So, this would effectively allow namespace mounts to claim controllers
> which aren't configured otherwise which doesn't seem like a good idea.
> I think the right thing to do for namespace mounts is to always
> require an existing superblock.

that was my goal with 
https://git.kernel.org/cgit/linux/kernel/git/sergeh/linux-security.git/commit/?h=cgroupns.v4=8eb75d2bb24df59e262f050dce567d2332adc5f3
(which was sent inline earlier in this thread in response to Eric)  Does
that look sufficient?

thanks,
-serge
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-16 Thread serge
From: Aditya Kali 

This patch enables cgroup mounting inside userns when a process
as appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container-setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.
In order to support this, a new kernfs api is added to lookup the
dentry for the cgroupns-root.

Signed-off-by: Aditya Kali 
Acked-by: Serge E. Hallyn 
---
 fs/kernfs/mount.c  |   48 
 include/linux/kernfs.h |2 ++
 kernel/cgroup.c|   32 +++-
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 8eaf417..64613864 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,6 +62,54 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block 
*sb)
return NULL;
 }
 
+/**
+ * kernfs_obtain_root - get a dentry for the given kernfs_node
+ * @sb: the kernfs super_block
+ * @kn: kernfs_node for which a dentry is needed
+ *
+ * This can used used by callers which want to mount only a part of the kernfs
+ * as root of the filesystem.
+ */
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+ struct kernfs_node *kn)
+{
+   struct dentry *dentry;
+   struct inode *inode;
+
+   BUG_ON(sb->s_op != _sops);
+
+   /* inode for the given kernfs_node should already exist. */
+   inode = ilookup(sb, kn->ino);
+   if (!inode) {
+   pr_debug("kernfs: could not get inode for '");
+   pr_cont_kernfs_path(kn);
+   pr_cont("'.\n");
+   return ERR_PTR(-EINVAL);
+   }
+
+   /* instantiate and link root dentry */
+   dentry = d_obtain_root(inode);
+   if (!dentry) {
+   pr_debug("kernfs: could not get dentry for '");
+   pr_cont_kernfs_path(kn);
+   pr_cont("'.\n");
+   return ERR_PTR(-ENOMEM);
+   }
+
+   /* If this is a new dentry, set it up. We need kernfs_mutex because this
+* may be called by callers other than kernfs_fill_super. */
+   mutex_lock(_mutex);
+   if (!dentry->d_fsdata) {
+   kernfs_get(kn);
+   dentry->d_fsdata = kn;
+   } else {
+   WARN_ON(dentry->d_fsdata != kn);
+   }
+   mutex_unlock(_mutex);
+
+   return dentry;
+}
+
 static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 {
struct kernfs_super_info *info = kernfs_info(sb);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index d025ebd..1903777 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -284,6 +284,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry 
*dentry);
 struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
 struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
 
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+ struct kernfs_node *kn);
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
   unsigned int flags, void *priv);
 void kernfs_destroy_root(struct kernfs_root *root);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d696de..0a3e893 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1980,6 +1980,14 @@ static struct dentry *cgroup_mount(struct 
file_system_type *fs_type,
int ret;
int i;
bool new_sb;
+   struct cgroup_namespace *ns =
+   get_cgroup_ns(current->nsproxy->cgroup_ns);
+
+   /* Check if the caller has permission to mount. */
+   if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
+   put_cgroup_ns(ns);
+   return ERR_PTR(-EPERM);
+   }
 
/*
 * The first time anyone tries to mount a cgroup, enable the list
@@ -2112,11 +2120,31 @@ out_free:
kfree(opts.release_agent);
kfree(opts.name);
 
-   if (ret)
+   if (ret) {
+   put_cgroup_ns(ns);
return ERR_PTR(ret);
+   }
 
dentry = kernfs_mount(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, _sb);
+
+   if (!IS_ERR(dentry)) {
+   /* In non-init cgroup namespace, instead of root cgroup's
+* dentry, we return the dentry corresponding to the
+* cgroupns->root_cgrp.
+*/
+   if (ns != _cgroup_ns) {
+   struct dentry *nsdentry;
+   struct cgroup *cgrp;
+
+   cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
+   nsdentry = kernfs_obtain_root(dentry->d_sb,
+   cgrp->kn);
+   dput(dentry);
+   dentry = 

[PATCH 7/8] cgroup: mount cgroupns-root when inside non-init cgroupns

2015-11-16 Thread serge
From: Aditya Kali 

This patch enables cgroup mounting inside userns when a process
as appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container-setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.
In order to support this, a new kernfs api is added to lookup the
dentry for the cgroupns-root.

Signed-off-by: Aditya Kali 
Acked-by: Serge E. Hallyn 
---
 fs/kernfs/mount.c  |   48 
 include/linux/kernfs.h |2 ++
 kernel/cgroup.c|   32 +++-
 3 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 8eaf417..64613864 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,6 +62,54 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block 
*sb)
return NULL;
 }
 
+/**
+ * kernfs_obtain_root - get a dentry for the given kernfs_node
+ * @sb: the kernfs super_block
+ * @kn: kernfs_node for which a dentry is needed
+ *
+ * This can used used by callers which want to mount only a part of the kernfs
+ * as root of the filesystem.
+ */
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+ struct kernfs_node *kn)
+{
+   struct dentry *dentry;
+   struct inode *inode;
+
+   BUG_ON(sb->s_op != _sops);
+
+   /* inode for the given kernfs_node should already exist. */
+   inode = ilookup(sb, kn->ino);
+   if (!inode) {
+   pr_debug("kernfs: could not get inode for '");
+   pr_cont_kernfs_path(kn);
+   pr_cont("'.\n");
+   return ERR_PTR(-EINVAL);
+   }
+
+   /* instantiate and link root dentry */
+   dentry = d_obtain_root(inode);
+   if (!dentry) {
+   pr_debug("kernfs: could not get dentry for '");
+   pr_cont_kernfs_path(kn);
+   pr_cont("'.\n");
+   return ERR_PTR(-ENOMEM);
+   }
+
+   /* If this is a new dentry, set it up. We need kernfs_mutex because this
+* may be called by callers other than kernfs_fill_super. */
+   mutex_lock(_mutex);
+   if (!dentry->d_fsdata) {
+   kernfs_get(kn);
+   dentry->d_fsdata = kn;
+   } else {
+   WARN_ON(dentry->d_fsdata != kn);
+   }
+   mutex_unlock(_mutex);
+
+   return dentry;
+}
+
 static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 {
struct kernfs_super_info *info = kernfs_info(sb);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index d025ebd..1903777 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -284,6 +284,8 @@ struct kernfs_node *kernfs_node_from_dentry(struct dentry 
*dentry);
 struct kernfs_root *kernfs_root_from_sb(struct super_block *sb);
 struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn);
 
+struct dentry *kernfs_obtain_root(struct super_block *sb,
+ struct kernfs_node *kn);
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
   unsigned int flags, void *priv);
 void kernfs_destroy_root(struct kernfs_root *root);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d696de..0a3e893 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1980,6 +1980,14 @@ static struct dentry *cgroup_mount(struct 
file_system_type *fs_type,
int ret;
int i;
bool new_sb;
+   struct cgroup_namespace *ns =
+   get_cgroup_ns(current->nsproxy->cgroup_ns);
+
+   /* Check if the caller has permission to mount. */
+   if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
+   put_cgroup_ns(ns);
+   return ERR_PTR(-EPERM);
+   }
 
/*
 * The first time anyone tries to mount a cgroup, enable the list
@@ -2112,11 +2120,31 @@ out_free:
kfree(opts.release_agent);
kfree(opts.name);
 
-   if (ret)
+   if (ret) {
+   put_cgroup_ns(ns);
return ERR_PTR(ret);
+   }
 
dentry = kernfs_mount(fs_type, flags, root->kf_root,
CGROUP_SUPER_MAGIC, _sb);
+
+   if (!IS_ERR(dentry)) {
+   /* In non-init cgroup namespace, instead of root cgroup's
+* dentry, we return the dentry corresponding to the
+* cgroupns->root_cgrp.
+*/
+   if (ns != _cgroup_ns) {
+   struct dentry *nsdentry;
+   struct cgroup *cgrp;
+
+   cgrp = cset_cgroup_from_root(ns->root_cgrps, root);
+   nsdentry = kernfs_obtain_root(dentry->d_sb,
+   cgrp->kn);
+