Re: [PATCH v5 1/2] cgroup: fix cgroup_path() vs rename() race

2013-03-04 Thread Tejun Heo
On Fri, Mar 01, 2013 at 03:01:56PM +0800, Li Zefan wrote:
> rename() will change dentry->d_name. The result of this race can
> be worse than seeing partially rewritten name, but we might access
> a stale pointer because rename() will re-allocate memory to hold
> a longer name.
> 
> As accessing dentry->name must be protected by dentry->d_lock or
> parent inode's i_mutex, while on the other hand cgroup-path() can
> be called with some irq-safe spinlocks held, we can't generate
> cgroup path using dentry->d_name.
> 
> Alternatively we make a copy of dentry->d_name and save it in
> cgrp->name when a cgroup is created, and update cgrp->name at
> rename().
> 
> v5: use flexible array instead of zero-size array.
> v4: - allocate root_cgroup_name and all root_cgroup->name points to it.
> - add cgroup_name() wrapper.
> v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path.
> v2: make cgrp->name RCU safe.
> 
> Signed-off-by: Li Zefan 

Applied 1-2 to cgroup/for-3.10.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 1/2] cgroup: fix cgroup_path() vs rename() race

2013-02-28 Thread Li Zefan
rename() will change dentry->d_name. The result of this race can
be worse than seeing partially rewritten name, but we might access
a stale pointer because rename() will re-allocate memory to hold
a longer name.

As accessing dentry->name must be protected by dentry->d_lock or
parent inode's i_mutex, while on the other hand cgroup-path() can
be called with some irq-safe spinlocks held, we can't generate
cgroup path using dentry->d_name.

Alternatively we make a copy of dentry->d_name and save it in
cgrp->name when a cgroup is created, and update cgrp->name at
rename().

v5: use flexible array instead of zero-size array.
v4: - allocate root_cgroup_name and all root_cgroup->name points to it.
- add cgroup_name() wrapper.
v3: use kfree_rcu() instead of synchronize_rcu() in user-visible path.
v2: make cgrp->name RCU safe.

Signed-off-by: Li Zefan 
---
 block/blk-cgroup.h |   2 -
 include/linux/cgroup.h |  24 +++
 kernel/cgroup.c| 106 +++--
 3 files changed, 100 insertions(+), 32 deletions(-)

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 2459730..e2e3404 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -216,9 +216,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char 
*buf, int buflen)
 {
int ret;
 
-   rcu_read_lock();
ret = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
-   rcu_read_unlock();
if (ret)
strncpy(buf, "", buflen);
return ret;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 900af59..75c6ec1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,11 @@ enum {
CGRP_CPUSET_CLONE_CHILDREN,
 };
 
+struct cgroup_name {
+   struct rcu_head rcu_head;
+   char name[];
+};
+
 struct cgroup {
unsigned long flags;/* "unsigned long" so bitops work */
 
@@ -172,6 +177,19 @@ struct cgroup {
struct cgroup *parent;  /* my parent */
struct dentry *dentry;  /* cgroup fs entry, RCU protected */
 
+   /*
+* This is a copy of dentry->d_name, and it's needed because
+* we can't use dentry->d_name in cgroup_path().
+*
+* You must acquire rcu_read_lock() to access cgrp->name, and
+* the only place that can change it is rename(), which is
+* protected by parent dir's i_mutex.
+*
+* Normally you should use cgroup_name() wrapper rather than
+* access it directly.
+*/
+   struct cgroup_name __rcu *name;
+
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 
@@ -404,6 +422,12 @@ struct cgroup_scanner {
void *data;
 };
 
+/* Caller should hold rcu_read_lock() */
+static inline const char *cgroup_name(const struct cgroup *cgrp)
+{
+   return rcu_dereference(cgrp->name)->name;
+}
+
 int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index b5c6432..43ff59e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -238,6 +238,8 @@ static DEFINE_SPINLOCK(hierarchy_id_lock);
 /* dummytop is a shorthand for the dummy hierarchy's top cgroup */
 #define dummytop (&rootnode.top_cgroup)
 
+static struct cgroup_name root_cgroup_name = { .name = "/" };
+
 /* This flag indicates whether tasks in the fork and exit paths should
  * check for fork/exit handlers to call. This avoids us having to do
  * extra work in the fork/exit path if none of the subsystems need to
@@ -860,6 +862,17 @@ static struct inode *cgroup_new_inode(umode_t mode, struct 
super_block *sb)
return inode;
 }
 
+static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
+{
+   struct cgroup_name *name;
+
+   name = kmalloc(sizeof(*name) + dentry->d_name.len + 1, GFP_KERNEL);
+   if (!name)
+   return NULL;
+   strcpy(name->name, dentry->d_name.name);
+   return name;
+}
+
 static void cgroup_free_fn(struct work_struct *work)
 {
struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
@@ -890,6 +903,7 @@ static void cgroup_free_fn(struct work_struct *work)
simple_xattrs_free(&cgrp->xattrs);
 
ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+   kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
 }
 
@@ -1422,6 +1436,7 @@ static void init_cgroup_root(struct cgroupfs_root *root)
INIT_LIST_HEAD(&root->allcg_list);
root->number_of_cgroups = 1;
cgrp->root = root;
+   cgrp->name = &root_cgroup_name;
cgrp->top_cgroup = cgrp;
init_cgroup_housekeeping(cgrp);
list_add_tail(&cgrp->allcg_node, &root->allcg_list);
@@ -1771,49 +1786,45 @@ static struct kobject *cgroup_kobj;
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Cal