Fix for a race that can cause an oops, due to uncounted references to
core classes from RCFS magic files. This change:

1) takes a reference on the core class for every pointer to it stored
in an rcfs_inode_info structure, and drops that reference when the
inode is destroyed. This ensures that the core class persists for as
long as the inode is reachable.

2) removes the "delayed" field in ckrm_core_class and the associated
debug printing, since the "deletion delayed" message is guaranteed to
be triggered by the new refcounting behaviour.

3) adds a "dead" field to ckrm_core_class which is set to true when a
task class is marked for destruction; attempts to add a task to a
task class marked as "dead" are redirected to its parent class
instead, walking further up the hierarchy if the parent is also dead.

4) explicitly associates the grab/drop of the refcount on a parent
class with the setting/clearing of the hnode.parent field in the child
class.

Signed-off-by: Paul Menage <menage@google.com>
-------------

 fs/rcfs/dir.c           |    2 +-
 fs/rcfs/magic.c         |    1 +
 fs/rcfs/super.c         |    1 +
 include/linux/ckrm_rc.h |    3 +--
 kernel/ckrm/ckrm.c      |   26 ++++++--------------------
 kernel/ckrm/ckrm_tc.c   |   15 +++++++++++++++
 6 files changed, 25 insertions(+), 23 deletions(-)


diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/fs/rcfs/dir.c 2.6/fs/rcfs/dir.c
--- ../kernel9/2.6/fs/rcfs/dir.c	2005-09-09 21:11:33.000000000 -0700
+++ 2.6/fs/rcfs/dir.c	2005-09-12 08:30:28.000000000 -0700
@@ -85,6 +85,7 @@ int rcfs_create_coredir(struct inode *di
 			 dentry->d_name.name);
 		ridir->core = (*(ripar->core->classtype->alloc))
 		    (ripar->core, ridir->name);
+		ckrm_core_grab(ridir->core);
 	} else {
 		printk(KERN_ERR "rcfs_mkdir: Invalid parent core %p\n",
 		       ripar->core);
@@ -140,7 +141,6 @@ int rcfs_rmdir(struct inode *dir, struct
 		printk(KERN_ERR "rcfs_rmdir: ckrm_free_core_class failed\n");
 		goto out;
 	}
-	ri->core = NULL;	/* just to be safe */
 
 	/* Clear magic files only after core successfully removed */
 	rcfs_clear_magic(dentry);
diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/fs/rcfs/magic.c 2.6/fs/rcfs/magic.c
--- ../kernel9/2.6/fs/rcfs/magic.c	2005-09-09 21:11:33.000000000 -0700
+++ 2.6/fs/rcfs/magic.c	2005-09-12 08:30:17.000000000 -0700
@@ -505,6 +505,7 @@ int rcfs_create_magic(struct dentry *par
 		}
 		rcfs_get_inode_info(mfdentry->d_inode)->core =
 			 rcfs_get_inode_info(parent->d_inode)->core;
+		ckrm_core_grab(rcfs_get_inode_info(mfdentry->d_inode)->core);
 		rcfs_get_inode_info(mfdentry->d_inode)->mfdentry = mfdentry;
 		mfdentry->d_fsdata = &RCFS_IS_MAGIC;
 		if (magf[i].i_fop)
diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/fs/rcfs/super.c 2.6/fs/rcfs/super.c
--- ../kernel9/2.6/fs/rcfs/super.c	2005-09-09 21:10:45.000000000 -0700
+++ 2.6/fs/rcfs/super.c	2005-09-12 08:31:06.000000000 -0700
@@ -55,6 +55,7 @@ static void rcfs_destroy_inode(struct in
 	struct rcfs_inode_info *ri = rcfs_get_inode_info(inode);
 
 	kfree(ri->name);
+	if (ri->core) ckrm_core_drop(ri->core);
 	kmem_cache_free(rcfs_inode_cachep, ri);
 }
 
diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/include/linux/ckrm_rc.h 2.6/include/linux/ckrm_rc.h
--- ../kernel9/2.6/include/linux/ckrm_rc.h	2005-09-09 21:10:45.000000000 -0700
+++ 2.6/include/linux/ckrm_rc.h	2005-09-12 09:46:22.000000000 -0700
@@ -194,8 +194,7 @@ struct ckrm_core_class {
 	rwlock_t hnode_rwlock;			/* protects hnode above. */
 	atomic_t refcnt;
 	const char *name;
-	int delayed;				/* core deletion delayed  */
-						/* because of race conditions */
+	int dead;				/* class is being destroyed */
 };
 
 /* type coerce between derived class types and ckrm core class type */
diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/kernel/ckrm/ckrm.c 2.6/kernel/ckrm/ckrm.c
--- ../kernel9/2.6/kernel/ckrm/ckrm.c	2005-09-09 21:10:45.000000000 -0700
+++ 2.6/kernel/ckrm/ckrm.c	2005-09-12 10:19:42.000000000 -0700
@@ -247,6 +247,7 @@ ckrm_add_child(struct ckrm_core_class *p
 			pnode = &parent->hnode;
 			write_lock(&parent->hnode_rwlock);
 			list_add(&cnode->siblings, &pnode->children);
+			ckrm_core_grab(parent);
 			write_unlock(&parent->hnode_rwlock);
 		}
 	}
@@ -286,6 +287,7 @@ static int ckrm_remove_child(struct ckrm
 	write_lock(&parent->hnode_rwlock);
 	list_del(&cnode->siblings);
 	write_unlock(&parent->hnode_rwlock);
+	ckrm_core_drop(cnode->parent);
 	cnode->parent = NULL;
 	class_unlock(child);
 	return 1;
@@ -416,7 +418,7 @@ ckrm_init_core_class(struct ckrm_classty
 	dcore->name = name;
 	dcore->class_lock = SPIN_LOCK_UNLOCKED;
 	dcore->hnode_rwlock = RW_LOCK_UNLOCKED;
-	dcore->delayed = 0;
+	dcore->dead = 0;
 
 	atomic_set(&dcore->refcnt, 0);
 	write_lock(&ckrm_class_lock);
@@ -433,10 +435,6 @@ ckrm_init_core_class(struct ckrm_classty
 	for (i = 0; i < clstype->max_resid; i++)
 		ckrm_alloc_res_class(dcore, parent, i);
 
-	/* fix for race condition seen in stress with numtasks */
-	if (parent)
-		ckrm_core_grab(parent);
-
 	ckrm_core_grab(dcore);
 	return 0;
 }
@@ -479,17 +477,15 @@ void ckrm_free_core_class(struct ckrm_co
 
 	pr_debug("core=%p:%s parent=%p:%s\n", core, core->name, parent,
 		  parent->name);
-	if (core->delayed) {
-		/* this core was marked as late */
-		printk("class <%s> finally deleted %lu\n", core->name, jiffies);
-	}
 	if (ckrm_remove_child(core) == 0) {
-		printk("Core class removal failed. Chilren present\n");
+		printk(KERN_ERR "Core class removal failed. Children present\n");
 	}
 	for (i = 0; i < clstype->max_resid; i++) {
 		ckrm_free_res_class(core, i);
 	}
 
+	if (!core->dead) printk(KERN_ERR "Freeing non-dead class %p\n", core);
+
 	write_lock(&ckrm_class_lock);
 	/* Clear the magic, so we would know if this core is reused. */
 	core->magic = 0;
@@ -502,10 +498,6 @@ void ckrm_free_core_class(struct ckrm_co
 	set_callbacks_active(clstype);
 	write_unlock(&ckrm_class_lock);
 
-	/* fix for race condition seen in stress with numtasks */
-	if (parent)
-		ckrm_core_drop(parent);
-
 	kfree(core);
 }
 
@@ -517,12 +509,6 @@ int ckrm_release_core_class(struct ckrm_
 	if (core == core->classtype->default_class)
 		return 0;
 
-	/* need to make sure that the classgot really dropped */
-	if (atomic_read(&core->refcnt) != 1) {
-		pr_debug("class <%s> deletion delayed refcnt=%d jif=%ld\n",
-			  core->name, atomic_read(&core->refcnt), jiffies);
-		core->delayed = 1;	/* just so we have a ref point */
-	}
 	ckrm_core_drop(core);
 	return 0;
 }
diff -X /home/menage/dontdiff -uprN ../kernel9/2.6/kernel/ckrm/ckrm_tc.c 2.6/kernel/ckrm/ckrm_tc.c
--- ../kernel9/2.6/kernel/ckrm/ckrm_tc.c	2005-09-09 21:10:45.000000000 -0700
+++ 2.6/kernel/ckrm/ckrm_tc.c	2005-09-12 09:54:16.000000000 -0700
@@ -174,6 +174,17 @@ ckrm_set_taskclass(struct task_struct *t
 	}
 	/* put into new class */
 	class_lock(class_core(newcls));
+	while (class_core(newcls)->dead) {
+		/* If this class is dead, we need to move up the hierarchy */
+		struct ckrm_core_class *parent;
+		parent = class_core(newcls)->hnode.parent;
+		ckrm_core_grab(parent);
+		class_unlock(class_core(newcls));
+                printk(KERN_INFO "Redirecting task: %s -> %s\n", class_core(newcls)->name, parent->name);
+		ckrm_core_drop(class_core(newcls));
+		class_lock(parent);
+		newcls = class_type(struct ckrm_task_class, parent);
+	}
 	tsk->taskclass = newcls;
 	list_add(&tsk->taskclass_link, &class_core(newcls)->objlist);
 	class_unlock(class_core(newcls));
@@ -608,6 +619,10 @@ static int ckrm_free_task_class(struct c
 
 	pr_debug("%p:%s:%d\n", core, core->name, atomic_read(&core->refcnt));
 
+	class_lock(core);
+	core->dead = 1;
+	class_unlock(core);
+
 	taskcls = class_type(struct ckrm_task_class, core);
 
 	ce_protect(&ct_taskclass);
