Hi all, since the list is down and IMO this is important enough, I've
cc'd y'all - hope no-one's too offended?

Anyhow, as the subject line intimates, if the symptom of your kernel
panic is semaphore related (ksymoops is your friend), the attached
patch(es) could/should sort you out. Note that the sched.c BUG is a
symptom that can have a variety of causes, so there may be more than
one issue here. Without more widespread use of
ksymoops...


sys_assign_ip_info() and sys_release_ip_info() can get called from softirq
functions, thus (sometimes?) leading to oopses: taking a semaphore may
sleep, and code that runs at IRQ level must not sleep.

sched-oopsfix.patch replaces the semaphore with a spinlock, which works
and is the correct solution if ip_info continues to need messing with at
IRQ level.

Might not be the optimum solution since spinlocks do spin somewhat :) but
since the code they're protecting is small and fast, they're probably
the way to go? If not there's always ctx17 and a refit.

sched-oopsfix-safer.patch also replaces the semaphore with a spinlock,
but adds some protection against the (remote) possibility of
ip_info->refcount getting out of kilter (munged) in multi-threaded
applications. N.b.  Jacques didn't think this was possible, but I've
printk'd refcount and seen it get above 5400 - before sched.c BUG oopsed
on me. I've not seen it skyrocket again, but since it's a timing-based
problem, it will be inherently intermittent. IIRC it's unlikely to be a
problem with decrementing, so a small memory leak is the most common
effect. In an extreme case refcount could _eventually_ wrap, leading to
vfree being called, followed by an invalid pointer access...

Comments please...
Jonathan
-- 
                   
 Jonathan Sambrook 
Software  Developer 
 Designer  Servers
--- linux-2.4.20-quota-ctx/kernel/sys.c 2003-01-30 14:40:56.000000000 +0000
+++ new/kernel/sys.c    2003-03-17 21:52:38.000000000 +0000
@@ -15,6 +15,7 @@
 #include <linux/prctl.h>
 #include <linux/init.h>
 #include <linux/highuid.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1089,6 +1090,8 @@
        }
 }
 
+spinlock_t ip_info_lock = SPIN_LOCK_UNLOCKED;
+
 /*
        Decrease the reference count on the ip_info struct
        Free the struct if the reference count reach 0.
@@ -1096,13 +1099,13 @@
 void sys_release_ip_info (struct iproot_info *ip_info)
 {
        if (ip_info != NULL){
-               down_write (&uts_sem);
+               spin_lock(&ip_info_lock);
                ip_info->refcount--;
                if (ip_info->refcount == 0){
-                       // printk ("vfree s_info %d\n",p->pid);
+                       // printk ("%5d vfree ip_info 0x%08x\n", current->pid, ip_info );
                        vfree (ip_info);
                }
-               up_write (&uts_sem);
+               spin_unlock(&ip_info_lock);
        }
 }
 /*
@@ -1111,9 +1114,9 @@
 void sys_assign_ip_info (struct iproot_info *ip_info)
 {
        if (ip_info != NULL){
-               down_write (&uts_sem);
+               spin_lock(&ip_info_lock);
                ip_info->refcount++;
-               up_write (&uts_sem);
+               spin_unlock(&ip_info_lock);
        }
 }
 
--- linux-2.4.20-quota-ctx/kernel/sys.c 2003-01-30 14:40:56.000000000 +0000
+++ new/kernel/sys.c    2003-03-17 21:53:55.000000000 +0000
@@ -15,6 +15,7 @@
 #include <linux/prctl.h>
 #include <linux/init.h>
 #include <linux/highuid.h>
+#include <linux/spinlock.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1089,32 +1090,34 @@
        }
 }
 
+spinlock_t ip_info_lock = SPIN_LOCK_UNLOCKED;
+
 /*
        Decrease the reference count on the ip_info struct
        Free the struct if the reference count reach 0.
 */
 void sys_release_ip_info (struct iproot_info *ip_info)
 {
+       spin_lock(&ip_info_lock);
        if (ip_info != NULL){
-               down_write (&uts_sem);
                ip_info->refcount--;
                if (ip_info->refcount == 0){
-                       // printk ("vfree s_info %d\n",p->pid);
+                       // printk ("%5d vfree ip_info 0x%08x\n", current->pid, ip_info );
                        vfree (ip_info);
                }
-               up_write (&uts_sem);
        }
+       spin_unlock(&ip_info_lock);
 }
 /*
        Increase the reference count on the ip_info member of a task
 */
 void sys_assign_ip_info (struct iproot_info *ip_info)
 {
+       spin_lock(&ip_info_lock);
        if (ip_info != NULL){
-               down_write (&uts_sem);
                ip_info->refcount++;
-               up_write (&uts_sem);
        }
+       spin_unlock(&ip_info_lock);
 }
 
 /*
--- linux-2.4.20-quota-ctx/net/ipv4/tcp_minisocks.c     2003-01-30 14:40:56.000000000 +0000
+++ new/net/ipv4/tcp_minisocks.c        2003-03-17 14:27:06.000000000 +0000
@@ -651,8 +651,8 @@
                struct sk_filter *filter;
 #endif
 
+               sys_assign_ip_info (sk->ip_info);
                memcpy(newsk, sk, sizeof(*newsk));
-               sys_assign_ip_info (newsk->ip_info);
                newsk->state = TCP_SYN_RECV;
 
                /* SANITY */

Attachment: pgp00000.pgp
Description: PGP signature

Reply via email to