Hi all, since the list is down and IMO this is important enough, I've cc'd you all directly - I hope no-one's too offended.
Anyhow, as the subject line intimates, if the symptom of your kernel
panic is semaphore related (ksymoops is your friend), the attached
patch(es) could/should sort you out. Note that the sched.c BUG is a symptom
that can have a variety of causes, so it is possible that there
may be more than one issue here. Without more widespread use of
ksymoops, though, it is hard to tell.
sys_assign_ip_info() and sys_release_ip_info() can get called from softirq
functions, which (sometimes?) leads to oopses: both take uts_sem with
down_write(), which may sleep, and you can't use semaphores in code that
runs at IRQ level.
sched-oopsfix.patch replaces the semaphore with a spinlock, which works
and is the correct solution if ip_info continues to need messing with at
IRQ level.
This might not be the optimal solution, since spinlocks do spin somewhat :),
but since the code they're protecting is small and fast, they're probably
the way to go. If not, there's always ctx17 and a refit.
sched-oopsfix-safer.patch also replaces the semaphore with a spinlock,
but adds some protection against the (remote) possibility of
ip_info->refcount getting corrupted in multi-threaded
applications. N.B. Jacques didn't think this was possible, but I've
printk'd refcount and seen it get above 5400 - before sched.c BUG oopsed
on me. I've not seen it skyrocket again, but since it's a timing-based
problem it will be inherently intermittent. IIRC it's unlikely to be a
problem with decrementing, so a small memory leak is the most common
effect. In an extreme case refcount could _eventually_ wrap, leading to
vfree() being called, followed by an invalid pointer access...
Comments please...
Jonathan
--
Jonathan Sambrook
Software Developer
Designer Servers
--- linux-2.4.20-quota-ctx/kernel/sys.c 2003-01-30 14:40:56.000000000 +0000
+++ new/kernel/sys.c 2003-03-17 21:52:38.000000000 +0000
@@ -15,6 +15,7 @@
#include <linux/prctl.h>
#include <linux/init.h>
#include <linux/highuid.h>
+#include <linux/spinlock.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -1089,6 +1090,8 @@
}
}
+spinlock_t ip_info_lock = SPIN_LOCK_UNLOCKED;
+
/*
Decrease the reference count on the ip_info struct
Free the struct if the reference count reach 0.
@@ -1096,13 +1099,13 @@
void sys_release_ip_info (struct iproot_info *ip_info)
{
if (ip_info != NULL){
- down_write (&uts_sem);
+ spin_lock(&ip_info_lock);
ip_info->refcount--;
if (ip_info->refcount == 0){
- // printk ("vfree s_info %d\n",p->pid);
+ // printk ("%5d vfree ip_info 0x%08x\n", current->pid, ip_info);
vfree (ip_info);
}
- up_write (&uts_sem);
+ spin_unlock(&ip_info_lock);
}
}
/*
@@ -1111,9 +1114,9 @@
void sys_assign_ip_info (struct iproot_info *ip_info)
{
if (ip_info != NULL){
- down_write (&uts_sem);
+ spin_lock(&ip_info_lock);
ip_info->refcount++;
- up_write (&uts_sem);
+ spin_unlock(&ip_info_lock);
}
}
--- linux-2.4.20-quota-ctx/kernel/sys.c 2003-01-30 14:40:56.000000000 +0000
+++ new/kernel/sys.c 2003-03-17 21:53:55.000000000 +0000
@@ -15,6 +15,7 @@
#include <linux/prctl.h>
#include <linux/init.h>
#include <linux/highuid.h>
+#include <linux/spinlock.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -1089,32 +1090,34 @@
}
}
+spinlock_t ip_info_lock = SPIN_LOCK_UNLOCKED;
+
/*
Decrease the reference count on the ip_info struct
Free the struct if the reference count reach 0.
*/
void sys_release_ip_info (struct iproot_info *ip_info)
{
+ spin_lock(&ip_info_lock);
if (ip_info != NULL){
- down_write (&uts_sem);
ip_info->refcount--;
if (ip_info->refcount == 0){
- // printk ("vfree s_info %d\n",p->pid);
+ // printk ("%5d vfree ip_info 0x%08x\n", current->pid, ip_info);
vfree (ip_info);
}
- up_write (&uts_sem);
}
+ spin_unlock(&ip_info_lock);
}
/*
Increase the reference count on the ip_info member of a task
*/
void sys_assign_ip_info (struct iproot_info *ip_info)
{
+ spin_lock(&ip_info_lock);
if (ip_info != NULL){
- down_write (&uts_sem);
ip_info->refcount++;
- up_write (&uts_sem);
}
+ spin_unlock(&ip_info_lock);
}
/*
--- linux-2.4.20-quota-ctx/net/ipv4/tcp_minisocks.c 2003-01-30 14:40:56.000000000 +0000
+++ new/net/ipv4/tcp_minisocks.c 2003-03-17 14:27:06.000000000 +0000
@@ -651,8 +651,8 @@
struct sk_filter *filter;
#endif
+ sys_assign_ip_info (sk->ip_info);
memcpy(newsk, sk, sizeof(*newsk));
- sys_assign_ip_info (newsk->ip_info);
newsk->state = TCP_SYN_RECV;
/* SANITY */
pgp00000.pgp
Description: PGP signature
