On Tue, 2001/05/29 at 09:39:42 -0700, John Baldwin wrote:
> 
> On 28-May-01 Doug Barton wrote:
> > I forgot something:
> > 
> > IdlePTD 4734976
> > initial pcb at 3b5f80
> > panicstr: mutex sched lock recursed at /usr/src/sys/kern/kern_synch.c:858
> > panic messages:
> 
> I would need a traceback from here.  It looks like someone called msleep or
> tsleep with sched lock held.

OK, I think I've found the problem, patch attached. set_user_ldt is
called from cpu_switch on i386, where the sched lock is already held
by the process that is just being scheduled away, and curproc has
already been changed, so this isn't treated like a recursed mutex, but
rather like the new process (dead-) locking against the old one.

The solution taken in the attached patch create a
set_user_ldt_nolock. This way, we have a more or less consistent
enviroment (of the new process) there.
The (pcb != PCPU_GET(curpcb)) check is in the outer locking
set_user_ldt wrapper (it seems only to be needed in the smp rendezvous
case and is a "can't happen" when called from cpu_switch).

This works for me; Doug, could you please test it too? I'd be thankful
for any review.

        - thomas
Index: i386/swtch.s
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/swtch.s,v
retrieving revision 1.114
diff -u -r1.114 swtch.s
--- i386/swtch.s        2001/05/20 16:51:08     1.114
+++ i386/swtch.s        2001/05/29 22:09:14
@@ -248,7 +248,7 @@
        movl    %eax,PCPU(CURRENTLDT)
        jmp     2f
 1:     pushl   %edx
-       call    set_user_ldt
+       call    set_user_ldt_nolock
        popl    %edx
 2:
 
Index: i386/sys_machdep.c
===================================================================
RCS file: /home/ncvs/src/sys/i386/i386/sys_machdep.c,v
retrieving revision 1.57
diff -u -r1.57 sys_machdep.c
--- i386/sys_machdep.c  2001/05/15 23:22:20     1.57
+++ i386/sys_machdep.c  2001/05/29 22:24:04
@@ -239,17 +239,16 @@
 
 /*
  * Update the GDT entry pointing to the LDT to point to the LDT of the
- * current process.
+ * current process. Assumes that sched_lock is held. This is needed
+ * in cpu_switch because sched_lock is held by the process that has
+ * just been scheduled away and we would deadlock if we would try to
+ * acquire sched_lock.
  */   
 void
-set_user_ldt(struct pcb *pcb)
+set_user_ldt_nolock(struct pcb *pcb)
 {
        struct pcb_ldt *pcb_ldt;
 
-       if (pcb != PCPU_GET(curpcb))
-               return;
-
-       mtx_lock_spin(&sched_lock);
        pcb_ldt = pcb->pcb_ldt;
 #ifdef SMP
        gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pcb_ldt->ldt_sd;
@@ -258,6 +257,17 @@
 #endif
        lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
        PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
+}
+
+/* Locking wrapper of the above */
+void
+set_user_ldt(struct pcb *pcb)
+{
+       if (pcb != PCPU_GET(curpcb))
+               return;
+
+       mtx_lock_spin(&sched_lock);
+       set_user_ldt_nolock(pcb);
        mtx_unlock_spin(&sched_lock);
 }
 
Index: include/pcb_ext.h
===================================================================
RCS file: /home/ncvs/src/sys/i386/include/pcb_ext.h,v
retrieving revision 1.6
diff -u -r1.6 pcb_ext.h
--- include/pcb_ext.h   2001/05/10 17:03:03     1.6
+++ include/pcb_ext.h   2001/05/29 22:06:37
@@ -55,6 +55,7 @@
 
 int i386_extend_pcb __P((struct proc *));
 void set_user_ldt __P((struct pcb *));
+void set_user_ldt_nolock __P((struct pcb *));
 struct pcb_ldt *user_ldt_alloc __P((struct pcb *, int));
 void user_ldt_free __P((struct pcb *));
 

Reply via email to