Ok, my first patch was pretty badly broken... try two.  

Between the smp race on this test and set flag:

if (!KDB_STATE(PRINTF_LOCK)) {
        KDB_STATE_SET(PRINTF_LOCK);
        spin_lock(&kdb_printf_lock);
}

... and the fact that in the inner calls of a recursive kdb_printf this test
is trues:

if (KDB_STATE(PRINTF_LOCK)) {
        spin_unlock(&kdb_printf_lock);
        KDB_STATE_CLEAR(PRINTF_LOCK);
}

... the possibility exists to get the PRINTF_LOCK state and the spin_lock
state out of sync, eventually leading to a deadlock.

What I have done is intoduce a recursion count to ensure that only the outer
call is interacting with the smp serialization, with data locking to prevent
racing.  This replaces the functionality of the KDB_STATE_PRINTF_LOCK.




--- /h/rsanders/trees/2.0_kdb4/ftl_future/linux/include/linux/kdb.h     Tue
Jun 10 08:49:44 2003
+++ include/linux/kdb.h Tue Jun 10 11:01:10 2003
@@ -138,7 +138,7 @@
 #define KDB_STATE_SUPPRESS     0x00000200      /* Suppress error messages
*/
 #define KDB_STATE_LONGJMP      0x00000400      /* longjmp() data is
available */
 #define KDB_STATE_GO_SWITCH    0x00000800      /* go is switching back to
initial cpu */
-#define KDB_STATE_PRINTF_LOCK  0x00001000      /* Holds kdb_printf lock */
+/*                             0x00001000         Unused */
 #define KDB_STATE_WAIT_IPI     0x00002000      /* Waiting for kdb_ipi() NMI
*/
 #define KDB_STATE_RECURSE      0x00004000      /* Recursive entry to kdb */
 #define KDB_STATE_IP_ADJUSTED  0x00008000      /* Restart IP has been
adjusted */
--- /h/rsanders/trees/2.0_kdb4/ftl_future/linux/kdb/kdb_io.c    Tue Jun 10
08:49:44 2003
+++ kdb/kdb_io.c        Tue Jun 10 11:04:33 2003
@@ -487,16 +487,20 @@
        int logging, saved_loglevel = 0;
        int do_longjmp = 0;
        struct console *c = console_drivers;
-       static spinlock_t kdb_printf_lock = SPIN_LOCK_UNLOCKED;
+       static spinlock_t data_lock = SPIN_LOCK_UNLOCKED;
+       static spinlock_t serialize_lock = SPIN_LOCK_UNLOCKED;
+       static int recursion_count[NR_CPUS] = { 0 };
 
        /* Serialize kdb_printf if multiple cpus try to write at once.
         * But if any cpu goes recursive in kdb, just print the output,
         * even if it is interleaved with any other text.
         */
-       if (!KDB_STATE(PRINTF_LOCK)) {
-               KDB_STATE_SET(PRINTF_LOCK);
-               spin_lock(&kdb_printf_lock);
-       }
+       spin_lock(&data_lock);
+       /* attempt to grab the lock only on the root call of this recurse */
+       if ( !recursion_count[smp_processor_id()] )
+               spin_lock(&serialize_lock);
+       ++recursion_count[smp_processor_id()];
+       spin_unlock(&data_lock);
 
        diag = kdbgetintenv("LINES", &linecount);
        if (diag || linecount <= 1)
@@ -591,10 +595,17 @@
        if (logging) {
                console_loglevel = saved_loglevel;
        }
-       if (KDB_STATE(PRINTF_LOCK)) {
-               spin_unlock(&kdb_printf_lock);
-               KDB_STATE_CLEAR(PRINTF_LOCK);
-       }
+
+       spin_lock(&data_lock);
+       --recursion_count[smp_processor_id()];
+       /*  
+        *  only clear the serialization lock if this is the root call 
+        *   and not one of the nested calls
+        */
+       if ( !recursion_count[smp_processor_id()] ) 
+               spin_unlock(&serialize_lock);
+       spin_unlock(&data_lock);
+
        if (do_longjmp)
 #ifdef KDB_HAVE_LONGJMP
                kdba_longjmp(&kdbjmpbuf[smp_processor_id()], 1)


Reply via email to