panic __mp_lock_held(sched_lock)

2013-08-11 Thread Alexander Bluhm
Hi,

On my ThinkPad T430s I am trying to debug multithreaded qemu by
attaching gdb.  This crashes the kernel of the host system within
a few minutes.  Luckily I managed to attach a serial over lan with
Intel AMT.

login: panic: kernel diagnostic assertion __mp_lock_held(sched_lock) == 0 
failed: file ../../../../kern/kern_lock.c, line 126
Stopped at  Debugger+0x5:   leave
RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
IF RUNNING SMP, USE 'mach ddbcpu #' AND 'trace' ON OTHER PROCESSORS, TOO.
DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
ddb{0}> trace
Debugger() at Debugger+0x5
panic() at panic+0xee
__assert() at __assert+0x21
_kernel_lock_init() at _kernel_lock_init
issignal() at issignal+0x205
sleep_setup_signal() at sleep_setup_signal+0x94
tsleep() at tsleep+0x86
sys_sigsuspend() at sys_sigsuspend+0x46
syscall() at syscall+0x249
--- syscall (number 111) ---
end of kernel
end trace frame: 0x7fe55fdbef0, count: -9
0x7fe50c4cdcc:

   PID   PPID   PGRP    UID  S   FLAGS  WAIT  COMMAND
 28380   5201   5983   1000  3   0x4100080  thrsleep  qemu-system-x86_
* 5825   5201   5983   1000  7   0xc100088  pause qemu-system-x86_
 18891   5201   5983   1000  3   0xc100080  sigwait   qemu-system-x86_
  5983   5201   5983   1000  3   0x880  thrsleep  qemu-system-x86_
 19446  22621  19446   1000  3  0x80  poll  gdb
  5201  12983   5201   1000  3  0x80  wait  gdb

The kernel lock is acquired in mi_syscall() as sys_sigsuspend() needs
it.  tsleep() calls sleep_setup() which acquires the sched lock.
Then sleep_setup_signal() calls issignal() via the macro CURSIG().
The function issignal() is full of side effects, especially for a
traced process.

There the kernel lock is acquired again, which should be fine as
it is a recursive lock.  But to avoid deadlocks, _kernel_lock()
asserts that it is acquired before sched lock.  This check is too
strict, the condition is only true when the lock is taken the first
time.

Index: kern/kern_lock.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_lock.c,v
retrieving revision 1.42
diff -u -p -u -p -r1.42 kern_lock.c
--- kern/kern_lock.c6 May 2013 16:37:55 -   1.42
+++ kern/kern_lock.c11 Aug 2013 01:54:06 -
@@ -123,7 +123,10 @@ _kernel_lock_init(void)
 void
 _kernel_lock(void)
 {
-   SCHED_ASSERT_UNLOCKED();
+#ifdef DIAGNOSTIC
+   if (__mp_lock_held(kernel_lock) == 0)
+   SCHED_ASSERT_UNLOCKED();
+#endif /* DIAGNOSTIC */
__mp_lock(kernel_lock);
 }

Unfortunately this fix does not solve my problem.  With that I get
another panic: wakeup: p_stat is 7

login: panic: wakeup: p_stat is 7
Stopped at  Debugger+0x5:   leave
RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
IF RUNNING SMP, USE 'mach ddbcpu #' AND 'trace' ON OTHER PROCESSORS, TOO.
DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
ddb{3}> trace
Debugger() at Debugger+0x5
panic() at panic+0xee
wakeup_n() at wakeup_n+0xfd
sys___thrwakeup() at sys___thrwakeup+0x54
syscall() at syscall+0x249
--- syscall (number 301) ---
end of kernel
end trace frame: 0x684cb9237c0, count: -5
0x684bf834c2a:

   PID   PPID   PGRP    UID  S   FLAGS  WAIT  COMMAND
 10959  11922  10959   1000  3  0x80  wait  gdb
*11287  10959  10043   1000  7   0xc10qemu-system-x86_
 11131  10959  10043   1000  3   0xc100080  sigwait   qemu-system-x86_
 10043  10959  10043   1000  7   0x800qemu-system-x86_

I will investigate further.

bluhm



pcmcia/if_ray.c: dead code

2013-08-11 Thread Maxime Villard
Hi,
in src/sys/dev/pcmcia/if_ray.c, at line 1018:

- - - -

case SIOCG80211NWID:
RAY_DPRINTF(("%s: ioctl: cmd SIOCG80211NWID\n", ifp->if_xname));
error = copyout(&sc->sc_cnwid, ifr->ifr_data,
sizeof(sc->sc_cnwid));
break; <--
#ifdef RAY_DO_SIGLEV
error = copyout(sc->sc_siglevs, ifr->ifr_data,
sizeof sc->sc_siglevs);
break;
#endif
default:
[...]

- - - -

The code in the 'ifdef' is never reached, because of the 'break'. Found
by my scanner.



ddb show proc flags

2013-08-11 Thread Alexander Bluhm
Hi,

The ps flags are split between thread and process.  It would be
useful if ddb showed both.  I OR them together in the ps overview
and list them explicitly for the specific thread.

ddb> show all procs
   PID   PPID   PGRP    UID  S   FLAGS  WAIT  COMMAND
 18514  1  18514  0  3 0x40180  select  sendmail

ddb> show proc 0xd317a460
PROC (sendmail) pid=18514 stat=sleep
flags process=40100<SUGID,SUGIDEXEC> proc=80<SINTR>

I also synced the ddb bit names with the define names.

ok?

bluhm

Index: kern/kern_proc.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.51
diff -u -p -u -p -r1.51 kern_proc.c
--- kern/kern_proc.c8 Aug 2013 23:25:06 -   1.51
+++ kern/kern_proc.c11 Aug 2013 03:29:35 -
@@ -395,8 +395,9 @@ proc_printit(struct proc *p, const char 
else
pst = pstat[(int)p-p_stat - 1];
 
-   (*pr)(PROC (%s) pid=%d stat=%s flags=%b\n,
-   p-p_comm, p-p_pid, pst, p-p_flag, P_BITS);
+   (*pr)(PROC (%s) pid=%d stat=%s\n, p-p_comm, p-p_pid, pst);
+   (*pr)(flags process=%b proc=%b\n,
+   p-p_p-ps_flags, P_BITS, p-p_flag, P_BITS);
(*pr)(pri=%u, usrpri=%u, nice=%d\n,
p-p_priority, p-p_usrpri, p-p_p-ps_nice);
(*pr)(forw=%p, list=%p,%p\n,
@@ -474,7 +475,8 @@ db_show_all_procs(db_expr_t addr, int ha
%-12.12s  %-16s\n,
ppr ? ppr-ps_pid : -1,
pr-ps_pgrp ? pr-ps_pgrp-pg_id : -1,
-   pr-ps_cred-p_ruid, p-p_stat, p-p_flag,
+   pr-ps_cred-p_ruid, p-p_stat,
+   p-p_flag | p-p_p-ps_flags,
(p-p_wchan  p-p_wmesg) ?
p-p_wmesg : , p-p_comm);
break;
Index: sys/proc.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/proc.h,v
retrieving revision 1.168
diff -u -p -u -p -r1.168 proc.h
--- sys/proc.h  6 Jun 2013 13:09:37 -   1.168
+++ sys/proc.h  11 Aug 2013 02:51:13 -
@@ -412,12 +412,14 @@ struct proc {
 #endif
 
 #defineP_BITS \
-(\20\02CONTROLT\03INMEM\04SIGPAUSE\05PPWAIT\06PROFIL\07SELECT \
- \010SINTR\011SUGID\012SYSTEM\013TIMEOUT\014TRACED\015WAITED\016WEXIT \
- \017EXEC\020PWEUPC\021ISPWAIT\022COREDUMPING\023SUGIDEXEC\024SUSPSINGLE 
\
+(\20\01INKTR\02CONTROLT\03INMEM\04SIGSUSPEND \
+ \05PPWAIT\06PROFIL\07SELECT\010SINTR \
+ \011SUGID\012SYSTEM\013TIMEOUT\014TRACED \
+ \015WAITED\016WEXIT\017EXEC\020OWEUPC \
+ \021ISPWAIT\022COREDUMP\023SUGIDEXEC\024SUSPSINGLE \
  \025NOZOMBIE\026INEXEC\027SYSTRACE\030CONTINUED \
- \031SINGLEEXIT\032SINGLEUNWIND \
- \033THREAD\034SUSPSIG\035SOFTDEP\036STOPPED\037CPUPEG)
+ \031SINGLEEXIT\032SINGLEUNWIND\033THREAD\034SUSPSIG \
+ \035SOFTDEP\036STOPPED\037CPUPEG\040EXITING)
 
 /* Macro to compute the exit signal to be delivered. */
 #define P_EXITSIG(p) \



carp ifdef inet6

2013-08-11 Thread Alexander Bluhm
Hi,

In the common IPv4 and IPv6 file ip_carp.c an #ifdef INET6 is
missing.  Especially the IN6_IS_ADDR_LINKLOCAL() macro should not
be necessary in IPv4 only code.

ok?

bluhm

Index: netinet/ip_carp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.209
diff -u -p -u -p -r1.209 ip_carp.c
--- netinet/ip_carp.c   20 Jun 2013 12:03:40 -  1.209
+++ netinet/ip_carp.c   11 Aug 2013 18:59:49 -
@@ -1969,9 +1969,11 @@ carp_addr_updated(void *v)
TAILQ_FOREACH(ifa, sc-sc_if.if_addrlist, ifa_list) {
if (ifa-ifa_addr-sa_family == AF_INET)
new_naddrs++;
+#ifdef INET6
else if (ifa-ifa_addr-sa_family == AF_INET6 
!IN6_IS_ADDR_LINKLOCAL(ifatoia6(ifa)-ia_addr.sin6_addr))
new_naddrs6++;
+#endif /* INET6 */
}
 
/* We received address changes from if_addrhooks callback */



Re: panic __mp_lock_held(sched_lock)

2013-08-11 Thread Mark Kettenis
 Date: Sun, 11 Aug 2013 11:46:29 +0200
 From: Alexander Bluhm alexander.bl...@gmx.net
 
 Hi,
 
 On my ThinkPad T430s I am trying to debug multithreaded qemu by
 attaching gdb.  This crashes the kernel of the host system within
 a few minutes.  Luckily I managed to attach a serial over lan with
 Intel AMT.
 
 login: panic: kernel diagnostic assertion __mp_lock_held(sched_lock) == 0 
 failed: file ../../../../kern/kern_lock.c, line 126
 Stopped at  Debugger+0x5:   leave
 RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
 IF RUNNING SMP, USE 'mach ddbcpu #' AND 'trace' ON OTHER PROCESSORS, TOO.
 DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
 ddb{0} trace
 Debugger() at Debugger+0x5
 panic() at panic+0xee
 __assert() at __assert+0x21
 _kernel_lock_init() at _kernel_lock_init
 issignal() at issignal+0x205
 sleep_setup_signal() at sleep_setup_signal+0x94
 tsleep() at tsleep+0x86
 sys_sigsuspend() at sys_sigsuspend+0x46
 syscall() at syscall+0x249
 --- syscall (number 111) ---
 end of kernel
 end trace frame: 0x7fe55fdbef0, count: -9
 0x7fe50c4cdcc:
 
PID   PPID   PGRPUID  S   FLAGS  WAIT  COMMAND
  28380   5201   5983   1000  3   0x4100080  thrsleep  qemu-system-x86_
 * 5825   5201   5983   1000  7   0xc100088  pause qemu-system-x86_
  18891   5201   5983   1000  3   0xc100080  sigwait   qemu-system-x86_
   5983   5201   5983   1000  3   0x880  thrsleep  qemu-system-x86_
  19446  22621  19446   1000  30x80  poll  gdb
   5201  12983   5201   1000  30x80  wait  gdb
 
 The kernel lock is acquired in mi_syscall() as sys_sigsuspend() needs
 it.  tsleep() calls sleep_setup() which acquires the sched lock.
 Then sleep_setup_signal() calls issignal() via the macro CURSIG().
 The function issignal() is full of side effects, especially for a
 traced process.
 
 There the kernel lock is acquired again, which should be fine as
 it is a recursive lock.  But to avoid deadlocks, _kernel_lock()
 asserts that it is acquired before sched lock.  This check is too
 strict, the condition is only true when the lock is taken the first
 time.

I don't think that check is too strict.  Rather the ptrace(2)
functionality is subtly broken.

 Index: kern/kern_lock.c
 ===
 RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_lock.c,v
 retrieving revision 1.42
 diff -u -p -u -p -r1.42 kern_lock.c
 --- kern/kern_lock.c  6 May 2013 16:37:55 -   1.42
 +++ kern/kern_lock.c  11 Aug 2013 01:54:06 -
 @@ -123,7 +123,10 @@ _kernel_lock_init(void)
  void
  _kernel_lock(void)
  {
 - SCHED_ASSERT_UNLOCKED();
 +#ifdef DIAGNOSTIC
 + if (__mp_lock_held(kernel_lock) == 0)
 + SCHED_ASSERT_UNLOCKED();
 +#endif /* DIAGNOSTIC */
   __mp_lock(kernel_lock);
  }
 
 Unfortunately this fix does not solve my problem.  With that I get
 another panic: wakeup: p_stat is 7
 
 login: panic: wakeup: p_stat is 7
 Stopped at  Debugger+0x5:   leave
 RUN AT LEAST 'trace' AND 'ps' AND INCLUDE OUTPUT WHEN REPORTING THIS PANIC!
 IF RUNNING SMP, USE 'mach ddbcpu #' AND 'trace' ON OTHER PROCESSORS, TOO.
 DO NOT EVEN BOTHER REPORTING THIS WITHOUT INCLUDING THAT INFORMATION!
 ddb{3} trace
 Debugger() at Debugger+0x5
 panic() at panic+0xee
 wakeup_n() at wakeup_n+0xfd
 sys___thrwakeup() at sys___thrwakeup+0x54
 syscall() at syscall+0x249
 --- syscall (number 301) ---
 end of kernel
 end trace frame: 0x684cb9237c0, count: -5
 0x684bf834c2a:
 
PID   PPID   PGRPUID  S   FLAGS  WAIT  COMMAND
  10959  11922  10959   1000  30x80  wait  gdb
 *11287  10959  10043   1000  7   0xc10qemu-system-x86_
  11131  10959  10043   1000  3   0xc100080  sigwait   qemu-system-x86_
  10043  10959  10043   1000  7   0x800qemu-system-x86_
 
 I will investigate further.
 
 bluhm