On Sun, 6 Oct 2013, Joel Sing wrote:
...
I like the concept of this diff, however it is not sufficient to solve
the multithreaded profiling problem. The reason for this is that you're
still sending SPROCESS signals, which are delivered to the main thread
unless another thread has diverted the signal. This means that all of
the profiling signals end up on the main thread, not the thread that was
consuming CPU time.
This can be fixed by either using ptsignal(p, SIG{PROF,VTALARM},
STHREAD) instead of psignal(), or by including your other diff that
changes the signal delivery within ptsignal - regardless, I think the
previous diff is worth pursuing since it potentially reduces the number
of signal related context switches.
Okay, here's a complete diff that, at least on my box with a regress test
that jsing@ should be committing soon, solves the problem, reduces the
total complexity of the itimer signal delivery, and reduces the context
switches caused by signal by having a thread sending a signal to its own
process take delivery of the signal if it can.
(While checking up on something else, I found that FreeBSD already uses
this take the cannoli^Wsignal logic.)
ok?
Philip
Index: bin/ps/ps.1
===
RCS file: /cvs/src/bin/ps/ps.1,v
retrieving revision 1.84
diff -u -p -r1.84 ps.1
--- bin/ps/ps.1 22 Sep 2013 17:28:34 - 1.84
+++ bin/ps/ps.1 7 Oct 2013 02:08:32 -
@@ -222,6 +222,8 @@ The thread flags (in hexadecimal), as de
.Aq Pa sys/proc.h :
.Bd -literal
P_INKTR 0x1 writing ktrace(2) record
+P_PROFPEND0x2 this thread needs SIGPROF
+P_ALRMPEND0x4 this thread needs SIGVTALRM
P_SIGSUSPEND 0x8 need to restore before-suspend mask
P_SELECT 0x40 selecting; wakeup/waiting danger
P_SINTR 0x80 sleep is interruptible
Index: sys/sys/proc.h
===
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.170
diff -u -p -r1.170 proc.h
--- sys/sys/proc.h 22 Sep 2013 17:28:33 - 1.170
+++ sys/sys/proc.h 7 Oct 2013 02:08:35 -
@@ -209,8 +209,6 @@ struct process {
struct timespec ps_start; /* starting time. */
struct timeout ps_realit_to; /* real-time itimer trampoline. */
- struct timeout ps_virt_to; /* virtual itimer trampoline. */
- struct timeout ps_prof_to; /* prof itimer trampoline. */
int ps_refcnt; /* Number of references. */
};
@@ -362,6 +360,8 @@ struct proc {
* These flags are per-thread and kept in p_flag
*/
#defineP_INKTR 0x01/* In a ktrace op, don't
recurse */
+#defineP_PROFPEND 0x02/* SIGPROF needs to be posted */
+#defineP_ALRMPEND 0x04/* SIGVTALRM needs to be posted
*/
#defineP_SIGSUSPEND0x08/* Need to restore
before-suspend mask*/
#defineP_SELECT0x40/* Selecting; wakeup/waiting
danger. */
#defineP_SINTR 0x80/* Sleep is interruptible. */
@@ -380,7 +380,8 @@ struct proc {
#define P_CPUPEG 0x4000 /* Do not move to another cpu. */
#defineP_BITS \
-(\20\01INKTR\04SIGSUSPEND\07SELECT\010SINTR\012SYSTEM \
+(\20\01INKTR\02PROFPEND\03ALRMPEND\04SIGSUSPEND\07SELECT \
+ \010SINTR\012SYSTEM \
\013TIMEOUT\016WEXIT\020OWEUPC\024SUSPSINGLE \
\025NOZOMBIE\027SYSTRACE\030CONTINUED\033THREAD \
\034SUSPSIG\035SOFTDEP\036STOPPED\037CPUPEG)
Index: sys/sys/resourcevar.h
===
RCS file: /cvs/src/sys/sys/resourcevar.h,v
retrieving revision 1.16
diff -u -p -r1.16 resourcevar.h
--- sys/sys/resourcevar.h 3 Jun 2013 16:55:22 - 1.16
+++ sys/sys/resourcevar.h 7 Oct 2013 02:08:35 -
@@ -68,8 +68,5 @@ struct plimit *limcopy(struct plimit *);
void limfree(struct plimit *);
voidruadd(struct rusage *, struct rusage *);
-
-void virttimer_trampoline(void *);
-void proftimer_trampoline(void *);
#endif
#endif /* !_SYS_RESOURCEVAR_H_ */
Index: sys/kern/kern_clock.c
===
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.82
diff -u -p -r1.82 kern_clock.c
--- sys/kern/kern_clock.c 13 Aug 2013 05:52:23 - 1.82
+++ sys/kern/kern_clock.c 7 Oct 2013 02:08:37 -
@@ -144,40 +144,11 @@ initclocks(void)
/*
* hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
* We don't want to send signals with psignal from hardclock because it makes
- * MULTIPROCESSOR locking very complicated. Instead we use a small trick
- * to send the signals safely and without blocking too many interrupts
- * while doing that (signal handling can be heavy).
- *
- * hardclock detects that the itimer has expired, and schedules a