On Mon, May 03, 2010 at 08:13:00PM +0000, Alexander Krizhanovsky wrote: > Kostik, > > thank you very much for the review! > > Kostik Belousov wrote: > >On Mon, Apr 19, 2010 at 12:46:48AM +0000, Alexander Krizhanovsky wrote: > > > >>Hi all! > >> > >>This patch implements per-thread rusage statistic (like RUSAGE_THREAD in > >>Linux and RUSAGE_LWP in OpenSolaris). > >> > >>Unfortunately, we have to acquire a number of locks to read/update > >>system and user times for current thread rusage information because it's > >>also used for whole process statistic and needs to be zeroed. > >> > >>Any comments are very appreciated. > >> > >>It's tested against 8.0-RELEASE. Appropriate PR is submitted. > >> > >>-- > >>Alexander Krizhanovsky > >>NatSys Lab. (http://natsys-lab.com) > >>tel: +7 (916) 717-3899, +7 (499) 747-6304 > >>e-mail: [email protected] > >> > >> > >I think this should be somewhat modified before commit. > > > >First, please separate patch into two pieces. One would introduce > >ruxagg_tlock() helper and use it in existing locations instead of > >three-liners. Possibly, add K&R->ANSI C kern_getrusage definition > >change. > > > >Second would actually add RUSAGE_THREAD. There, please follow > >the style(9), in particular, do not initialize the local variables > >at the declaration, instead, use explicit initialization in the code. > > > Done. I have also introduced third patch for casting microseconds to > timeval and replaced a few appropriate places in whole kernel code. > patch-rusage-thread-03052010.txt is incremental for > patch-ruxagg_tlock-03052010.txt, which is by turn incremental for > patch-usec2timeval-03052010.txt. > > I have made following change for calcru1(): > - if ((int64_t)tu < 0) { > - /* XXX: this should be an assert /phk */ > - printf("calcru: negative runtime of %jd usec for pid %d > (%s)\n", > - (intmax_t)tu, p->p_pid, p->p_comm); > - tu = ruxp->rux_tu; > - } > + KASSERT((int64_t)tu < 0, ("calcru: negative runtime of %jd usec " > + "for pid %d (%s)\n", > + (intmax_t)tu, p->p_pid, p->p_comm)); > > tu can become negative at about 300 thousand years of uptime, so this > condition seems quite unrealistic and indeed should be replaced by an > assertion. However I didn't understand why it isn't done so far and the > comment only was added. Did I miss something? > > >Should calctru() share the code with calcru1() ? I am esp. concerned > >with sanity check like negative runtime etc. Possibly, this code > >from calcru1() should be separated into function used from both > >calcru1() and calctru(). > > > >Man page for getrusage(2) should be updated. > > > It's added to patch-rusage-thread-03052010.txt. > > In the end - shoud I raise a new PR (the original one is kern/145813)? > > >Thanks for the submission.
It was some time, I already committed ruxagg_tlock() part, that caused
some feedback, and I reworked the rest of the patch myself. See svn-src@
for some discussion, and the latest version that I intend to commit
shortly is below. Your comments are welcome.
Lets discuss third patch after this is landed.
diff --git a/lib/libc/sys/getrusage.2 b/lib/libc/sys/getrusage.2
index bdf5d45..423503f 100644
--- a/lib/libc/sys/getrusage.2
+++ b/lib/libc/sys/getrusage.2
@@ -28,7 +28,7 @@
.\" @(#)getrusage.2 8.1 (Berkeley) 6/4/93
.\" $FreeBSD$
.\"
-.Dd June 4, 1993
+.Dd May 1, 2010
.Dt GETRUSAGE 2
.Os
.Sh NAME
@@ -42,6 +42,7 @@
.In sys/resource.h
.Fd "#define RUSAGE_SELF 0"
.Fd "#define RUSAGE_CHILDREN -1"
+.Fd "#define RUSAGE_THREAD 1"
.Ft int
.Fn getrusage "int who" "struct rusage *rusage"
.Sh DESCRIPTION
@@ -49,11 +50,12 @@ The
.Fn getrusage
system call
returns information describing the resources utilized by the current
-process, or all its terminated child processes.
+thread, the current process, or all its terminated child processes.
The
.Fa who
argument is either
-.Dv RUSAGE_SELF
+.Dv RUSAGE_THREAD ,
+.Dv RUSAGE_SELF ,
or
.Dv RUSAGE_CHILDREN .
The buffer to which
@@ -175,6 +177,10 @@ The
.Fn getrusage
system call appeared in
.Bx 4.2 .
+The
+.Dv RUSAGE_THREAD
+facility first appeared in
+.Fx 8.1 .
.Sh BUGS
There is no way to obtain information about a child process
that has not yet terminated.
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 49a3097..6c50f1f 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -901,7 +901,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp,
int preferthread)
kp->ki_pri.pri_user = td->td_user_pri;
if (preferthread) {
- kp->ki_runtime = cputick2usec(td->td_runtime);
+ kp->ki_runtime = td->td_rux.rux_runtime;
kp->ki_pctcpu = sched_pctcpu(td);
kp->ki_estcpu = td->td_estcpu;
}
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index a3ed75d..0bc78d0 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -76,7 +76,7 @@ static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
-static void ruxagg_tlock(struct proc *p, struct thread *td);
+static void ruxagg(struct proc *p, struct thread *td);
/*
* Resource controls and accounting.
@@ -630,7 +630,7 @@ lim_cb(void *arg)
return;
PROC_SLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
}
PROC_SUNLOCK(p);
if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
@@ -841,7 +841,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval
*sp)
FOREACH_THREAD_IN_PROC(p, td) {
if (td->td_incruntime == 0)
continue;
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
}
calcru1(p, &p->p_rux, up, sp);
}
@@ -961,6 +961,16 @@ kern_getrusage(struct thread *td, int who, struct rusage
*rup)
calccru(p, &rup->ru_utime, &rup->ru_stime);
break;
+ case RUSAGE_THREAD:
+ PROC_SLOCK(p);
+ ruxagg(p, td);
+ PROC_SUNLOCK(p);
+ thread_lock(td);
+ *rup = td->td_ru;
+ calcru1(p, &td->td_rux, &rup->ru_utime, &rup->ru_stime);
+ thread_unlock(td);
+ break;
+
default:
error = EINVAL;
}
@@ -1001,7 +1011,7 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct
rusage *ru2,
* Aggregate tick counts into the proc's rusage_ext.
*/
void
-ruxagg(struct rusage_ext *rux, struct thread *td)
+ruxagg_locked(struct rusage_ext *rux, struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1010,18 +1020,19 @@ ruxagg(struct rusage_ext *rux, struct thread *td)
rux->rux_uticks += td->td_uticks;
rux->rux_sticks += td->td_sticks;
rux->rux_iticks += td->td_iticks;
- td->td_incruntime = 0;
- td->td_uticks = 0;
- td->td_iticks = 0;
- td->td_sticks = 0;
}
static void
-ruxagg_tlock(struct proc *p, struct thread *td)
+ruxagg(struct proc *p, struct thread *td)
{
thread_lock(td);
- ruxagg(&p->p_rux, td);
+ ruxagg_locked(&p->p_rux, td);
+ ruxagg_locked(&td->td_rux, td);
+ td->td_incruntime = 0;
+ td->td_uticks = 0;
+ td->td_iticks = 0;
+ td->td_sticks = 0;
thread_unlock(td);
}
@@ -1039,7 +1050,7 @@ rufetch(struct proc *p, struct rusage *ru)
*ru = p->p_ru;
if (p->p_numthreads > 0) {
FOREACH_THREAD_IN_PROC(p, td) {
- ruxagg_tlock(p, td);
+ ruxagg(p, td);
rucollect(ru, &td->td_ru);
}
}
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 9be4c2f..b32c584 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -432,7 +432,7 @@ thread_exit(void)
PROC_UNLOCK(p);
thread_lock(td);
/* Save our tick information with both the thread and proc locked */
- ruxagg(&p->p_rux, td);
+ ruxagg_locked(&p->p_rux, td);
PROC_SUNLOCK(p);
td->td_state = TDS_INACTIVE;
#ifdef WITNESS
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index fb31cfc..e32e494 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -173,6 +173,27 @@ struct kdtrace_thread;
struct cpuset;
/*
+ * XXX: Does this belong in resource.h or resourcevar.h instead?
+ * Resource usage extension. The times in rusage structs in the kernel are
+ * never up to date. The actual times are kept as runtimes and tick counts
+ * (with control info in the "previous" times), and are converted when
+ * userland asks for rusage info. Backwards compatibility prevents putting
+ * this directly in the user-visible rusage struct.
+ *
+ * Locking for p_rux: (cj) means (j) for p_rux and (c) for p_crux.
+ * Locking for td_rux: (t) for all fields.
+ */
+struct rusage_ext {
+ u_int64_t rux_runtime; /* (cj) Real time. */
+ u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
+ u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
+ u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
+ u_int64_t rux_uu; /* (c) Previous user time in usec. */
+ u_int64_t rux_su; /* (c) Previous sys time in usec. */
+ u_int64_t rux_tu; /* (c) Previous total time in usec. */
+};
+
+/*
* Kernel runnable context (thread).
* This is what is put to sleep and reactivated.
* Thread context. Processes may have multiple threads.
@@ -219,7 +240,8 @@ struct thread {
u_int td_estcpu; /* (t) estimated cpu utilization */
int td_slptick; /* (t) Time at sleep. */
int td_blktick; /* (t) Time spent blocked. */
- struct rusage td_ru; /* (t) rusage information */
+ struct rusage td_ru; /* (t) rusage information. */
+ struct rusage_ext td_rux; /* (t) Internal rusage information. */
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */
u_int td_pticks; /* (t) Statclock hits for profiling */
@@ -426,26 +448,6 @@ do {
\
#define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN
/*
- * XXX: Does this belong in resource.h or resourcevar.h instead?
- * Resource usage extension. The times in rusage structs in the kernel are
- * never up to date. The actual times are kept as runtimes and tick counts
- * (with control info in the "previous" times), and are converted when
- * userland asks for rusage info. Backwards compatibility prevents putting
- * this directly in the user-visible rusage struct.
- *
- * Locking: (cj) means (j) for p_rux and (c) for p_crux.
- */
-struct rusage_ext {
- u_int64_t rux_runtime; /* (cj) Real time. */
- u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
- u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
- u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
- u_int64_t rux_uu; /* (c) Previous user time in usec. */
- u_int64_t rux_su; /* (c) Previous sys time in usec. */
- u_int64_t rux_tu; /* (c) Previous total time in usec. */
-};
-
-/*
* Process structure.
*/
struct proc {
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
index 9af96af..e703744 100644
--- a/sys/sys/resource.h
+++ b/sys/sys/resource.h
@@ -56,6 +56,7 @@
#define RUSAGE_SELF 0
#define RUSAGE_CHILDREN -1
+#define RUSAGE_THREAD 1
struct rusage {
struct timeval ru_utime; /* user time used */
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index 21728aa..95a9b49 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -131,7 +131,7 @@ void rucollect(struct rusage *ru, struct rusage
*ru2);
void rufetch(struct proc *p, struct rusage *ru);
void rufetchcalc(struct proc *p, struct rusage *ru, struct timeval *up,
struct timeval *sp);
-void ruxagg(struct rusage_ext *rux, struct thread *td);
+void ruxagg_locked(struct rusage_ext *rux, struct thread *td);
int suswintr(void *base, int word);
struct uidinfo
*uifind(uid_t uid);
pgpekMuFaAz08.pgp
Description: PGP signature

