I updated my patch. I see `struct gmonparam` must be same between the kernel and
kgmon(1). I deleted #ifndef in the `struct gmonparam`.
Is it OK?
diff --git a/lib/libc/gmon/gmon.c b/lib/libc/gmon/gmon.c
index 1ce0a1c289e..6887f4f5987 100644
--- a/lib/libc/gmon/gmon.c
+++ b/lib/libc/gmon/gmon.c
@@ -42,6 +42,16 @@
struct gmonparam _gmonparam = { GMON_PROF_OFF };
+#ifndef _KERNEL
+#include <pthread.h>
+
+SLIST_HEAD(, gmonparam) _gmonfree = SLIST_HEAD_INITIALIZER(_gmonfree);
+SLIST_HEAD(, gmonparam) _gmoninuse = SLIST_HEAD_INITIALIZER(_gmoninuse);
+pthread_mutex_t _gmonlock = PTHREAD_MUTEX_INITIALIZER;
+pthread_key_t _gmonkey;
+struct gmonparam _gmondummy;
+#endif
+
static int s_scale;
/* see profil(2) where this is describe (incorrectly) */
#define SCALE_1_TO_1 0x10000L
@@ -52,6 +62,13 @@ PROTO_NORMAL(moncontrol);
PROTO_DEPRECATED(monstartup);
static int hertz(void);
+#ifndef _KERNEL
+static void _gmon_destructor(void *);
+struct gmonparam *_gmon_alloc(void);
+static void _gmon_merge(void);
+static void _gmon_merge_two(struct gmonparam *, struct gmonparam *);
+#endif
+
void
monstartup(u_long lowpc, u_long highpc)
{
@@ -114,6 +131,11 @@ monstartup(u_long lowpc, u_long highpc)
} else
s_scale = SCALE_1_TO_1;
+#ifndef _KERNEL
+ _gmondummy.state = GMON_PROF_BUSY;
+ pthread_key_create(&_gmonkey, _gmon_destructor);
+#endif
+
moncontrol(1);
return;
@@ -134,6 +156,194 @@ mapfailed:
}
__strong_alias(_monstartup,monstartup);
+#ifndef _KERNEL
+static void
+_gmon_destructor(void *arg)
+{
+ struct gmonparam *p = arg, *q, **prev;
+
+ if (p == &_gmondummy)
+ return;
+
+ pthread_setspecific(_gmonkey, &_gmondummy);
+
+ pthread_mutex_lock(&_gmonlock);
+ SLIST_REMOVE(&_gmoninuse, p, gmonparam, next);
+ SLIST_INSERT_HEAD(&_gmonfree, p, next);
+ pthread_mutex_unlock(&_gmonlock);
+
+ pthread_setspecific(_gmonkey, NULL);
+}
+
+struct gmonparam *
+_gmon_alloc(void)
+{
+ void *addr;
+ struct gmonparam *p;
+
+ pthread_mutex_lock(&_gmonlock);
+ p = SLIST_FIRST(&_gmonfree);
+ if (p != NULL) {
+ SLIST_REMOVE_HEAD(&_gmonfree, next);
+ SLIST_INSERT_HEAD(&_gmoninuse, p ,next);
+ } else {
+ pthread_mutex_unlock(&_gmonlock);
+ p = mmap(NULL, sizeof (struct gmonparam),
+ PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (p == MAP_FAILED)
+ goto mapfailed_2;
+ *p = _gmonparam;
+ p->kcount = NULL;
+ p->kcountsize = 0;
+ p->froms = NULL;
+ p->tos = NULL;
+ addr = mmap(NULL, p->fromssize, PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (addr == MAP_FAILED)
+ goto mapfailed;
+ p->froms = addr;
+
+ addr = mmap(NULL, p->tossize, PROT_READ|PROT_WRITE,
+ MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (addr == MAP_FAILED)
+ goto mapfailed;
+ p->tos = addr;
+ pthread_mutex_lock(&_gmonlock);
+ SLIST_INSERT_HEAD(&_gmoninuse, p ,next);
+ }
+ pthread_mutex_unlock(&_gmonlock);
+ pthread_setspecific(_gmonkey, p);
+
+ return p;
+
+mapfailed:
+ if (p->froms != NULL) {
+ munmap(p->froms, p->fromssize);
+ p->froms = NULL;
+ }
+ if (p->tos != NULL) {
+ munmap(p->tos, p->tossize);
+ p->tos = NULL;
+ }
+mapfailed_2:
+ pthread_setspecific(_gmonkey, NULL);
+ ERR("_gmon_alloc: out of memory\n");
+ return NULL;
+}
+
+static void
+_gmon_merge_two(struct gmonparam *p, struct gmonparam *q)
+{
+ u_long fromindex;
+ u_short *frompcindex, qtoindex, toindex;
+ u_long selfpc;
+ u_long endfrom;
+ long count;
+ struct tostruct *top;
+
+ endfrom = (q->fromssize / sizeof(*q->froms));
+ for (fromindex = 0; fromindex < endfrom; fromindex++) {
+ if (q->froms[fromindex] == 0)
+ continue;
+ for (qtoindex = q->froms[fromindex]; qtoindex != 0;
+ qtoindex = q->tos[qtoindex].link) {
+ selfpc = q->tos[qtoindex].selfpc;
+ count = q->tos[qtoindex].count;
+ /* cribbed from mcount */
+ frompcindex = &p->froms[fromindex];
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ /* halt further profiling */
+ goto overflow;
+
+ *frompcindex = (u_short)toindex;
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = 0;
+ goto done;
+ }
+ top = &p->tos[(size_t)toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count+= count;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and
+ * none of the chain had
+ * top->selfpc == selfpc. so
+ * we allocate a new tostruct
+ * and link it to the head of
+ * the chain.
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= p->tolimit)
+ goto overflow;
+ top = &p->tos[(size_t)toindex];
+ top->selfpc = selfpc;
+ top->count = count;
+ top->link = *frompcindex;
+ *frompcindex = (u_short)toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ top = &p->tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * add to its count.
+ */
+ top->count += count;
+ goto done;
+ }
+
+ }
+
+ done: ;
+ }
+
+ }
+overflow: ;
+
+}
+
+static void
+_gmon_merge(void)
+{
+ struct gmonparam *q;
+
+ pthread_mutex_lock(&_gmonlock);
+
+ SLIST_FOREACH(q, &_gmonfree, next)
+ _gmon_merge_two(&_gmonparam, q);
+
+ SLIST_FOREACH(q, &_gmoninuse, next) {
+ q->state = GMON_PROF_OFF;
+ _gmon_merge_two(&_gmonparam, q);
+ }
+
+ pthread_mutex_unlock(&_gmonlock);
+}
+#endif
+
+
void
_mcleanup(void)
{
@@ -233,6 +443,9 @@ _mcleanup(void)
snprintf(dbuf, sizeof dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n",
p->kcount, p->kcountsize);
write(log, dbuf, strlen(dbuf));
+#endif
+#ifndef _KERNEL
+ _gmon_merge();
#endif
hdr = (struct gmonhdr *)&gmonhdr;
bzero(hdr, sizeof(*hdr));
diff --git a/lib/libc/gmon/mcount.c b/lib/libc/gmon/mcount.c
index f0ce70dd6ae..2394e539337 100644
--- a/lib/libc/gmon/mcount.c
+++ b/lib/libc/gmon/mcount.c
@@ -31,6 +31,15 @@
#include <sys/types.h>
#include <sys/gmon.h>
+#ifndef _KERNEL
+#include <stdio.h> // for the use of '__isthreaded'.
+#include <pthread.h>
+#include <thread_private.h>
+#include <tib.h>
+extern struct gmonparam _gmondummy;
+struct gmonparam *_gmon_alloc(void);
+#endif
+
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
@@ -65,7 +74,22 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
if ((p = curcpu()->ci_gmon) == NULL)
return;
#else
- p = &_gmonparam;
+ if (__isthreaded) {
+ if (_gmonparam.state != GMON_PROF_ON)
+ return;
+ pthread_t t = TIB_GET()->tib_thread;
+ p = t->gmonparam;
+ if (p == &_gmondummy)
+ return;
+ if (p == NULL) {
+ // prevent recursive call of _gmon_alloc().
+ t->gmonparam = &_gmondummy;
+ if ((t->gmonparam = _gmon_alloc()) == NULL)
+ return;
+ p = t->gmonparam;
+ }
+ } else
+ p = &_gmonparam;
#endif
/*
* check that we are profiling
diff --git a/lib/libc/include/thread_private.h
b/lib/libc/include/thread_private.h
index 237c3fbd034..d6c89c8334b 100644
--- a/lib/libc/include/thread_private.h
+++ b/lib/libc/include/thread_private.h
@@ -5,6 +5,8 @@
#ifndef _THREAD_PRIVATE_H_
#define _THREAD_PRIVATE_H_
+#include <sys/types.h>
+#include <sys/gmon.h>
#include <stdio.h> /* for FILE and __isthreaded */
#define _MALLOC_MUTEXES 32
@@ -389,6 +391,7 @@ struct pthread {
/* cancel received in a delayed cancel block? */
int delayed_cancel;
+ struct gmonparam *gmonparam;
};
/* flags in pthread->flags */
#define THREAD_DONE 0x001
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
index 1a63cb52b1b..cce98752346 100644
--- a/sys/sys/gmon.h
+++ b/sys/sys/gmon.h
@@ -35,6 +35,7 @@
#ifndef _SYS_GMON_H_
#define _SYS_GMON_H_
+#include <sys/queue.h>
#include <machine/profile.h>
/*
@@ -136,6 +137,7 @@ struct gmonparam {
u_long highpc;
u_long textsize;
u_long hashfraction;
+ SLIST_ENTRY(gmonparam) next;
};
/*
On 2021/06/16 16:34, Yuichiro NAITO wrote:
> When I compile a multi-threaded application with '-pg' option, I always get no
> results in gmon.out. With bad luck, my application gets SIGSEGV while running.
> Because the buffer that holds number of caller/callee count is the only one
> in the process and will be broken by multi-threaded access.
>
> I get the idea to solve this problem from NetBSD. NetBSD has individual
> buffers
> for each thread and merges them at the end of profiling.
>
> NetBSD stores the reference to the individual buffer by
> pthread_setspecific(3).
> I think it causes infinite recursive call if whole libc library (except
> mcount.c) is compiled with -pg.
>
> The compiler generates '_mcount' function call at the beginning of every
> functions. If '_mcount' calls pthread_getspecific(3) to get the individual
> buffer, pthread_getspecific() calls '_mcount' again and causes infinite
> recursion.
>
> NetBSD prevents from infinite recursive call by checking a global variable.
> But
> this approach also prevents simultaneously call of '_mcount' on a
> multi-threaded
> application. It makes a little bit wrong results of profiling.
>
> So I added a pointer to the buffer in `struct pthread` that can be accessible
> via macro call as same as pthread_self(3). This approach can prevent of
> infinite recursive call of '_mcount'.
>
> I obtained merging function from NetBSD that is called in '_mcleanup'
> function.
> Merging function needs to walk through all the individual buffers,
> I added SLIST_ENTRY member in 'struct gmonparam' to make a list of the
> buffers.
> And also added '#ifndef _KERNEL' for the SLIST_ENTRY member not to be used for
> the kernel.
>
> But I still use pthread_getspecific(3) for that can call destructor when
> a thread is terminated. The individual buffer is moved to free list to reuse
> for a new thread.
--
Yuichiro NAITO ([email protected])