When I compile a multi-threaded application with '-pg' option, I always get no
results in gmon.out. With bad luck, my application gets SIGSEGV while running.
Because the buffer that holds number of caller/callee count is the only one
in the process and will be broken by multi-threaded access.

I get the idea to solve this problem from NetBSD. NetBSD has individual buffers
for each thread and merges them at the end of profiling.

NetBSD stores the reference to the individual buffer by pthread_setspecific(3).
I think it causes infinite recursive call if whole libc library (except
mcount.c) is compiled with -pg.

The compiler generates '_mcount' function call at the beginning of every
functions. If '_mcount' calls pthread_getspecific(3) to get the individual
buffer, pthread_getspecific() calls '_mcount' again and causes infinite
recursion.

NetBSD prevents from infinite recursive call by checking a global variable. But
this approach also prevents simultaneously call of '_mcount' on a multi-threaded
application. It makes a little bit wrong results of profiling.

So I added a pointer to the buffer in `struct pthread` that can be accessible
via macro call as same as pthread_self(3). This approach can prevent of
infinite recursive call of '_mcount'.

I obtained merging function from NetBSD that is called in '_mcleanup' function.
Merging function needs to walk through all the individual buffers,
I added SLIST_ENTRY member in 'struct gmonparam' to make a list of the buffers.
And also added '#ifndef _KERNEL' for the SLIST_ENTRY member not to be used for
the kernel.

But I still use pthread_getspecific(3) for that can call destructor when
a thread is terminated. The individual buffer is moved to free list to reuse
for a new thread.

Here is a patch for this approach.

diff --git a/lib/libc/gmon/gmon.c b/lib/libc/gmon/gmon.c
index 1ce0a1c289e..6887f4f5987 100644
--- a/lib/libc/gmon/gmon.c
+++ b/lib/libc/gmon/gmon.c
@@ -42,6 +42,16 @@
 
 struct gmonparam _gmonparam = { GMON_PROF_OFF };
 
+#ifndef _KERNEL
+#include <pthread.h>
+
+SLIST_HEAD(, gmonparam) _gmonfree = SLIST_HEAD_INITIALIZER(_gmonfree);
+SLIST_HEAD(, gmonparam) _gmoninuse = SLIST_HEAD_INITIALIZER(_gmoninuse);
+pthread_mutex_t _gmonlock = PTHREAD_MUTEX_INITIALIZER;
+pthread_key_t _gmonkey;
+struct gmonparam _gmondummy;
+#endif
+
 static int     s_scale;
 /* see profil(2) where this is describe (incorrectly) */
 #define                SCALE_1_TO_1    0x10000L
@@ -52,6 +62,13 @@ PROTO_NORMAL(moncontrol);
 PROTO_DEPRECATED(monstartup);
 static int hertz(void);
 
+#ifndef _KERNEL
+static void _gmon_destructor(void *);
+struct gmonparam *_gmon_alloc(void);
+static void _gmon_merge(void);
+static void _gmon_merge_two(struct gmonparam *, struct gmonparam *);
+#endif
+
 void
 monstartup(u_long lowpc, u_long highpc)
 {
@@ -114,6 +131,11 @@ monstartup(u_long lowpc, u_long highpc)
        } else
                s_scale = SCALE_1_TO_1;
 
+#ifndef _KERNEL
+       _gmondummy.state = GMON_PROF_BUSY;
+       pthread_key_create(&_gmonkey, _gmon_destructor);
+#endif
+
        moncontrol(1);
        return;
 
@@ -134,6 +156,194 @@ mapfailed:
 }
 __strong_alias(_monstartup,monstartup);
 
+#ifndef _KERNEL
+static void
+_gmon_destructor(void *arg)
+{
+       struct gmonparam *p = arg, *q, **prev;
+
+       if (p == &_gmondummy)
+               return;
+
+       pthread_setspecific(_gmonkey, &_gmondummy);
+
+       pthread_mutex_lock(&_gmonlock);
+       SLIST_REMOVE(&_gmoninuse, p, gmonparam, next);
+       SLIST_INSERT_HEAD(&_gmonfree, p, next);
+       pthread_mutex_unlock(&_gmonlock);
+
+       pthread_setspecific(_gmonkey, NULL);
+}
+
+struct gmonparam *
+_gmon_alloc(void)
+{
+       void *addr;
+       struct gmonparam *p;
+
+       pthread_mutex_lock(&_gmonlock);
+       p = SLIST_FIRST(&_gmonfree);
+       if (p != NULL) {
+               SLIST_REMOVE_HEAD(&_gmonfree, next);
+               SLIST_INSERT_HEAD(&_gmoninuse, p ,next);
+       } else {
+               pthread_mutex_unlock(&_gmonlock);
+               p = mmap(NULL, sizeof (struct gmonparam),
+                        PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+               if (p == MAP_FAILED)
+                       goto mapfailed_2;
+               *p = _gmonparam;
+               p->kcount = NULL;
+               p->kcountsize = 0;
+               p->froms = NULL;
+               p->tos = NULL;
+               addr = mmap(NULL, p->fromssize, PROT_READ|PROT_WRITE,
+                           MAP_ANON|MAP_PRIVATE, -1, 0);
+               if (addr == MAP_FAILED)
+                       goto mapfailed;
+               p->froms = addr;
+
+               addr = mmap(NULL, p->tossize, PROT_READ|PROT_WRITE,
+                           MAP_ANON|MAP_PRIVATE, -1, 0);
+               if (addr == MAP_FAILED)
+                       goto mapfailed;
+               p->tos = addr;
+               pthread_mutex_lock(&_gmonlock);
+               SLIST_INSERT_HEAD(&_gmoninuse, p ,next);
+       }
+       pthread_mutex_unlock(&_gmonlock);
+       pthread_setspecific(_gmonkey, p);
+
+       return p;
+
+mapfailed:
+       if (p->froms != NULL) {
+               munmap(p->froms, p->fromssize);
+               p->froms = NULL;
+       }
+       if (p->tos != NULL) {
+               munmap(p->tos, p->tossize);
+               p->tos = NULL;
+       }
+mapfailed_2:
+       pthread_setspecific(_gmonkey, NULL);
+       ERR("_gmon_alloc: out of memory\n");
+       return NULL;
+}
+
+static void
+_gmon_merge_two(struct gmonparam *p, struct gmonparam *q)
+{
+       u_long fromindex;
+       u_short *frompcindex, qtoindex, toindex;
+       u_long selfpc;
+       u_long endfrom;
+       long count;
+       struct tostruct *top;
+
+       endfrom = (q->fromssize / sizeof(*q->froms));
+       for (fromindex = 0; fromindex < endfrom; fromindex++) {
+               if (q->froms[fromindex] == 0)
+                       continue;
+               for (qtoindex = q->froms[fromindex]; qtoindex != 0;
+                    qtoindex = q->tos[qtoindex].link) {
+                       selfpc = q->tos[qtoindex].selfpc;
+                       count = q->tos[qtoindex].count;
+                       /* cribbed from mcount */
+                       frompcindex = &p->froms[fromindex];
+                       toindex = *frompcindex;
+                       if (toindex == 0) {
+                               /*
+                                *      first time traversing this arc
+                                */
+                               toindex = ++p->tos[0].link;
+                               if (toindex >= p->tolimit)
+                                       /* halt further profiling */
+                                       goto overflow;
+
+                               *frompcindex = (u_short)toindex;
+                               top = &p->tos[(size_t)toindex];
+                               top->selfpc = selfpc;
+                               top->count = count;
+                               top->link = 0;
+                               goto done;
+                       }
+                       top = &p->tos[(size_t)toindex];
+                       if (top->selfpc == selfpc) {
+                               /*
+                                * arc at front of chain; usual case.
+                                */
+                               top->count+= count;
+                               goto done;
+                       }
+                       /*
+                        * have to go looking down chain for it.
+                        * top points to what we are looking at,
+                        * we know it is not at the head of the chain.
+                        */
+                       for (; /* goto done */; ) {
+                               if (top->link == 0) {
+                                       /*
+                                        * top is end of the chain and
+                                        * none of the chain had
+                                        * top->selfpc == selfpc.  so
+                                        * we allocate a new tostruct
+                                        * and link it to the head of
+                                        * the chain.
+                                        */
+                                       toindex = ++p->tos[0].link;
+                                       if (toindex >= p->tolimit)
+                                               goto overflow;
+                                       top = &p->tos[(size_t)toindex];
+                                       top->selfpc = selfpc;
+                                       top->count = count;
+                                       top->link = *frompcindex;
+                                       *frompcindex = (u_short)toindex;
+                                       goto done;
+                               }
+                               /*
+                                * otherwise, check the next arc on the chain.
+                                */
+                               top = &p->tos[top->link];
+                               if (top->selfpc == selfpc) {
+                                       /*
+                                        * there it is.
+                                        * add to its count.
+                                        */
+                                       top->count += count;
+                                       goto done;
+                               }
+
+                       }
+
+               done: ;
+               }
+
+       }
+overflow: ;
+
+}
+
+static void
+_gmon_merge(void)
+{
+       struct gmonparam *q;
+
+       pthread_mutex_lock(&_gmonlock);
+
+       SLIST_FOREACH(q, &_gmonfree, next)
+               _gmon_merge_two(&_gmonparam, q);
+
+       SLIST_FOREACH(q, &_gmoninuse, next) {
+               q->state = GMON_PROF_OFF;
+               _gmon_merge_two(&_gmonparam, q);
+       }
+
+       pthread_mutex_unlock(&_gmonlock);
+}
+#endif
+
+
 void
 _mcleanup(void)
 {
@@ -233,6 +443,9 @@ _mcleanup(void)
        snprintf(dbuf, sizeof dbuf, "[mcleanup1] kcount 0x%x ssiz %d\n",
            p->kcount, p->kcountsize);
        write(log, dbuf, strlen(dbuf));
+#endif
+#ifndef _KERNEL
+       _gmon_merge();
 #endif
        hdr = (struct gmonhdr *)&gmonhdr;
        bzero(hdr, sizeof(*hdr));
diff --git a/lib/libc/gmon/mcount.c b/lib/libc/gmon/mcount.c
index f0ce70dd6ae..2394e539337 100644
--- a/lib/libc/gmon/mcount.c
+++ b/lib/libc/gmon/mcount.c
@@ -31,6 +31,15 @@
 #include <sys/types.h>
 #include <sys/gmon.h>
 
+#ifndef _KERNEL
+#include <stdio.h>             // for the use of '__isthreaded'.
+#include <pthread.h>
+#include <thread_private.h>
+#include <tib.h>
+extern struct gmonparam _gmondummy;
+struct gmonparam *_gmon_alloc(void);
+#endif
+
 /*
  * mcount is called on entry to each function compiled with the profiling
  * switch set.  _mcount(), which is declared in a machine-dependent way
@@ -65,7 +74,22 @@ _MCOUNT_DECL(u_long frompc, u_long selfpc)
        if ((p = curcpu()->ci_gmon) == NULL)
                return;
 #else
-       p = &_gmonparam;
+       if (__isthreaded) {
+               if (_gmonparam.state != GMON_PROF_ON)
+                       return;
+               pthread_t t = TIB_GET()->tib_thread;
+               p = t->gmonparam;
+               if (p == &_gmondummy)
+                       return;
+               if (p == NULL) {
+                       // prevent recursive call of _gmon_alloc().
+                       t->gmonparam = &_gmondummy;
+                       if ((t->gmonparam = _gmon_alloc()) == NULL)
+                               return;
+                       p = t->gmonparam;
+               }
+       } else
+               p = &_gmonparam;
 #endif
        /*
         * check that we are profiling
diff --git a/lib/libc/include/thread_private.h 
b/lib/libc/include/thread_private.h
index 237c3fbd034..d6c89c8334b 100644
--- a/lib/libc/include/thread_private.h
+++ b/lib/libc/include/thread_private.h
@@ -5,6 +5,8 @@
 #ifndef _THREAD_PRIVATE_H_
 #define _THREAD_PRIVATE_H_
 
+#include <sys/types.h>
+#include <sys/gmon.h>
 #include <stdio.h>             /* for FILE and __isthreaded */
 
 #define _MALLOC_MUTEXES 32
@@ -389,6 +391,7 @@ struct pthread {
 
        /* cancel received in a delayed cancel block? */
        int delayed_cancel;
+       struct gmonparam *gmonparam;
 };
 /* flags in pthread->flags */
 #define        THREAD_DONE             0x001
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
index 1a63cb52b1b..1e9ab4054bd 100644
--- a/sys/sys/gmon.h
+++ b/sys/sys/gmon.h
@@ -35,6 +35,7 @@
 #ifndef _SYS_GMON_H_
 #define _SYS_GMON_H_
 
+#include <sys/queue.h>
 #include <machine/profile.h>
 
 /*
@@ -136,6 +137,9 @@ struct gmonparam {
        u_long          highpc;
        u_long          textsize;
        u_long          hashfraction;
+#ifndef _KERNEL
+       SLIST_ENTRY(gmonparam)  next;
+#endif
 };
 
 /*

-- 
Yuichiro NAITO ([email protected])

Reply via email to