Hello,

I analyzed Pth performance for an application that performs a lot of
context switches. I found that a huge amount of time is spent in just two
functions: sigismember() and sigdelset(). These functions are called from
pth_sched_eventmanager in a loop, for each thread and each signal.
pth_sched_eventmanager itself is called on each context switch. This
results in O(n**2) calls to the functions mentioned, where n is the number
of threads.

These functions perform signal mask operations. In effect, they just
set, clear, or test one bit in the bit vector that represents the signal
mask. On top of that, for each bit to be changed or queried we pay for a
function call and a check of parameter correctness.

I made a fix (attached) that inlines the bit operations directly into the
code. I took the code from glibc 2.1.3 and modified it slightly. I left the
signal set operations as macros rather than inline functions, and I removed
the extra operations from sigismember(). Originally sigismember() was
implemented as 'return sig[word] & mask ? 1:0'; I changed it to just
'return sig[word] & mask'.
I removed all references to sigaddset(), sigdelset() and sigismember(),
replacing them with macros. The macros themselves reside in the 'fastsig.h'
file. All actual calls were renamed from xxx() to fast_xxx() to make sure
the macros, not the functions, are used.

The fix is against 1.3.5. It should be no problem to adapt it to other
releases.

This fix improves performance in my test by a factor of 3 for 70 threads
on HP 10.20.
I tested this fix on two platforms: HP 10.20 and Linux 2.2.14. I also ran
a test to make sure my macros do exactly the same as the corresponding
functions do.

I would be happy if you incorporated the attached fix into future releases.

Regards,
Vladimir Kondratiev.
diff -BbdruN pth-1.3.5/fastsig.h pth-1.3.5-new/fastsig.h
--- pth-1.3.5/fastsig.h Thu Jan  1 02:00:00 1970
+++ pth-1.3.5-new/fastsig.h     Sun Sep 10 08:42:54 2000
@@ -0,0 +1,25 @@
+#ifndef __FASTSIG_H
+#define __FASTSIG_H
+
+/**
+ * Inlined signal mask operations (patch by Vladimir Kondratiev).
+ * NOTE(review): assumes sigset_t is an array of unsigned long int -- confirm.
+ */
+
+/* Return a mask that includes the bit for SIG only (signal 1 is bit 0).  */
+# define __sigmask(sig) \
+  (((unsigned long int) 1) << (((sig) - 1) % (8 * sizeof (unsigned long int))))
+
+  /* Return the word index for SIG (signal 1 is in word 0).  */
+# define __sigword(sig) (((sig) - 1) / (8 * sizeof (unsigned long int)))
+/* Set the bit for signal i in the set that ss points to.  */
+#define fast_sigaddset(ss,i) \
+((unsigned long int*)ss)[__sigword(i)] |= __sigmask(i)
+/* Clear the bit for signal i in the set that ss points to.  */
+#define fast_sigdelset(ss,i) \
+((unsigned long int*)ss)[__sigword(i)] &= ~(__sigmask(i))
+/* Nonzero if signal i is in the set; yields the masked word, not 0/1.  */
+#define fast_sigismember(ss,i) \
+(((unsigned long int*)ss)[__sigword(i)] & __sigmask(i))
+
+#endif /* __FASTSIG_H */
diff -BbdruN pth-1.3.5/pth_high.c pth-1.3.5-new/pth_high.c
--- pth-1.3.5/pth_high.c        Fri Mar 10 12:58:32 2000
+++ pth-1.3.5-new/pth_high.c    Sun Sep 10 08:48:21 2000
@@ -34,6 +34,7 @@
  */
 
 #include "pth_p.h"
+#include "fastsig.h"
 
 /* Pth variant of usleep(3) */
 int pth_usleep(unsigned int usec)
@@ -111,7 +112,7 @@
     if (sigpending(&pending) < 0)
         sigemptyset(&pending);
     for (sig = 1; sig < PTH_NSIG; sig++) {
-        if (sigismember(set, sig) && sigismember(&pending, sig)) {
+        if (fast_sigismember(set, sig) && fast_sigismember(&pending, sig)) {
             pth_util_sigdelete(sig);
             *sigp = sig;
             return 0;
diff -BbdruN pth-1.3.5/pth_lib.c pth-1.3.5-new/pth_lib.c
--- pth-1.3.5/pth_lib.c Fri Mar 10 12:58:32 2000
+++ pth-1.3.5-new/pth_lib.c     Sun Sep 10 08:42:54 2000
@@ -28,6 +28,7 @@
                                   time I was too famous.''
                                             -- Unknown                */
 #include "pth_p.h"
+#include "fastsig.h"
 
 /* return the hexadecimal Pth library version number */
 long pth_version(void)
@@ -306,8 +307,8 @@
             return FALSE;
         if (sa.sa_handler == SIG_IGN)
             return TRUE; /* fine, nothing to do, sig is globally ignored */
-        if (!sigismember(&t->sigpending, sig)) {
-            sigaddset(&t->sigpending, sig);
+        if (!fast_sigismember(&t->sigpending, sig)) {
+            fast_sigaddset(&t->sigpending, sig);
             t->sigpendcnt++;
         }
         pth_yield(t);
diff -BbdruN pth-1.3.5/pth_mctx.c pth-1.3.5-new/pth_mctx.c
--- pth-1.3.5/pth_mctx.c        Thu Mar 30 21:04:39 2000
+++ pth-1.3.5-new/pth_mctx.c    Sun Sep 10 08:42:54 2000
@@ -26,6 +26,7 @@
                                   ANSI C, it isn't worth doing.'' 
                                                 -- Unknown        */
 #include "pth_p.h"
+#include "fastsig.h"
 
 #if cpp
 
@@ -252,7 +253,7 @@
      * later transfer control onto the signal stack.
      */
     sigemptyset(&sigs);
-    sigaddset(&sigs, SIGUSR1);
+    fast_sigaddset(&sigs, SIGUSR1);
     pth_sc(sigprocmask)(SIG_BLOCK, &sigs, &osigs);
     sa.sa_handler = pth_mctx_set_trampoline;
     sigemptyset(&sa.sa_mask);
@@ -299,7 +300,7 @@
     mctx_called = FALSE;
     kill(getpid(), SIGUSR1);
     sigfillset(&sigs);
-    sigdelset(&sigs, SIGUSR1);
+    fast_sigdelset(&sigs, SIGUSR1);
     while (!mctx_called)
         sigsuspend(&sigs);
 
diff -BbdruN pth-1.3.5/pth_sched.c pth-1.3.5-new/pth_sched.c
--- pth-1.3.5/pth_sched.c       Fri Mar 10 12:58:32 2000
+++ pth-1.3.5-new/pth_sched.c   Sun Sep 10 08:48:21 2000
@@ -26,6 +26,7 @@
                                   see Recursive.''
                                      -- Unknown   */
 #include "pth_p.h"
+#include "fastsig.h"
 
 intern pth_t        pth_main;       /* the main thread                       */
 intern pth_t        pth_sched;      /* the permanent scheduler thread        */
@@ -219,8 +220,8 @@
         if (pth_current->sigpendcnt > 0) {
             sigpending(&pth_sigpending);
             for (sig = 1; sig < PTH_NSIG; sig++)
-                if (sigismember(&pth_current->sigpending, sig))
-                    if (!sigismember(&pth_sigpending, sig))
+                if (fast_sigismember(&pth_current->sigpending, sig))
+                    if (!fast_sigismember(&pth_sigpending, sig))
                         kill(getpid(), sig);
         }
 
@@ -272,13 +273,13 @@
             sigset_t sigstillpending;
             sigpending(&sigstillpending);
             for (sig = 1; sig < PTH_NSIG; sig++) {
-                if (sigismember(&pth_current->sigpending, sig)) {
-                    if (!sigismember(&sigstillpending, sig)) {
+                if (fast_sigismember(&pth_current->sigpending, sig)) {
+                    if (!fast_sigismember(&sigstillpending, sig)) {
                         /* thread (and perhaps also process) signal delivered */
-                        sigdelset(&pth_current->sigpending, sig);
+                        fast_sigdelset(&pth_current->sigpending, sig);
                         pth_current->sigpendcnt--;
                     }
-                    else if (!sigismember(&pth_sigpending, sig)) {
+                    else if (!fast_sigismember(&pth_sigpending, sig)) {
                         /* thread signal not delivered */
                         pth_util_sigdelete(sig);
                     }
@@ -303,7 +304,7 @@
                                 (unsigned long)pth_current, pth_current->name);
                         kill(getpid(), SIGSEGV);
                         sigfillset(&ss);
-                        sigdelset(&ss, SIGSEGV);
+                        fast_sigdelset(&ss, SIGSEGV);
                         sigsuspend(&ss);
                         abort();
                     }
@@ -427,8 +428,8 @@
 
         /* determine signals we block */
         for (sig = 1; sig < PTH_NSIG; sig++)
-            if (!sigismember(&(t->mctx.sigs), sig))
-                sigdelset(&pth_sigblock, sig);
+            if (!fast_sigismember(&(t->mctx.sigs), sig))
+                fast_sigdelset(&pth_sigblock, sig);
 
         /* cancellation support */
         if (t->cancelreq == TRUE)
@@ -470,25 +471,25 @@
                 /* Signal Set */
                 else if (ev->ev_type == PTH_EVENT_SIGS) {
                     for (sig = 1; sig < PTH_NSIG; sig++) {
-                        if (sigismember(ev->ev_args.SIGS.sigs, sig)) {
+                        if (fast_sigismember(ev->ev_args.SIGS.sigs, sig)) {
                             /* thread signal handling */
-                            if (sigismember(&t->sigpending, sig)) {
+                            if (fast_sigismember(&t->sigpending, sig)) {
                                 *(ev->ev_args.SIGS.sig) = sig;
-                                sigdelset(&t->sigpending, sig);
+                                fast_sigdelset(&t->sigpending, sig);
                                 t->sigpendcnt--;
                                 this_occurred = TRUE;
                             }
                             /* process signal handling */
-                            if (sigismember(&pth_sigpending, sig)) {
+                            if (fast_sigismember(&pth_sigpending, sig)) {
                                 if (ev->ev_args.SIGS.sig != NULL)
                                     *(ev->ev_args.SIGS.sig) = sig;
                                 pth_util_sigdelete(sig);
-                                sigdelset(&pth_sigpending, sig);
+                                fast_sigdelset(&pth_sigpending, sig);
                                 this_occurred = TRUE;
                             }
                             else {
-                                sigdelset(&pth_sigblock, sig);
-                                sigaddset(&pth_sigcatch, sig);
+                                fast_sigdelset(&pth_sigblock, sig);
+                                fast_sigaddset(&pth_sigcatch, sig);
                             }
                         }
                     }
@@ -595,7 +596,7 @@
 
     /* replace signal actions for signals we've to catch for events */
     for (sig = 1; sig < PTH_NSIG; sig++) {
-        if (sigismember(&pth_sigcatch, sig)) {
+        if (fast_sigismember(&pth_sigcatch, sig)) {
             sa.sa_handler = pth_sched_eventmanager_sighandler;
             sigfillset(&sa.sa_mask);
             sa.sa_flags = 0;
@@ -618,7 +619,7 @@
     /* restore signal mask and actions and handle signals */
     pth_sc(sigprocmask)(SIG_SETMASK, &oss, NULL);
     for (sig = 1; sig < PTH_NSIG; sig++)
-        if (sigismember(&pth_sigcatch, sig))
+        if (fast_sigismember(&pth_sigcatch, sig))
             sigaction(sig, &osa[sig], NULL);
 
     /* if the timer elapsed, handle it */
@@ -701,13 +702,13 @@
                     /* Signal Set */
                     else if (ev->ev_type == PTH_EVENT_SIGS) {
                         for (sig = 1; sig < PTH_NSIG; sig++) {
-                            if (sigismember(ev->ev_args.SIGS.sigs, sig)) {
-                                if (sigismember(&pth_sigraised, sig)) {
+                            if (fast_sigismember(ev->ev_args.SIGS.sigs, sig)) {
+                                if (fast_sigismember(&pth_sigraised, sig)) {
                                     if (ev->ev_args.SIGS.sig != NULL)
                                         *(ev->ev_args.SIGS.sig) = sig;
                                     pth_debug2("pth_sched_eventmanager: "
                                                "[signal] event occurred for thread \"%s\"", t->name);
-                                    sigdelset(&pth_sigraised, sig);
+                                    fast_sigdelset(&pth_sigraised, sig);
                                     ev->ev_occurred = TRUE;
                                 }
                             }
@@ -775,7 +776,7 @@
     char c;
 
     /* remember raised signal */
-    sigaddset(&pth_sigraised, sig);
+    fast_sigaddset(&pth_sigraised, sig);
 
     /* write signal to signal pipe in order to awake the select() */
     c = (int)sig;
diff -BbdruN pth-1.3.5/pth_util.c pth-1.3.5-new/pth_util.c
--- pth-1.3.5/pth_util.c        Thu Dec 30 23:59:01 1999
+++ pth-1.3.5-new/pth_util.c    Sun Sep 10 08:42:54 2000
@@ -26,6 +26,7 @@
                                   the root of all evil.''
                                              -- D.E.Knuth */
 #include "pth_p.h"
+#include "fastsig.h"
 
 /* calculate numerical mimimum */
 #if cpp
@@ -46,12 +47,12 @@
 
     /* check status of signal */
     sigpending(&ss);
-    if (!sigismember(&ss, sig))
+    if (!fast_sigismember(&ss, sig))
         return FALSE;
 
     /* block signal and remember old mask */
     sigemptyset(&ss);
-    sigaddset(&ss, sig);
+    fast_sigaddset(&ss, sig);
     pth_sc(sigprocmask)(SIG_BLOCK, &ss, &oss);
 
     /* set signal action to our dummy handler */
@@ -65,7 +66,7 @@
 
     /* now let signal be delivered */
     sigfillset(&ss);
-    sigdelset(&ss, sig);
+    fast_sigdelset(&ss, sig);
     sigsuspend(&ss);
 
     /* restore signal mask and handler */

Reply via email to