Revision: 54887
          http://brlcad.svn.sourceforge.net/brlcad/?rev=54887&view=rev
Author:   brlcad
Date:     2013-03-26 20:18:47 +0000 (Tue, 26 Mar 2013)
Log Message:
-----------
apply skriptkid's (akshay kashyap's) gci patch 
(http://www.google-melange.com/gci/task/view/google/gci2012/8070202) that 
implements cpu thread affinity.  initial testing showed a HUGE impact on 
massively parallel systems with one 64-core ibm power7 system showing a 
benchmark performance improvement around 50% (VGR increase from 120k to 180k).  
currently only implemented for pthread systems, but should be easily extended 
to other platforms and gives bu_parallel() some initial infrastructure so 
bu_parallel threads can know their CPU id.  very cool.

Modified Paths:
--------------
    brlcad/trunk/include/bu.h
    brlcad/trunk/src/libbu/CMakeLists.txt
    brlcad/trunk/src/libbu/parallel.c

Added Paths:
-----------
    brlcad/trunk/src/libbu/affinity.c

Modified: brlcad/trunk/include/bu.h
===================================================================
--- brlcad/trunk/include/bu.h   2013-03-26 19:25:57 UTC (rev 54886)
+++ brlcad/trunk/include/bu.h   2013-03-26 20:18:47 UTC (rev 54887)
@@ -431,6 +431,18 @@
 
 
 /*----------------------------------------------------------------------*/
+
+/** @file affinity.c
+ *
+ * Pins the calling thread to a single CPU taken from its current
+ * affinity mask.  Returns 0 on success and -1 on failure.
+ *
+ */
+
+BU_EXPORT extern int bu_set_affinity(void);
+
+/*----------------------------------------------------------------------*/
+
 /** @addtogroup conv */
 /** @ingroup data */
 /** @{*/

Modified: brlcad/trunk/src/libbu/CMakeLists.txt
===================================================================
--- brlcad/trunk/src/libbu/CMakeLists.txt       2013-03-26 19:25:57 UTC (rev 
54886)
+++ brlcad/trunk/src/libbu/CMakeLists.txt       2013-03-26 20:18:47 UTC (rev 
54887)
@@ -11,6 +11,7 @@
 BRLCAD_LIB_INCLUDE_DIRS(bu BU_INCLUDE_DIRS BU_LOCAL_INCLUDE_DIRS)
 
 set(LIBBU_SOURCES
+  affinity.c
   argv.c
   avs.c
   backtrace.c

Added: brlcad/trunk/src/libbu/affinity.c
===================================================================
--- brlcad/trunk/src/libbu/affinity.c                           (rev 0)
+++ brlcad/trunk/src/libbu/affinity.c   2013-03-26 20:18:47 UTC (rev 54887)
@@ -0,0 +1,101 @@
+/*                         A F F I N I T Y . C
+ * BRL-CAD
+ *
+ * Copyright (c) 2004-2012 United States Government as represented by
+ * the U.S. Army Research Laboratory.
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this file; see the file named COPYING for more
+ * information.
+ */
+
+/** @file affinity.c
+ *
+ * Contains utility to set affinity mask of a thread to the CPU set it
+ * is currently running on.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifdef HAVE_PTHREAD_H
+#  include <pthread.h>
+#endif
+
+/*
+ * bu_set_affinity
+ * 
+ * Pin the calling thread to the lowest-numbered CPU in its current
+ * affinity mask (CPU 0 if the mask comes back empty).  Does nothing
+ * unless both _GNU_SOURCE and HAVE_PTHREAD_H are defined.
+ * 
+ * Return: 0 on Success (or when compiled as a no-op)
+ *         -1 on Failure to get or set the affinity mask
+ * 
+ */
+int
+bu_set_affinity(void)
+{
+#if defined(_GNU_SOURCE) && defined(HAVE_PTHREAD_H)
+       
+    int cpulim = bu_avail_cpus();    /* CPU count; only bounds the first loop below */
+    int status;                      /* Return code of the pthread affinity calls */
+    int cpu = 0;                     /* CPU to pin to; stays 0 if the mask is empty */
+    int j;                           /* Loop index over CPU numbers */
+
+    cpu_set_t cpuset;                          /* CPU set structure. Defined in sched.h */
+    pthread_t curr_thread = pthread_self();       /* Handle of the calling thread */
+
+    CPU_ZERO(&cpuset);
+
+    for(j = 0; j < cpulim; j++) {    /* NOTE(review): this mask is overwritten by pthread_getaffinity_np() below */
+       CPU_SET(j, &cpuset);
+    }
+    
+    status = pthread_getaffinity_np(curr_thread, sizeof(cpu_set_t), &cpuset);  
  /* Fetch the affinity mask currently assigned to this thread */
+
+    if(status != 0) {    /* Could not read the affinity mask */
+       return -1;
+    }
+       
+    for(j = 0; j < CPU_SETSIZE; j++) {    /* Find the lowest-numbered CPU returned by pthread_getaffinity_np() */
+       if(CPU_ISSET(j, &cpuset)) {
+       cpu = j;
+       break;  /* First CPU in the mask found; stop searching */
+       }
+    }
+
+    CPU_ZERO(&cpuset);
+    CPU_SET(cpu, &cpuset);        /* Rebuild the mask with only the chosen CPU */
+
+    status = pthread_setaffinity_np(curr_thread, sizeof(cpu_set_t), &cpuset);  
      /* Restrict the calling thread */
+                                                                               
      /* to the single CPU left in cpuset */
+ 
+    if(status != 0) {    /* Could not apply the new affinity mask */
+       return -1;
+    }
+
+#endif
+
+       return 0;
+}
+
+/*
+ * Local Variables:
+ * mode: C
+ * tab-width: 8
+ * indent-tabs-mode: t
+ * c-file-style: "stroustrup"
+ * End:
+ * ex: shiftwidth=4 tabstop=8
+ */


Property changes on: brlcad/trunk/src/libbu/affinity.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native

Modified: brlcad/trunk/src/libbu/parallel.c
===================================================================
--- brlcad/trunk/src/libbu/parallel.c   2013-03-26 19:25:57 UTC (rev 54886)
+++ brlcad/trunk/src/libbu/parallel.c   2013-03-26 20:18:47 UTC (rev 54887)
@@ -165,6 +165,11 @@
 } bu_taskcontrol[MAX_PSW];
 #endif
 
+struct thread_data {    /* per-thread arguments consumed by parallel_interface_arg() */
+    void (*user_func)(int, genptr_t);    /* user's function, invoked as user_func(cpu_id, user_arg) */
+    genptr_t user_arg;    /* user's argument, forwarded as the second parameter of user_func */
+    int cpu_id;    /* number forwarded as the first parameter of user_func */
+};
 
 void
 bu_nice_set(int newnice)
@@ -569,6 +574,12 @@
 static genptr_t parallel_arg;  /* User's arg to his threads */
 static void (*parallel_func)(int, genptr_t);   /* user function to run in 
parallel */
 
+HIDDEN void
+parallel_interface_arg(struct thread_data *user_thread_data)
+{    /* calls bu_set_affinity(), then runs user_func(cpu_id, user_arg) from *user_thread_data */
+    bu_set_affinity();    /* return value is not checked */
+    (*((*user_thread_data).user_func))((*user_thread_data).cpu_id, 
(*user_thread_data).user_arg);
+}
 
 /**
  * Interface layer between bu_parallel and the user's function.
@@ -586,13 +597,18 @@
 HIDDEN void
 parallel_interface(void)
 {
-    register int cpu;          /* our CPU (thread) number */
+    struct thread_data user_thread_data_pi;
 
+    bu_set_affinity();
+
+    user_thread_data_pi.user_func = parallel_func;
+    user_thread_data_pi.user_arg  = parallel_arg; 
+
     bu_semaphore_acquire(BU_SEM_SYSCALL);
-    cpu = parallel_nthreads_started++;
+    user_thread_data_pi.cpu_id = parallel_nthreads_started++;
     bu_semaphore_release(BU_SEM_SYSCALL);
 
-    (*parallel_func)(cpu, parallel_arg);
+    parallel_interface_arg(&user_thread_data_pi);
 
     bu_semaphore_acquire(BU_SEM_SYSCALL);
     parallel_nthreads_finished++;
@@ -608,6 +624,7 @@
     if (cpu) _exit(0);
 #  endif /* SGI */
 }
+
 #endif /* PARALLEL */
 
 #ifdef SGI_4D
@@ -639,6 +656,8 @@
     (*func)(0, arg);
 
 #else
+
+    struct thread_data *user_thread_data_bu;
     int avail_cpus = 1;
 
 #  if defined(alliant) && !defined(i860) && !__STDC__
@@ -692,6 +711,15 @@
     parallel_func = func;
     parallel_arg = arg;
 
+    user_thread_data_bu = (struct thread_data *)bu_calloc(ncpu, 
sizeof(*user_thread_data_bu), "struct thread_data *user_thread_data_bu");
+
+    /* Fill in the data of user_thread_data_bu structures of all threads */
+    for(x = 0; x < ncpu; x++) {
+       (user_thread_data_bu + x)->user_func = func;
+       (user_thread_data_bu + x)->user_arg  = arg;
+       (user_thread_data_bu + x)->cpu_id    = x;
+    }
+
     /* if we're in debug mode, allow additional cpus */
     if (!(bu_debug & BU_DEBUG_PARALLEL)) {
        avail_cpus = bu_avail_cpus();
@@ -929,11 +957,11 @@
     /* Create the threads */
     for (x = 0; x < ncpu; x++) {
 
-       if (thr_create(0, 0, (void *(*)(void *))parallel_interface, 0, 0, 
&thread)) {
+       if (thr_create(0, 0, (void *(*)(void *))parallel_interface_arg, 
(user_thread_data_bu + x), 0, &thread)) {
            fprintf(stderr, "ERROR: bu_parallel: thr_create(0x0, 0x0, 0x%x, 
0x0, 0, 0x%x) failed on processor %d\n",
-                   parallel_interface, &thread, x);
+                   parallel_interface_arg, &thread, x);
            bu_log("ERROR: bu_parallel: thr_create(0x0, 0x0, 0x%x, 0x0, 0, 
0x%x) failed on processor %d\n",
-                  parallel_interface, &thread, x);
+                  parallel_interface_arg, &thread, x);
            /* Not much to do, lump it */
        } else {
            if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL))
@@ -1004,17 +1032,17 @@
        pthread_attr_init(&attrs);
        pthread_attr_setstacksize(&attrs, 10*1024*1024);
 
-       if (pthread_create(&thread, &attrs, (void *(*)(void 
*))parallel_interface, NULL)) {
-           fprintf(stderr, "ERROR: bu_parallel: thr_create(0x0, 0x0, 0x%lx, 
0x0, 0, 0x%lx) failed on processor %d\n",
-                   (unsigned long int)parallel_interface, (unsigned long 
int)&thread, x);
-           bu_log("ERROR: bu_parallel: thr_create(0x0, 0x0, 0x%lx, 0x0, 0, %p) 
failed on processor %d\n",
-                  (unsigned long int)parallel_interface, (void *)&thread, x);
+       if (pthread_create(&thread, &attrs, (void *(*)(void 
*))parallel_interface_arg, (user_thread_data_bu + x))) {
+           fprintf(stderr, "ERROR: bu_parallel: pthread_create(0x0, 0x0, 
0x%lx, 0x0, 0, 0x%lx) failed on processor %d\n",
+                   (unsigned long int)parallel_interface_arg, (unsigned long 
int)&thread, x);
+           bu_log("ERROR: bu_parallel: pthread_create(0x0, 0x0, 0x%lx, 0x0, 0, 
%p) failed on processor %d\n",
+                  (unsigned long int)parallel_interface_arg, (void *)&thread, 
x);
            /* Not much to do, lump it */
        } else {
            if (UNLIKELY(bu_debug & BU_DEBUG_PARALLEL)) {
                bu_log("bu_parallel(): created thread: (thread: %p) (loop:%d) 
(nthreadc:%d)\n",
                       (void*)thread, x, nthreadc);
-           }
+               }
 
            thread_tbl[nthreadc] = thread;
            nthreadc++;
@@ -1079,13 +1107,15 @@
     /* Create the Win32 threads */
 
     for( int i = 0; i < ncpu; i++){
+
        hThreadArray[i] = CreateThread(
            NULL,
            0,
            (LPVOID)parallel_interface,
-           NULL,
+           (user_thread_data_bu + x),
            0,
            &dwThreadIdArray[i]);
+
        if (hThreadArray[i] == NULL) {
            bu_log("bu_parallel(): Error in CreateThread");
            bu_exit();
@@ -1137,6 +1167,8 @@
 #  endif
     bu_pid_of_initiating_thread = 0;   /* No threads any more */
 
+    bu_free(user_thread_data_bu, "struct thread_data *user_thread_data_bu");
+
 #endif /* PARALLEL */
 
     return;

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Own the Future-Intel&reg; Level Up Game Demo Contest 2013
Rise to greatness in Intel's independent game demo contest.
Compete for recognition, cash, and the chance to get your game 
on Steam. $5K grand prize plus 10 genre and skill prizes. 
Submit your demo by 6/6/13. http://p.sf.net/sfu/intel_levelupd2d
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits

Reply via email to