Richard,

I took a look at your program today. I added a few assertions
to get more information when the problem occurs. I was able to reproduce
the issue on my 2-way Opteron, although it does not happen very often.

I think the core issue is that you have a race condition in your
program between the various worker threads. The race has to do with
dosignal(). I noticed that you get the wrong fd almost instantly when
you hit the problem. I think this may be due to a race between one
thread that has already started monitoring, and is therefore generating
samples and notifications, and another thread that is just coming online,
i.e., starting to execute dosignal(). I am not 100% sure this is the problem because the
monitoring thread has set its F_SETSIG, so it should be the only one
receiving the signal, but I have not verified the logic in the kernel.
It may be that if a thread has not yet set its F_SETSIG, then it may be
chosen first. I simply added a big sleep between dosignal() and the
beginning of active monitoring.

The other thing I did to the program is that I explicitly blocked SIGIO
in the master thread.

Could you try this in your setup?



On Tue, Apr 24, 2007 at 04:59:31PM -0400, Richard C Bilson wrote:
> Stephane,
> 
> Here is an example program that fails for me on a Core Duo running
> 2.6.20 and the 070209 perfmon. The program takes an integer as an
> argument and creates that many threads. Each thread enables counter
> overflow signals and applies F_SETSIG to its perfmon fd. However, in
> some cases the read from the perfmon fd in the signal handler returns
> errno 11 (EWOULDBLOCK), which seems like an inconsistency. Furthermore,
> inspecting the siginfo structure shows that the fd reported there
> actually refers to the context of a different thread than the one
> handling the signal.
> 
> The program seems to work fine for two or three threads, but with four
> or more it fails about half the time. I think our original uC++ version
> fails more often than this, but then we have much more going on,
> including more code in the signal handler.
> 
> Back when I originally brought up this problem last month, Phil Mucci
> mentioned that he had noticed similar behavior. I would be interested
> to know if he has managed to confirm this.
> 
> - Richard
> 
> // g++ -Wall -g sigio.cc -lpfm -lpthread
> 
> #include <perfmon/perfmon.h>
> #include <perfmon/pfmlib.h>
> #include <cstdlib>
> #include <cstdio>
> #include <cerrno>
> #include <cstring>
> #include <cstdarg>
> #include <cassert>
> #include <signal.h>
> #include <unistd.h>
> #include <asm/unistd.h>
> #include <pthread.h>
> #include <fcntl.h>
> 
> #define PERIOD 100000
> // #define OLD
> 
> __thread int uPerfmon_fd;
> 
> 
> void uAbort( const char *fmt, ... ) {
>     if ( fmt != NULL ) {
>       fprintf( stderr, "Runtime error (UNIX pid:%ld) ", (long int)getpid() );
>       va_list args;
>       va_start( args, fmt );
>       vfprintf( stderr, fmt, args );
>       va_end( args );
>       fprintf( stderr, "\n" );
>     } // if
> 
>     _exit( -1 );
> } // uAbort
> 
> 
> void sigHWOverflowHandler( int, siginfo_t *sfp, void * ) {
>     assert( sfp->si_code == POLL_IN );
> 
>     pfm_msg_t message;
>     int result = read( uPerfmon_fd, &message, sizeof( message ) );
>     if ( result == -1 ) {
>       uAbort( "HWCounters::getOverflowMask(): Unable to read overflow message 
> from fd %d errno %d (%s) si_fd %d.", uPerfmon_fd, errno, strerror( errno ), 
> sfp->si_fd );
>     } // if
> 
>     if ( result < (long)sizeof( message ) ) {
>       uAbort( "HWCounters::getOverflowMask(): Incomplete overflow message 
> from fd %d errno %d (%s).", uPerfmon_fd, errno, strerror( errno ) );
>     } // if
> 
> #if defined( OLD )
>     if ( perfmonctl( uPerfmon_fd, PFM_RESTART, NULL, 0 ) == -1 ) {
> #else
>     if ( pfm_restart( uPerfmon_fd ) == -1 ) {
> #endif // OLD
>       uAbort( "HWCounters::uRestartCounters(): Unable to restart hardware 
> counters." );
>     } // if
> } // HWCounters::getOverflowMask
> 
> 
> static char *event_name;
> 
> 
> void uStartCounters( bool signalOnOverflow ) {
> #ifdef __U_DEBUG_H__
>     uBaseTask &task = uThisTask();
>     uCluster &cluster = uThisCluster();
>     uDebugPrt( "Task calling HWCounters::uStartCounters(): (uBaseTask 
> &)0x%p(%s) on cluster 0x%p(%s)\n", &task, task.getName(), &cluster, 
> cluster.getName() );
> #endif // __U_DEBUG_H__
> 
>     pid_t pid = syscall( __NR_gettid );
> 
>     // these structures are too large to be allocated on the stack
>     pfmlib_input_param_t &events = *new pfmlib_input_param_t;
>     pfmlib_output_param_t &output_params = *new pfmlib_output_param_t;
>     memset( &output_params, 0, sizeof( output_params ) );
>     memset( &events, 0, sizeof( events ) );
>     if ( pfm_find_event_byname( event_name, &events.pfp_events[0].event ) != 
> PFMLIB_SUCCESS ) {
>       uAbort( "HWCounters::uStartCounters(): Couldn't find event %s.", 
> event_name );
>     }
> 
>     // Set the number of events being counted
>     events.pfp_event_count = 1;
> 
>     // Set profiling privilege levels (PFM_PLM0 is most privileged, PFM_PLM3 
> is least privileged)
>     events.pfp_dfl_plm |= PFM_PLM3;
> 
>     // Let libpfm figure out how to program the hardware counters
>     if ( pfm_dispatch_events( &events, NULL, &output_params, NULL ) != 
> PFMLIB_SUCCESS ) {
>       uAbort( "HWCounters::uStartCounters(): Illegal event set." );
>     }
> 
>     // Prepare the argument to initialize the PMCs.
> #if defined( OLD )
>     pfarg_reg_t pmcs[4];
> #else
>     pfarg_pmc_t pmcs[4];
> #endif
>     memset( pmcs, 0, sizeof( pmcs ) );
>     for ( unsigned int event = 0; event < output_params.pfp_pmc_count; event 
> += 1 ) {
>       pmcs[event].reg_num   = output_params.pfp_pmcs[event].reg_num;
>       pmcs[event].reg_value = output_params.pfp_pmcs[event].reg_value;
>     } // for
> 
>     // Prepare the argument to initialize the PMDs.  Set it to all zeros so 
> that the PMD event
>     // counts will be initialized to zero.
>     pfarg_pmd_t pmds[4];
>     memset( pmds, 0, sizeof( pmds ) );
> 
>     // Set appropriate PMD register mappings.  Just take the needed register 
> numbers from the
>     // `events' structure that libpfm filled in with pfm_dispatch_events().
>     for ( unsigned int event = 0; event < events.pfp_event_count; event += 1 
> ) {
>       pmds[event].reg_num = pmcs[event].reg_num;
>     }
> 
> #if defined( OLD )
>     pfarg_context_t context;
> #else
>     pfarg_ctx_t context;
> #endif // OLD
>     memset( &context, 0, sizeof( context ) );
> 
>     // Create perfmon context
> #if defined( OLD )
>     if ( perfmonctl( 0, PFM_CREATE_CONTEXT, &context, 1 ) == -1 ) {
> #else
>     int fd = pfm_create_context( &context, NULL, NULL, 0 );
>     if ( fd == -1 ) {
> #endif // OLD
>       if ( errno == ENOSYS ) {
>           uAbort( "HWCounters::uStartCounters(): Kernel does not have 
> performance monitoring support enabled." );
>       } // if
>       uAbort( "HWCounters::uStartCounters(): Unable to create perfmon 
> context: %s.", strerror( errno ) );
>     } // if
> 
> #if defined( OLD )
>     int fd = context.ctx_fd;
> #endif // OLD
>     uPerfmon_fd = fd;
> 
>     if ( signalOnOverflow ) {
>       // Enable overflow notification for all events.
> #if defined( OLD )
>       for ( unsigned int event = 0; event < output_params.pfp_pmc_count; 
> ++event ) {
>           pmcs[event].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
>       } // for
> #endif // OLD
> 
>       // Set all events to overflow after the desired number of events.
>       for ( unsigned int event = 0; event < events.pfp_event_count; ++event ) 
> {
> #if ! defined( OLD )
>           pmds[event].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
> #endif // ! OLD
>           pmds[event].reg_value = ~0ULL - PERIOD + 1;
>           pmds[event].reg_long_reset = ~0ULL - PERIOD + 1;
>           pmds[event].reg_short_reset = ~0ULL - PERIOD + 1;
>       } // for
>     } // if
> 
>     // Program the PMCs (this essentially tells which performance counters to 
> measure which event)
> #if defined( OLD )
>     if ( perfmonctl( fd, PFM_WRITE_PMCS, pmcs, output_params.pfp_pmc_count ) 
> == -1 ) {
> #else
>     if ( pfm_write_pmcs( fd, pmcs, output_params.pfp_pmc_count ) == -1 ) {
> #endif // OLD
>       uAbort( "HWCounters::uStartCounters(): Unable to program PMCs: %s (%d) 
> -- fd %d", strerror( errno ), errno, fd );
>     } // if
> 
>     // Initialize the PMD event counts to zero.
> #if defined( OLD )
>     if ( perfmonctl( fd, PFM_WRITE_PMDS, pmds, events.pfp_event_count ) == -1 
> ) {
> #else
>     if ( pfm_write_pmds( fd, pmds, events.pfp_event_count ) == -1 ) {
> #endif // OLD
>       uAbort( "HWCounters::uStartCounters(): Unable to initialize PMDs." );
>     } // if
> 
>     // Attach the perfmon context to the current processor.  The new perfmon 
> interface uses file
>     // descriptors to represent performance counter contexts.  These 
> statements associate the
>     // new context with the current kernel thread.
>     pfarg_load_t load_args;
>     memset( &load_args, 0, sizeof( load_args ) );
>     load_args.load_pid = pid;
> #if defined( OLD )
>     if ( perfmonctl( fd, PFM_LOAD_CONTEXT, &load_args, 1 ) == -1 ) {
> #else
>     if ( pfm_load_context( fd, &load_args ) == -1 ) {
> #endif // OLD
>       uAbort( "HWCounters::uStartCounters(): Unable to load perfmon context." 
> );
>     } // if
> 
>     if ( signalOnOverflow ) {
>       if ( fcntl( fd, F_SETFL, fcntl( fd, F_GETFL, 0 ) | O_ASYNC | O_NONBLOCK 
> ) == -1 ) {
>           uAbort( "HWCounters::uStartCounters(): Unable to setup asynchronous 
> notification of virtual processor's file descriptor." );
>       } // if
>       if ( fcntl( fd, F_SETOWN, pid ) == -1 ) {
>           uAbort( "HWCounters::uStartCounters(): Unable to set ownership of 
> virtual processor's file descriptor." );
>       } // if
>       if ( fcntl( fd, F_SETSIG, SIGIO ) == -1 ) {
>           uAbort( "HWCounters::uStartCounters(): Unable to setup additional 
> information for asynchronous notification." );
>       } // if
>     } // if
> 
>     // Let 'er rip!!
>     pfm_self_start( fd );
> 
>     delete &events;
>     delete &output_params;
> #ifdef __U_DEBUG_H__
>     uDebugPrt( "HWCounters::uStartCounters(), exit\n" );
> #endif
> } // HWCounters::uStartCounters
> 
> 
> void dosignal( int sig, void (*handler)(int, siginfo_t*, void*), int flags ) {
>     struct sigaction act;
>     act.sa_sigaction = handler;
>     sigemptyset( &act.sa_mask );
>     sigaddset( &act.sa_mask, SIGALRM );                       // disable 
> during signal handler
>     sigaddset( &act.sa_mask, SIGVTALRM );
>     sigaddset( &act.sa_mask, SIGUSR1 );
>     sigaddset( &act.sa_mask, SIGIO );
>     act.sa_flags = flags;
> 
>     if ( sigaction( sig, &act, NULL ) == -1 ) {
>       fprintf( stderr, " uSigHandlerModule::signal( sig:%d, handler:%p, 
> flags:%d ), problem installing signal handler, error(%d) %s.\n",
>                sig, handler, flags, errno, strerror( errno ) );
>       _exit( -1 );
>     } // if
> } // uSigHandlerModule::signal
> 
> 
> void *threadfunc( void * ) {
>     dosignal( SIGIO, sigHWOverflowHandler, SA_SIGINFO );
> 
>     sigset_t unblock;
>     sigemptyset( &unblock );
>     sigaddset( &unblock, SIGIO );
>     sigprocmask( SIG_UNBLOCK, &unblock, NULL );
> 
>     uStartCounters( true );
> 
>     for( ; ; )
>       ;
> 
>     return NULL;
> } // threadfunc
> 
> 
> int main( int argc, char **argv ) {
>     int nthreads = 5;
>     if ( argc > 1 ) {
>       nthreads = atoi( argv[ 1 ] );
>     } // if
>     pthread_t threads[ nthreads ];
> 
>     if ( pfm_initialize() != PFMLIB_SUCCESS ) {
>       uAbort( "Couldn't initialize perfmon" );
>     }
> 
>     pfmlib_options_t pfmlib_options;
>     pfmlib_options.pfm_debug = 0;
>     pfmlib_options.pfm_verbose = 0;
>     pfm_set_options( &pfmlib_options );
> 
>     int pmu_type;
>     if ( pfm_get_pmu_type( &pmu_type ) != PFMLIB_SUCCESS ) {
>       uAbort( "Couldn't access PMU" );
>     }
> 
>     // Check which type of PMU is being used
>     switch ( pmu_type ) {
>       case PFMLIB_ITANIUM2_PMU:
>       event_name = "CPU_CYCLES";
>       break;
> #if defined( PFMLIB_COREDUO_PMU )
>       case PFMLIB_COREDUO_PMU:
>       event_name = "UNHALTED_CORE_CYCLES";
>       break;
> #endif // PFMLIB_COREDUO_PMU
>       default:                                         // this CPU isn't 
> currently supported
>       uAbort( "unsupported PMU" );
>       break;
>     }
> 
>     for ( int i = 0; i < nthreads; i += 1 ) {
>       pthread_create( &threads[ i ], NULL, threadfunc, NULL );
>     } // for
> 
>     for ( int i = 0; i < nthreads; i += 1 ) {
>       pthread_join( threads[ i ], NULL );
>     } // for
> 
>     return 0;
> } // main
> _______________________________________________
> perfmon mailing list
> [email protected]
> http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/

-- 

-Stephane
_______________________________________________
perfmon mailing list
[email protected]
http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/

Reply via email to