Richard,
I took a look at your program today. I added a few assertions
to get more information when the problem occurs. I was able to reproduce
the issue on my 2-way Opteron, although it does not happen very often.
I think the core issue is that you have a race condition in your
program between the various worker threads. The race has to do with
dosignal(). I noticed that you get the wrong fd almost instantly when
you hit the problem. I think this may be due to the fact that you have
a race between one thread starting monitoring and generating samples,
and thus notifications, and another thread coming online, i.e., starting
to execute dosignal(). I am not 100% sure this is the problem because the
monitoring thread has set its F_SETSIG, so it should be the only one
receiving the signal, but I have not verified the logic in the kernel.
It may be that if a thread has not yet set its F_SETSIG, then it may be
chosen first. I simply added a long sleep between dosignal() and the
beginning of active monitoring.
The other change I made to the program was to explicitly block SIGIO
in the master thread.
Could you try this in your setup?
On Tue, Apr 24, 2007 at 04:59:31PM -0400, Richard C Bilson wrote:
> Stephane,
>
> Here is an example program that fails for me on a Core Duo running
> 2.6.20 and the 070209 perfmon. The program takes an integer as an
> argument and creates that many threads. Each thread enables counter
> overflow signals and applies F_SETSIG to its perfmon fd. However, in
> some cases the read from the perfmon fd in the signal handler returns
> errno 11 (EWOULDBLOCK), which seems like an inconsistency. Furthermore,
> inspecting the siginfo structure shows that the fd reported there
> actually refers to the context of a different thread than the one
> handling the signal.
>
> The program seems to work fine for two or three threads, but with four
> or more it fails about half the time. I think our original uC++ version
> fails more often than this, but then we have much more going on,
> including more code in the signal handler.
>
> Back when I originally brought up this problem last month, Phil Mucci
> mentioned that he had noticed similar behavior. I would be interested
> to know if he has managed to confirm this.
>
> - Richard
>
> // g++ -Wall -g sigio.cc -lpfm -lpthread
>
> #include <perfmon/perfmon.h>
> #include <perfmon/pfmlib.h>
> #include <cstdlib>
> #include <cstdio>
> #include <cerrno>
> #include <cstring>
> #include <cstdarg>
> #include <cassert>
> #include <signal.h>
> #include <unistd.h>
> #include <asm/unistd.h>
> #include <pthread.h>
> #include <fcntl.h>
>
> #define PERIOD 100000
> // #define OLD
>
> __thread int uPerfmon_fd;
>
>
> void uAbort( const char *fmt, ... ) {
> if ( fmt != NULL ) {
> fprintf( stderr, "Runtime error (UNIX pid:%ld) ", (long int)getpid() );
> va_list args;
> va_start( args, fmt );
> vfprintf( stderr, fmt, args );
> va_end( args );
> fprintf( stderr, "\n" );
> } // if
>
> _exit( -1 );
> } // uAbort
>
>
> void sigHWOverflowHandler( int, siginfo_t *sfp, void * ) {
> assert( sfp->si_code == POLL_IN );
>
> pfm_msg_t message;
> int result = read( uPerfmon_fd, &message, sizeof( message ) );
> if ( result == -1 ) {
> uAbort( "HWCounters::getOverflowMask(): Unable to read overflow message
> from fd %d errno %d (%s) si_fd %d.", uPerfmon_fd, errno, strerror( errno ),
> sfp->si_fd );
> } // if
>
> if ( result < (long)sizeof( message ) ) {
> uAbort( "HWCounters::getOverflowMask(): Incomplete overflow message
> from fd %d errno %d (%s).", uPerfmon_fd, errno, strerror( errno ) );
> } // if
>
> #if defined( OLD )
> if ( perfmonctl( uPerfmon_fd, PFM_RESTART, NULL, 0 ) == -1 ) {
> #else
> if ( pfm_restart( uPerfmon_fd ) == -1 ) {
> #endif // OLD
> uAbort( "HWCounters::uRestartCounters(): Unable to restart hardware
> counters." );
> } // if
> } // HWCounters::getOverflowMask
>
>
> static char *event_name;
>
>
> void uStartCounters( bool signalOnOverflow ) {
> #ifdef __U_DEBUG_H__
> uBaseTask &task = uThisTask();
> uCluster &cluster = uThisCluster();
> uDebugPrt( "Task calling HWCounters::uStartCounters(): (uBaseTask
> &)0x%p(%s) on cluster 0x%p(%s)\n", &task, task.getName(), &cluster,
> cluster.getName() );
> #endif // __U_DEBUG_H__
>
> pid_t pid = syscall( __NR_gettid );
>
> // these structures are too large to be allocated on the stack
> pfmlib_input_param_t &events = *new pfmlib_input_param_t;
> pfmlib_output_param_t &output_params = *new pfmlib_output_param_t;
> memset( &output_params, 0, sizeof( output_params ) );
> memset( &events, 0, sizeof( events ) );
> if ( pfm_find_event_byname( event_name, &events.pfp_events[0].event ) !=
> PFMLIB_SUCCESS ) {
> uAbort( "HWCounters::uStartCounters(): Couldn't find event %s.",
> event_name );
> }
>
> // Set the number of events being counted
> events.pfp_event_count = 1;
>
> // Set profiling privilege levels (PFM_PLM0 is most privileged, PFM_PLM3
> is least privileged)
> events.pfp_dfl_plm |= PFM_PLM3;
>
> // Let libpfm figure out how to program the hardware counters
> if ( pfm_dispatch_events( &events, NULL, &output_params, NULL ) !=
> PFMLIB_SUCCESS ) {
> uAbort( "HWCounters::uStartCounters(): Illegal event set." );
> }
>
> // Prepare the argument to initialize the PMCs.
> #if defined( OLD )
> pfarg_reg_t pmcs[4];
> #else
> pfarg_pmc_t pmcs[4];
> #endif
> memset( pmcs, 0, sizeof( pmcs ) );
> for ( unsigned int event = 0; event < output_params.pfp_pmc_count; event
> += 1 ) {
> pmcs[event].reg_num = output_params.pfp_pmcs[event].reg_num;
> pmcs[event].reg_value = output_params.pfp_pmcs[event].reg_value;
> } // for
>
> // Prepare the argument to initialize the PMDs. Set it to all zeros so
> that the PMD event
> // counts will be initialized to zero.
> pfarg_pmd_t pmds[4];
> memset( pmds, 0, sizeof( pmds ) );
>
> // Set appropriate PMD register mappings. Just take the needed register
> numbers from the
> // `events' structure that libpfm filled in with pfm_dispatch_events().
> for ( unsigned int event = 0; event < events.pfp_event_count; event += 1
> ) {
> pmds[event].reg_num = pmcs[event].reg_num;
> }
>
> #if defined( OLD )
> pfarg_context_t context;
> #else
> pfarg_ctx_t context;
> #endif // OLD
> memset( &context, 0, sizeof( context ) );
>
> // Create perfmon context
> #if defined( OLD )
> if ( perfmonctl( 0, PFM_CREATE_CONTEXT, &context, 1 ) == -1 ) {
> #else
> int fd = pfm_create_context( &context, NULL, NULL, 0 );
> if ( fd == -1 ) {
> #endif // OLD
> if ( errno == ENOSYS ) {
> uAbort( "HWCounters::uStartCounters(): Kernel does not have
> performance monitoring support enabled." );
> } // if
> uAbort( "HWCounters::uStartCounters(): Unable to create perfmon
> context: %s.", strerror( errno ) );
> } // if
>
> #if defined( OLD )
> int fd = context.ctx_fd;
> #endif // OLD
> uPerfmon_fd = fd;
>
> if ( signalOnOverflow ) {
> // Enable overflow notification for all events.
> #if defined( OLD )
> for ( unsigned int event = 0; event < output_params.pfp_pmc_count;
> ++event ) {
> pmcs[event].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
> } // for
> #endif // OLD
>
> // Set all events to overflow after the desired number of events.
> for ( unsigned int event = 0; event < events.pfp_event_count; ++event )
> {
> #if ! defined( OLD )
> pmds[event].reg_flags |= PFM_REGFL_OVFL_NOTIFY;
> #endif // ! OLD
> pmds[event].reg_value = ~0ULL - PERIOD + 1;
> pmds[event].reg_long_reset = ~0ULL - PERIOD + 1;
> pmds[event].reg_short_reset = ~0ULL - PERIOD + 1;
> } // for
> } // if
>
> // Program the PMCs (this essentially tells which performance counters to
> measure which event)
> #if defined( OLD )
> if ( perfmonctl( fd, PFM_WRITE_PMCS, pmcs, output_params.pfp_pmc_count )
> == -1 ) {
> #else
> if ( pfm_write_pmcs( fd, pmcs, output_params.pfp_pmc_count ) == -1 ) {
> #endif // OLD
> uAbort( "HWCounters::uStartCounters(): Unable to program PMCs: %s (%d)
> -- fd %d", strerror( errno ), errno, fd );
> } // if
>
> // Initialize the PMD event counts to zero.
> #if defined( OLD )
> if ( perfmonctl( fd, PFM_WRITE_PMDS, pmds, events.pfp_event_count ) == -1
> ) {
> #else
> if ( pfm_write_pmds( fd, pmds, events.pfp_event_count ) == -1 ) {
> #endif // OLD
> uAbort( "HWCounters::uStartCounters(): Unable to initialize PMDs." );
> } // if
>
> // Attach the perfmon context to the current processor. The new perfmon
> interface uses file
> // descriptors to represent performance counter contexts. These
> statements associate the
> // new context with the current kernel thread.
> pfarg_load_t load_args;
> memset( &load_args, 0, sizeof( load_args ) );
> load_args.load_pid = pid;
> #if defined( OLD )
> if ( perfmonctl( fd, PFM_LOAD_CONTEXT, &load_args, 1 ) == -1 ) {
> #else
> if ( pfm_load_context( fd, &load_args ) == -1 ) {
> #endif // OLD
> uAbort( "HWCounters::uStartCounters(): Unable to load perfmon context."
> );
> } // if
>
> if ( signalOnOverflow ) {
> if ( fcntl( fd, F_SETFL, fcntl( fd, F_GETFL, 0 ) | O_ASYNC | O_NONBLOCK
> ) == -1 ) {
> uAbort( "HWCounters::uStartCounters(): Unable to setup asynchronous
> notification of virtual processor's file descriptor." );
> } // if
> if ( fcntl( fd, F_SETOWN, pid ) == -1 ) {
> uAbort( "HWCounters::uStartCounters(): Unable to set ownership of
> virtual processor's file descriptor." );
> } // if
> if ( fcntl( fd, F_SETSIG, SIGIO ) == -1 ) {
> uAbort( "HWCounters::uStartCounters(): Unable to setup additional
> information for asynchronous notification." );
> } // if
> } // if
>
> // Let 'er rip!!
> pfm_self_start( fd );
>
> delete &events;
> delete &output_params;
> #ifdef __U_DEBUG_H__
> uDebugPrt( "HWCounters::uStartCounters(), exit\n" );
> #endif
> } // HWCounters::uStartCounters
>
>
> void dosignal( int sig, void (*handler)(int, siginfo_t*, void*), int flags ) {
> struct sigaction act;
> act.sa_sigaction = handler;
> sigemptyset( &act.sa_mask );
> sigaddset( &act.sa_mask, SIGALRM ); // disable
> during signal handler
> sigaddset( &act.sa_mask, SIGVTALRM );
> sigaddset( &act.sa_mask, SIGUSR1 );
> sigaddset( &act.sa_mask, SIGIO );
> act.sa_flags = flags;
>
> if ( sigaction( sig, &act, NULL ) == -1 ) {
> fprintf( stderr, " uSigHandlerModule::signal( sig:%d, handler:%p,
> flags:%d ), problem installing signal handler, error(%d) %s.\n",
> sig, handler, flags, errno, strerror( errno ) );
> _exit( -1 );
> } // if
> } // uSigHandlerModule::signal
>
>
> void *threadfunc( void * ) {
> dosignal( SIGIO, sigHWOverflowHandler, SA_SIGINFO );
>
> sigset_t unblock;
> sigemptyset( &unblock );
> sigaddset( &unblock, SIGIO );
> sigprocmask( SIG_UNBLOCK, &unblock, NULL );
>
> uStartCounters( true );
>
> for( ; ; )
> ;
>
> return NULL;
> } // threadfunc
>
>
> int main( int argc, char **argv ) {
> int nthreads = 5;
> if ( argc > 1 ) {
> nthreads = atoi( argv[ 1 ] );
> } // if
> pthread_t threads[ nthreads ];
>
> if ( pfm_initialize() != PFMLIB_SUCCESS ) {
> uAbort( "Couldn't initialize perfmon" );
> }
>
> pfmlib_options_t pfmlib_options;
> pfmlib_options.pfm_debug = 0;
> pfmlib_options.pfm_verbose = 0;
> pfm_set_options( &pfmlib_options );
>
> int pmu_type;
> if ( pfm_get_pmu_type( &pmu_type ) != PFMLIB_SUCCESS ) {
> uAbort( "Couldn't access PMU" );
> }
>
> // Check which type of PMU is being used
> switch ( pmu_type ) {
> case PFMLIB_ITANIUM2_PMU:
> event_name = "CPU_CYCLES";
> break;
> #if defined( PFMLIB_COREDUO_PMU )
> case PFMLIB_COREDUO_PMU:
> event_name = "UNHALTED_CORE_CYCLES";
> break;
> #endif // PFMLIB_COREDUO_PMU
> default: // this CPU isn't
> currently supported
> uAbort( "unsupported PMU" );
> break;
> }
>
> for ( int i = 0; i < nthreads; i += 1 ) {
> pthread_create( &threads[ i ], NULL, threadfunc, NULL );
> } // for
>
> for ( int i = 0; i < nthreads; i += 1 ) {
> pthread_join( threads[ i ], NULL );
> } // for
>
> return 0;
> } // main
> _______________________________________________
> perfmon mailing list
> [email protected]
> http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/
--
-Stephane
_______________________________________________
perfmon mailing list
[email protected]
http://www.hpl.hp.com/hosted/linux/mail-archives/perfmon/