#include <stdio.h>
#include <string.h>
#include <shmem.h>

#define BUFFER_SIZE ( 8 * 1024 )

/*
 * Small OpenSHMEM exercise: every PE allocates symmetric buffers and pSync
 * work arrays, then runs an active-set barrier, an fcollect over all PEs and
 * finally a series of barriers over a shrinking active set.
 *
 * argc/argv are unused.
 *
 * Returns 0 on success, 1 if a symmetric allocation failed.
 */
int
main( int    argc,
      char** argv )
{
    int   i, me, npes;
    int   start, stride, size;
    void* ba;
    void* bb;
    long* barrier_psync;
    long* collect_psync;

    ( void )argc;
    ( void )argv;

    start_pes( 0 );

    npes = _num_pes();
    me   = _my_pe();

    /* Active set: `size` PEs beginning at PE `start`. `stride` is handed to
     * the log2-stride argument of the collectives, so 0 selects consecutive
     * PEs. */
    start  = 0;
    stride = 0;
    size   = npes;

    /* The PE number is printed 1-based. */
    printf( "Hello from PE %d of %d\n", me + 1, npes );

    /* Perform all symmetric allocations before checking any of them, so that
     * every PE issues the same sequence of shmalloc calls even when one of
     * the allocations fails. */
    ba            = shmalloc( BUFFER_SIZE );
    bb            = shmalloc( BUFFER_SIZE );
    barrier_psync = shmalloc( sizeof( long ) * _SHMEM_BARRIER_SYNC_SIZE );
    collect_psync = shmalloc( sizeof( long ) * _SHMEM_COLLECT_SYNC_SIZE );

    if ( ba == NULL || bb == NULL ||
         barrier_psync == NULL || collect_psync == NULL )
    {
        /* Bail out instead of dereferencing a NULL pointer below. */
        fprintf( stderr, "PE %d: symmetric memory allocation failed\n", me );
        return 1;
    }

    /* pSync arrays must hold _SHMEM_SYNC_VALUE before their first use. */
    for ( i = 0; i < _SHMEM_BARRIER_SYNC_SIZE; ++i )
    {
        barrier_psync[ i ] = _SHMEM_SYNC_VALUE;
    }

    for ( i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++ )
    {
        collect_psync[ i ] = _SHMEM_SYNC_VALUE;
    }

    /* Make sure every PE has finished initializing its pSync arrays before
     * any PE starts a collective that uses them. */
    shmem_barrier_all();

    /* Each PE contributes its own PE number as the single source element. */
    memcpy( ba, &me, sizeof( me ) );

    shmem_barrier( start, stride, size, barrier_psync );

    /* Gather one 32-bit element from each PE of the active set into bb.
     * NOTE(review): this relies on sizeof( int ) being 4 - confirm for the
     * target platform. */
    shmem_fcollect32( bb,
                      ba,
                      1,
                      start, stride, size,
                      collect_psync );

    /* Barriers over a shrinking active set: the set is halved after every
     * round and a PE drops out as soon as its number is no longer below the
     * current set size. */
    start  = 0;
    stride = 0;
    size   = npes;
    while ( me < size )
    {
        shmem_barrier( start, stride, size, barrier_psync );
        size /= 2;
    }

    /* Re-synchronize all PEs before releasing the symmetric memory. */
    shmem_barrier_all();

    shfree( ba );
    shfree( bb );
    shfree( barrier_psync );
    shfree( collect_psync );

    return 0;
}
