I fixed the issue below with Analyze/Performance/IMB.pm in
r1117. It now reads a single uninterrupted data table, which
ends at either an EOF or the first line that does not look
like a row in the data table (e.g., an error or warning
message). I'm surprised that the "floating point exception"
below could result in a pass. At least now the entire test
run is not scrapped because of one bad apple.

-Ethan


> Sat, Dec/29/2007 05:38:32PM, jjhur...@open-mpi.org wrote:
> 
> SQL QUERY: INSERT INTO latency_bandwidth 
>       (latency_bandwidth_id, message_size, latency_min, latency_avg, 
> latency_max, bandwidth_min, bandwidth_avg, bandwidth_max) VALUES 
>       ('314123', 
> '{0,1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,524288,1048576,2097152,4194304}',
>  
> '{0.15,191.51,166.73,169.26,166.44,167.43,168.30,168.44,165.46,166.42,167.48,162.31,136.42,222.19,446.03,716.19,1254.33,2458.68,5584.21,12544.87,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,27091.03,43622.23,72144.60,130192.28}',
>  
> '{0.22,191.53,166.78,169.27,166.45,167.45,168.30,168.60,165.47,166.46,167.64,162.32,136.45,222.21,446.09,716.21,1254.39,2458.76,5584.51,12545.59,,,,,,,,,,,,,,,,,27094.99,43640.46,72183.70,130302.65}',
>  
> '{0.28,191.55,166.89,169.28,166.47,167.49,168.32,168.69,165.48,166.49,167.70,162.34,136.48,222.24,446.13,716.24,1254.44,2458.84,5584.96,12546.58,,,,,,,,,,,,,,,,,27099.48,43659.70,72207.25,130419.42}',
>  DEFAULT, DEFAULT, DEFAULT)
> SQL ERROR: ERROR:  malformed array literal: 
> "{0.22,191.53,166.78,169.27,166.45,167.45,168.30,168.60,165.47,166.46,167.64,162.32,136.45,222.21,446.09,716.21,1254.39,2458.76,5584.51,12545.59,,,,,,,,,,,,,,,,,27094.99,43640.46,72183.70,130302.65}"
> SQL ERROR: 
>
> [SNIP]
>
>   'exit_value_81' => 0,
>   'mpi_install_section_name_81' => 'ompi/gnu-standard',
>   'latency_max_81' => 
> '{0.28,191.55,166.89,169.28,166.47,167.49,168.32,168.69,165.48,166.49,167.70,162.34,136.48,222.24,446.13,716.24,1254.44,2458.84,5584.96,12546.58,,,,,,,,,,,,,,,,,27099.48,43659.70,72207.25,130419.42}',
>   'latency_avg_81' => 
> '{0.22,191.53,166.78,169.27,166.45,167.45,168.30,168.60,165.47,166.46,167.64,162.32,136.45,222.21,446.09,716.21,1254.39,2458.76,5584.51,12545.59,,,,,,,,,,,,,,,,,27094.99,43640.46,72183.70,130302.65}',
>   'np_81' => '8',
>   'network_81' => 'loopback,verbs',
>   'test_result_81' => 1,
>   'latency_min_81' => 
> '{0.15,191.51,166.73,169.26,166.44,167.43,168.30,168.44,165.46,166.42,167.48,162.31,136.42,222.19,446.03,716.19,1254.33,2458.68,5584.21,12544.87,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,27091.03,43622.23,72144.60,130192.28}',
>   'test_build_section_name_81' => 'imb',
>   'description_81' => 'Cisco MPI development cluster',
>   'result_stderr_81' => '',
>   'environment_81' => '',
>   'exit_signal_81' => -1,
>   'test_name_81' => 'Allgatherv',
>   'parameters_81' => '--mca btl_openib_use_eager_rdma 0 --mca 
> btl_tcp_if_include ib0 --mca oob_tcp_if_include ib0',
>   'start_timestamp_81' => 'Sat Dec 29 22:32:56 2007',
>   'command_81' => 'mpirun -np 8 --mca btl_openib_use_eager_rdma 0 --mca btl 
> openib,self --mca btl_tcp_if_include ib0 --mca oob_tcp_if_include ib0  
> src/IMB-MPI1 -npmin 8 Allgatherv',
>   'duration_81' => '20 seconds',
>   'message_size_81' => 
> '{0,1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,65536,131072,262144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,524288,1048576,2097152,4194304}',
>   'resource_manager_81' => 'slurm',
>   'result_stdout_81' => '#---------------------------------------------------
> #    Intel (R) MPI Benchmark Suite V2.3, MPI-1 part    
> #---------------------------------------------------
> # Date       : Sat Dec 29 14:32:57 2007
> # Machine    : x86_64# System     : Linux
> # Release    : 2.6.9-42.ELsmp
> # Version    : #1 SMP Wed Jul 12 23:32:02 EDT 2006
> 
> #
> # Minimum message length in bytes:   0
> # Maximum message length in bytes:   4194304
> #
> # MPI_Datatype                   :   MPI_BYTE 
> # MPI_Datatype for reductions    :   MPI_FLOAT
> # MPI_Op                         :   MPI_SUM  
> #
> #
> 
> # List of Benchmarks to run:
> 
> # Allgatherv
> 
> #----------------------------------------------------------------
> # Benchmarking Allgatherv 
> # #processes = 8 
> #----------------------------------------------------------------
>        #bytes #repetitions  t_min[usec]  t_max[usec]  t_avg[usec]
>             0         1000         0.15         0.28         0.22
>             1         1000       191.51       191.55       191.53
>             2         1000       166.73       166.89       166.78
>             4         1000       169.26       169.28       169.27
>             8         1000       166.44       166.47       166.45
>            16         1000       167.43       167.49       167.45
>            32         1000       168.30       168.32       168.30
>            64         1000       168.44       168.69       168.60
>           128         1000       165.46       165.48       165.47
>           256         1000       166.42       166.49       166.46
>           512         1000       167.48       167.70       167.64
>          1024         1000       162.31       162.34       162.32
>          2048         1000       136.42       136.48       136.45
>          4096         1000       222.19       222.24       222.21
>          8192         1000       446.03       446.13       446.09
>         16384         1000       716.19       716.24       716.21
>         32768         1000      1254.33      1254.44      1254.39
>         65536          640      2458.68      2458.84      2458.76
>        131072          320      5584.21      5584.96      5584.51
>        262144          160     12544.87     12546.58     12545.59
> [svbu-mpi031:12247] *** Process received signal ***
> [svbu-mpi031:12247] Signal: Floating point exception (8)
> [svbu-mpi031:12247] Signal code:  (0)
> [svbu-mpi031:12247] Failing at address: 0x25900002fed
> [svbu-mpi031:12247] [ 0] /lib64/tls/libpthread.so.0 [0x2a95e57430]
> [svbu-mpi031:12247] [ 1] /lib64/tls/libc.so.6(__poll+0x2f) [0x2a9601e96f]
> [svbu-mpi031:12247] [ 2] 
> /home/mpiteam/scratches/2007-12-28/Ppuo/installs/5BvS/install/lib/libopen-pal.so.0(opal_poll_dispatch+0x13c)
>  [0x2a9568e1ba]
> [svbu-mpi031:12247] [ 3] 
> /home/mpiteam/scratches/2007-12-28/Ppuo/installs/5BvS/install/lib/libopen-pal.so.0(opal_event_base_loop+0x419)
>  [0x2a9568a238]
> [svbu-mpi031:12247] [ 4] 
> /home/mpiteam/scratches/2007-12-28/Ppuo/installs/5BvS/install/lib/libopen-pal.so.0(opal_event_loop+0x1d)
>  [0x2a95689e1d]
> [svbu-mpi031:12247] [ 5] 
> /home/mpiteam/scratches/2007-12-28/Ppuo/installs/5BvS/install/lib/libopen-pal.so.0(opal_progress+0x6a)
>  [0x2a95680fbe]
> [svbu-mpi031:12247] [ 6] mpirun [0x403fe4]
> [svbu-mpi031:12247] [ 7] mpirun(orterun+0x9bb) [0x403823]
> [svbu-mpi031:12247] [ 8] mpirun(main+0x1b) [0x402e63]
> [svbu-mpi031:12247] [ 9] /lib64/tls/libc.so.6(__libc_start_main+0xdb) 
> [0x2a95f7d3fb]
> [svbu-mpi031:12247] [10] mpirun(orte_daemon_recv+0x1e2) [0x402dba]
> [svbu-mpi031:12247] *** End of error message ***
>        524288           80     27091.03     27099.48     27094.99
>       1048576           40     43622.23     43659.70     43640.46
>       2097152           20     72144.60     72207.25     72183.70
>       4194304           10    130192.28    130419.42    130302.65
> ',
>   'variant_81' => 81,
>   'result_message_81' => 'Passed',
>   'test_type_81' => 'latency_bandwidth',
>   'launcher_81' => 'mpirun',
> 

Reply via email to