/*
Hi OpenMPI people. This mail is itself the c++ code that will reproduce the bug as run in in a single processor debian machine with only TCP messaging. The sample program: RUNS in 1.1.2 ABORTS in 1.2 In 1.2 aborts during the following assert: assert(msg_status.Get_error() == MPI_SUCCESS); The suspected bug aborts in 1.2 because MPI::Status::Get_error() returns the value of MPI::Status::mpi_status.MPI_ERROR My best gess is that 1.2 is messing with the C++ MPI::Status structure during the MPI_Test call made by MPI::Request::Test(MPI::Status& st) while 1.1.2 left it as initialized by C++ and so always returning MPI_SUCCESS when msg_status.Get_error() is called. If that's the case it is worth look at the suspected bug. My guess is based on the observation that the c function int MPI_Test(MPI_Request *req, int *flag, MPI_Status *stat); sets with garbage 'stat->MPI_ERROR' after a sucessful call. insted of setting it with the same value (err_code) returned by the func. 1.2 config.log.gz attached. (1.1.2 is NOT attached because the email would exceed 100kb). Thanks for OpenMPI. JLQ. */ #include <iostream> #include <assert.h> using namespace std; #include"mpi.h" enum msg_tag { k_first_tag = 100, k_last_tag }; #define BUFF_TP long #define BUFF_MPI_TP MPI_LONG #define BUFF_SZ 100 #define create_buff() ((BUFF_TP*)malloc(sizeof(BUFF_TP) * BUFF_SZ)) void fill_buff(BUFF_TP* buff, BUFF_TP val){ for(int ii = 0; ii < BUFF_SZ; ii++){ buff[ii] = val; } } bool test_receive(int rk, MPI::Status& msg_status, MPI::Request& msg_request, int& ended) { bool resp = msg_request.Test(msg_status); if(resp){ int msg_sz = msg_status.Get_count(BUFF_MPI_TP); int msg_from = msg_status.Get_source(); int msg_tag = msg_status.Get_tag(); assert(msg_sz == BUFF_SZ); assert(msg_status.Get_error() == MPI_SUCCESS); assert( (msg_tag == k_first_tag) || (msg_tag == k_last_tag)); if(msg_tag == k_last_tag){ ended++; } } return resp; } void send_all_to(int rk, int num_msgs, BUFF_TP* buff){ int tag = k_first_tag; for(int aa = 1; aa <= num_msgs; aa++){ if(aa == num_msgs){ tag = k_last_tag; } MPI::COMM_WORLD.Send(buff, BUFF_SZ, BUFF_MPI_TP, rk, tag); } } int main(int argc, char* argv[]) { MPI::Init(argc, argv); BUFF_TP* out_buff = create_buff(); BUFF_TP* in_buff = create_buff(); MPI::Request request; MPI::Status status; int num_msgs = 0; int myrank = MPI::COMM_WORLD.Get_rank(); int nprocs = MPI::COMM_WORLD.Get_size(); try{ int tag = k_first_tag; request = MPI::COMM_WORLD.Irecv(in_buff, BUFF_SZ, BUFF_MPI_TP, MPI_ANY_SOURCE, MPI_ANY_TAG); int dest_rk = 0; int num_ended = 0; int num_others = nprocs - 1; fill_buff(in_buff, 0); fill_buff(out_buff, 0); if(myrank == 0){ cout << "input num_msgs:"; cin >> num_msgs; assert(num_msgs > 0); fill_buff(out_buff, num_msgs); }; while((num_ended < num_others) || (dest_rk < nprocs)){ if(num_ended < num_others){ bool got_it = test_receive(myrank, status, request, num_ended); if(got_it && (num_ended < num_others)){ request = MPI::COMM_WORLD.Irecv(in_buff, BUFF_SZ, BUFF_MPI_TP, MPI_ANY_SOURCE, MPI_ANY_TAG); if(num_msgs == 0){ assert(myrank > 0); num_msgs = in_buff[0]; assert(num_msgs > 0); cout << "rk=" << myrank << " GOT " << " num_msgs=" << num_msgs << endl; fill_buff(out_buff, num_msgs); } } } if((num_msgs > 0) && (dest_rk < nprocs)){ if(dest_rk != myrank){ send_all_to(dest_rk, num_msgs, out_buff); /*cout << myrank << " sended all to rk=" << dest_rk << " num_msgs=" << num_msgs << endl;*/ } dest_rk++; } } } catch(MPI::Exception e) { cout << "MPI::Exception. rk =" << myrank << endl << "error_code=" << e.Get_error_code() << "(" << e.Get_error_string() << ")" << endl; MPI::COMM_WORLD.Abort(99); } cout << myrank << " FINISHED" << endl; MPI::Finalize(); return 0; } /* ------------------------------------------------------------ 1.1.2 compile command: /lib/openmpi-1.1.2/bin/mpic++ -o no_bug_1.1.2 status_bug.cpp 1.1.2 run command: /lib/openmpi-1.1.2/bin/mpirun -np 5 -host 127.0.0.1 no_bug_1.1.2 1.2 compile command: /lib/openmpi-1.2/bin/mpic++ -o bug_1.2 status_bug.cpp 1.2 run command: /lib/openmpi-1.2/bin/mpirun -np 5 -host 127.0.0.1 bug_1.2 ------------------------------------------------------------ 1.2 error output: input num_msgs:6 bug_1.2: status_bug.cpp:82: bool test_receive(int, MPI::Status&, MPI::Request&, int&): Assertion `msg_status.Get_error() == 0' failed. [aculiar:23799] *** Process received signal *** [aculiar:23799] Signal: Aborted (6) [aculiar:23799] Signal code: User function (kill, sigsend, abort, etc.) (0) [aculiar:23799] [ 0] /lib/libpthread.so.0 [0x4026f8cb] [aculiar:23799] [ 1] /lib/libc.so.6 [0x402de6f0] [aculiar:23799] [ 2] /lib/libpthread.so.0(raise+0x2b) [0x4026ca7b] [aculiar:23799] [ 3] /lib/libc.so.6(gsignal+0x44) [0x402de554] [aculiar:23799] [ 4] /lib/libc.so.6(abort+0x178) [0x402dfa88] [aculiar:23799] [ 5] /lib/libc.so.6(__assert_fail+0x10f) [0x402d7bbf] [aculiar:23799] [ 6] bug_1.2(_Z12test_receiveiRN3MPI6StatusERNS_7RequestERi+0x116) [0x804f3dc] [aculiar:23799] [ 7] bug_1.2(main+0x223) [0x804f6f5] [aculiar:23799] [ 8] /lib/libc.so.6(__libc_start_main+0xc6) [0x402cae36] [aculiar:23799] [ 9] bug_1.2(_ZN3MPI3Win8Set_attrEiPKv+0x121) [0x804f1f1] [aculiar:23799] *** End of error message *** mpirun noticed that job rank 0 with PID 23798 on node 127.0.0.1 exited on signal 15 (Terminated). 2 additional processes aborted (not shown) ------------------------------------------------------------ output of /lib/openmpi-1.1.2/bin/ompi_info Open MPI: 1.1.2 Open MPI SVN revision: r12073 Open RTE: 1.1.2 Open RTE SVN revision: r12073 OPAL: 1.1.2 OPAL SVN revision: r12073 Prefix: /lib/openmpi-1.1.2 Configured architecture: i686-pc-linux-gnu Configured by: webmgr Configured on: Sun Oct 29 09:49:39 COT 2006 Configure host: aculiar.aculiar.com Built by: webmgr Built on: dom oct 29 12:15:26 COT 2006 Built host: aculiar.aculiar.com C bindings: yes C++ bindings: yes Fortran77 bindings: no Fortran90 bindings: no Fortran90 bindings size: na C compiler: gcc C compiler absolute: /usr/bin/gcc C++ compiler: g++ C++ compiler absolute: /usr/bin/g++ Fortran77 compiler: none Fortran77 compiler abs: none Fortran90 compiler: none Fortran90 compiler abs: none C profiling: yes C++ profiling: yes Fortran77 profiling: no Fortran90 profiling: no C++ exceptions: no Thread support: posix (mpi: no, progress: no) Internal debug support: no MPI parameter check: runtime Memory profiling support: no Memory debugging support: no libltdl support: yes MCA memory: ptmalloc2 (MCA v1.0, API v1.0, Component v1.1.2) MCA paffinity: linux (MCA v1.0, API v1.0, Component v1.1.2) MCA maffinity: first_use (MCA v1.0, API v1.0, Component v1.1.2) MCA timer: linux (MCA v1.0, API v1.0, Component v1.1.2) MCA allocator: basic (MCA v1.0, API v1.0, Component v1.0) MCA allocator: bucket (MCA v1.0, API v1.0, Component v1.0) MCA coll: basic (MCA v1.0, API v1.0, Component v1.1.2) MCA coll: hierarch (MCA v1.0, API v1.0, Component v1.1.2) MCA coll: self (MCA v1.0, API v1.0, Component v1.1.2) MCA coll: sm (MCA v1.0, API v1.0, Component v1.1.2) MCA coll: tuned (MCA v1.0, API v1.0, Component v1.1.2) MCA io: romio (MCA v1.0, API v1.0, Component v1.1.2) MCA mpool: sm (MCA v1.0, API v1.0, Component v1.1.2) MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.1.2) MCA bml: r2 (MCA v1.0, API v1.0, Component v1.1.2) MCA rcache: rb (MCA v1.0, API v1.0, Component v1.1.2) MCA btl: self (MCA v1.0, API v1.0, Component v1.1.2) MCA btl: sm (MCA v1.0, API v1.0, Component v1.1.2) MCA btl: tcp (MCA v1.0, API v1.0, Component v1.0) MCA topo: unity (MCA v1.0, API v1.0, Component v1.1.2) MCA osc: pt2pt (MCA v1.0, API v1.0, Component v1.0) MCA gpr: null (MCA v1.0, API v1.0, Component v1.1.2) MCA gpr: proxy (MCA v1.0, API v1.0, Component v1.1.2) MCA gpr: replica (MCA v1.0, API v1.0, Component v1.1.2) MCA iof: proxy (MCA v1.0, API v1.0, Component v1.1.2) MCA iof: svc (MCA v1.0, API v1.0, Component v1.1.2) MCA ns: proxy (MCA v1.0, API v1.0, Component v1.1.2) MCA ns: replica (MCA v1.0, API v1.0, Component v1.1.2) MCA oob: tcp (MCA v1.0, API v1.0, Component v1.0) MCA ras: dash_host (MCA v1.0, API v1.0, Component v1.1.2) MCA ras: hostfile (MCA v1.0, API v1.0, Component v1.1.2) MCA ras: localhost (MCA v1.0, API v1.0, Component v1.1.2) MCA ras: poe (MCA v1.0, API v1.0, Component v1.1.2) MCA ras: slurm (MCA v1.0, API v1.0, Component v1.1.2) MCA rds: hostfile (MCA v1.0, API v1.0, Component v1.1.2) MCA rds: resfile (MCA v1.0, API v1.0, Component v1.1.2) MCA rmaps: round_robin (MCA v1.0, API v1.0, Component v1.1.2) MCA rmgr: proxy (MCA v1.0, API v1.0, Component v1.1.2) MCA rmgr: urm (MCA v1.0, API v1.0, Component v1.1.2) MCA rml: oob (MCA v1.0, API v1.0, Component v1.1.2) MCA pls: fork (MCA v1.0, API v1.0, Component v1.1.2) MCA pls: rsh (MCA v1.0, API v1.0, Component v1.1.2) MCA pls: slurm (MCA v1.0, API v1.0, Component v1.1.2) MCA sds: env (MCA v1.0, API v1.0, Component v1.1.2) MCA sds: pipe (MCA v1.0, API v1.0, Component v1.1.2) MCA sds: seed (MCA v1.0, API v1.0, Component v1.1.2) MCA sds: singleton (MCA v1.0, API v1.0, Component v1.1.2) MCA sds: slurm (MCA v1.0, API v1.0, Component v1.1.2) ------------------------------------------------------------ output of /lib/openmpi-1.2/bin/ompi_info Open MPI: 1.2 Open MPI SVN revision: r14027 Open RTE: 1.2 Open RTE SVN revision: r14027 OPAL: 1.2 OPAL SVN revision: r14027 Prefix: /lib/openmpi-1.2 Configured architecture: i686-pc-linux-gnu Configured by: root Configured on: Tue Apr 3 18:49:41 COT 2007 Configure host: aculiar.aculiar.com Built by: root Built on: Tue Apr 3 20:15:23 COT 2007 Built host: aculiar.aculiar.com C bindings: yes C++ bindings: yes Fortran77 bindings: no Fortran90 bindings: no Fortran90 bindings size: na C compiler: gcc C compiler absolute: /usr/bin/gcc C++ compiler: g++ C++ compiler absolute: /usr/bin/g++ Fortran77 compiler: none Fortran77 compiler abs: none Fortran90 compiler: none Fortran90 compiler abs: none C profiling: yes C++ profiling: yes Fortran77 profiling: no Fortran90 profiling: no C++ exceptions: no Thread support: posix (mpi: no, progress: no) Internal debug support: no MPI parameter check: runtime Memory profiling support: no Memory debugging support: no libltdl support: yes Heterogeneous support: yes mpirun default --prefix: no MCA backtrace: execinfo (MCA v1.0, API v1.0, Component v1.2) MCA memory: ptmalloc2 (MCA v1.0, API v1.0, Component v1.2) MCA paffinity: linux (MCA v1.0, API v1.0, Component v1.2) MCA maffinity: first_use (MCA v1.0, API v1.0, Component v1.2) MCA timer: linux (MCA v1.0, API v1.0, Component v1.2) MCA allocator: basic (MCA v1.0, API v1.0, Component v1.0) MCA allocator: bucket (MCA v1.0, API v1.0, Component v1.0) MCA coll: basic (MCA v1.0, API v1.0, Component v1.2) MCA coll: self (MCA v1.0, API v1.0, Component v1.2) MCA coll: sm (MCA v1.0, API v1.0, Component v1.2) MCA coll: tuned (MCA v1.0, API v1.0, Component v1.2) MCA io: romio (MCA v1.0, API v1.0, Component v1.2) MCA mpool: sm (MCA v1.0, API v1.0, Component v1.2) MCA pml: cm (MCA v1.0, API v1.0, Component v1.2) MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.2) MCA bml: r2 (MCA v1.0, API v1.0, Component v1.2) MCA rcache: rb (MCA v1.0, API v1.0, Component v1.2) MCA rcache: vma (MCA v1.0, API v1.0, Component v1.2) MCA btl: self (MCA v1.0, API v1.0.1, Component v1.2) MCA btl: sm (MCA v1.0, API v1.0.1, Component v1.2) MCA btl: tcp (MCA v1.0, API v1.0.1, Component v1.0) MCA topo: unity (MCA v1.0, API v1.0, Component v1.2) MCA osc: pt2pt (MCA v1.0, API v1.0, Component v1.2) MCA errmgr: hnp (MCA v1.0, API v1.3, Component v1.2) MCA errmgr: orted (MCA v1.0, API v1.3, Component v1.2) MCA errmgr: proxy (MCA v1.0, API v1.3, Component v1.2) MCA gpr: null (MCA v1.0, API v1.0, Component v1.2) MCA gpr: proxy (MCA v1.0, API v1.0, Component v1.2) MCA gpr: replica (MCA v1.0, API v1.0, Component v1.2) MCA iof: proxy (MCA v1.0, API v1.0, Component v1.2) MCA iof: svc (MCA v1.0, API v1.0, Component v1.2) MCA ns: proxy (MCA v1.0, API v2.0, Component v1.2) MCA ns: replica (MCA v1.0, API v2.0, Component v1.2) MCA oob: tcp (MCA v1.0, API v1.0, Component v1.0) MCA ras: dash_host (MCA v1.0, API v1.3, Component v1.2) MCA ras: gridengine (MCA v1.0, API v1.3, Component v1.2) MCA ras: localhost (MCA v1.0, API v1.3, Component v1.2) MCA ras: slurm (MCA v1.0, API v1.3, Component v1.2) MCA rds: hostfile (MCA v1.0, API v1.3, Component v1.2) MCA rds: proxy (MCA v1.0, API v1.3, Component v1.2) MCA rds: resfile (MCA v1.0, API v1.3, Component v1.2) MCA rmaps: round_robin (MCA v1.0, API v1.3, Component v1.2) MCA rmgr: proxy (MCA v1.0, API v2.0, Component v1.2) MCA rmgr: urm (MCA v1.0, API v2.0, Component v1.2) MCA rml: oob (MCA v1.0, API v1.0, Component v1.2) MCA pls: gridengine (MCA v1.0, API v1.3, Component v1.2) MCA pls: proxy (MCA v1.0, API v1.3, Component v1.2) MCA pls: rsh (MCA v1.0, API v1.3, Component v1.2) MCA pls: slurm (MCA v1.0, API v1.3, Component v1.2) MCA sds: env (MCA v1.0, API v1.0, Component v1.2) MCA sds: pipe (MCA v1.0, API v1.0, Component v1.2) MCA sds: seed (MCA v1.0, API v1.0, Component v1.2) MCA sds: singleton (MCA v1.0, API v1.0, Component v1.2) MCA sds: slurm (MCA v1.0, API v1.0, Component v1.2) */ ____________________________________________________________________________________ It's here! Your new message! Get new email alerts with the free Yahoo! Toolbar. http://tools.search.yahoo.com/toolbar/features/mail/
config_1.2.log.gz
Description: 2604947188-config_1.2.log.gz