/*
Hi OpenMPI people.

This mail is itself the C++ code that reproduces the bug, as run on a
single-processor Debian machine with only TCP messaging.

The sample program:
  RUNS in 1.1.2
  ABORTS in 1.2

In 1.2 it aborts on the following assert:
  assert(msg_status.Get_error() == MPI_SUCCESS);

The suspected bug aborts in 1.2 because MPI::Status::Get_error() returns
the value of MPI::Status::mpi_status.MPI_ERROR. My best guess is that 1.2
is modifying the C++ MPI::Status structure during the MPI_Test call made
by MPI::Request::Test(MPI::Status& st), while 1.1.2 left it as initialized
by C++, so msg_status.Get_error() always returned MPI_SUCCESS. If that is
the case, the suspected bug is worth looking at.

My guess is based on the observation that the C function
  int MPI_Test(MPI_Request *req, int *flag, MPI_Status *stat);
leaves garbage in 'stat->MPI_ERROR' after a successful call, instead of
setting it to the same value (err_code) returned by the function. A small
C-level check is sketched right after this comment.

The 1.2 config.log.gz is attached. (The 1.1.2 one is NOT attached because
the email would exceed 100kb.)

Thanks for OpenMPI.
JLQ.
*/
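/*
Below is a minimal C-level sketch of that check, kept inside this comment
so the mail still compiles as-is. It is only an illustration, not part of
the reproducer: the file name (check_status.c), the self-send pattern and
the tag value 100 are assumptions chosen for brevity. It pre-fills
status.MPI_ERROR with MPI_SUCCESS, completes a self-message through
MPI_Test, and prints what MPI_Test left in that field, so the two releases
can be compared directly at the C level (run it with -np 1).

#include <stdio.h>
#include <mpi.h>

int main(int argc, char* argv[])
{
    long in_val = 0, out_val = 42;
    int flag = 0;
    int err;
    MPI_Request req;
    MPI_Status st;

    MPI_Init(&argc, &argv);

    st.MPI_ERROR = MPI_SUCCESS;        // known value before the call

    // Post the receive first, then do a blocking send to self; the
    // already-posted receive guarantees the self-send can complete.
    MPI_Irecv(&in_val, 1, MPI_LONG, 0, 100, MPI_COMM_WORLD, &req);
    MPI_Send(&out_val, 1, MPI_LONG, 0, 100, MPI_COMM_WORLD);

    do {                               // poll until the receive completes
        err = MPI_Test(&req, &flag, &st);
    } while (!flag);

    // If the suspicion above is right, 1.2 would print a garbage value
    // for status.MPI_ERROR here even though err itself is MPI_SUCCESS.
    printf("MPI_Test returned %d, status.MPI_ERROR = %d (MPI_SUCCESS = %d)\n",
           err, st.MPI_ERROR, MPI_SUCCESS);

    MPI_Finalize();
    return 0;
}
*/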
#include <iostream>
#include <assert.h>
#include <stdlib.h>   /* malloc */
using namespace std;
#include "mpi.h"
enum msg_tag {
    k_first_tag = 100,
    k_last_tag
};

#define BUFF_TP     long
#define BUFF_MPI_TP MPI_LONG
#define BUFF_SZ     100
#define create_buff() ((BUFF_TP*)malloc(sizeof(BUFF_TP) * BUFF_SZ))

void fill_buff(BUFF_TP* buff, BUFF_TP val){
    for(int ii = 0; ii < BUFF_SZ; ii++){
        buff[ii] = val;
    }
}
bool test_receive(int rk,
                  MPI::Status& msg_status,
                  MPI::Request& msg_request,
                  int& ended)
{
    bool resp = msg_request.Test(msg_status);
    if(resp){
        int msg_sz   = msg_status.Get_count(BUFF_MPI_TP);
        int msg_from = msg_status.Get_source();
        int msg_tag  = msg_status.Get_tag();
        assert(msg_sz == BUFF_SZ);
        /* this is the assert that fires under 1.2 */
        assert(msg_status.Get_error() == MPI_SUCCESS);
        assert((msg_tag == k_first_tag) || (msg_tag == k_last_tag));
        if(msg_tag == k_last_tag){
            ended++;
        }
    }
    return resp;
}
void send_all_to(int rk, int num_msgs, BUFF_TP* buff){
    int tag = k_first_tag;
    for(int aa = 1; aa <= num_msgs; aa++){
        if(aa == num_msgs){
            tag = k_last_tag;
        }
        MPI::COMM_WORLD.Send(buff, BUFF_SZ, BUFF_MPI_TP, rk, tag);
    }
}
int main(int argc, char* argv[])
{
    MPI::Init(argc, argv);

    BUFF_TP* out_buff = create_buff();
    BUFF_TP* in_buff  = create_buff();
    MPI::Request request;
    MPI::Status  status;
    int num_msgs = 0;
    int myrank = MPI::COMM_WORLD.Get_rank();
    int nprocs = MPI::COMM_WORLD.Get_size();

    try{
        int tag = k_first_tag;
        request = MPI::COMM_WORLD.Irecv(in_buff, BUFF_SZ, BUFF_MPI_TP,
                                        MPI_ANY_SOURCE, MPI_ANY_TAG);
        int dest_rk = 0;
        int num_ended = 0;
        int num_others = nprocs - 1;
        fill_buff(in_buff, 0);
        fill_buff(out_buff, 0);

        if(myrank == 0){
            cout << "input num_msgs:";
            cin >> num_msgs;
            assert(num_msgs > 0);
            fill_buff(out_buff, num_msgs);
        }

        while((num_ended < num_others) || (dest_rk < nprocs)){
            if(num_ended < num_others){
                bool got_it = test_receive(myrank, status, request, num_ended);
                if(got_it && (num_ended < num_others)){
                    request = MPI::COMM_WORLD.Irecv(in_buff, BUFF_SZ, BUFF_MPI_TP,
                                                    MPI_ANY_SOURCE, MPI_ANY_TAG);
                    if(num_msgs == 0){
                        assert(myrank > 0);
                        num_msgs = in_buff[0];
                        assert(num_msgs > 0);
                        cout << "rk=" << myrank << " GOT "
                             << " num_msgs=" << num_msgs << endl;
                        fill_buff(out_buff, num_msgs);
                    }
                }
            }
            if((num_msgs > 0) && (dest_rk < nprocs)){
                if(dest_rk != myrank){
                    send_all_to(dest_rk, num_msgs, out_buff);
                    /*cout << myrank << " sended all to rk=" << dest_rk
                           << " num_msgs=" << num_msgs << endl;*/
                }
                dest_rk++;
            }
        }
    } catch(MPI::Exception e) {
        cout << "MPI::Exception. rk =" << myrank << endl
             << "error_code=" << e.Get_error_code()
             << "(" << e.Get_error_string() << ")" << endl;
        MPI::COMM_WORLD.Abort(99);
    }

    cout << myrank << " FINISHED" << endl;
    MPI::Finalize();
    return 0;
}
/*
------------------------------------------------------------
1.1.2 compile command:
/lib/openmpi-1.1.2/bin/mpic++ -o no_bug_1.1.2 status_bug.cpp
1.1.2 run command:
/lib/openmpi-1.1.2/bin/mpirun -np 5 -host 127.0.0.1 no_bug_1.1.2
1.2 compile command:
/lib/openmpi-1.2/bin/mpic++ -o bug_1.2 status_bug.cpp
1.2 run command:
/lib/openmpi-1.2/bin/mpirun -np 5 -host 127.0.0.1 bug_1.2
------------------------------------------------------------
1.2 error output:
input num_msgs:6
bug_1.2: status_bug.cpp:82: bool test_receive(int, MPI::Status&, MPI::Request&, int&): Assertion `msg_status.Get_error() == 0' failed.
[aculiar:23799] *** Process received signal ***
[aculiar:23799] Signal: Aborted (6)
[aculiar:23799] Signal code: User function (kill, sigsend, abort, etc.) (0)
[aculiar:23799] [ 0] /lib/libpthread.so.0 [0x4026f8cb]
[aculiar:23799] [ 1] /lib/libc.so.6 [0x402de6f0]
[aculiar:23799] [ 2] /lib/libpthread.so.0(raise+0x2b) [0x4026ca7b]
[aculiar:23799] [ 3] /lib/libc.so.6(gsignal+0x44) [0x402de554]
[aculiar:23799] [ 4] /lib/libc.so.6(abort+0x178) [0x402dfa88]
[aculiar:23799] [ 5] /lib/libc.so.6(__assert_fail+0x10f) [0x402d7bbf]
[aculiar:23799] [ 6] bug_1.2(_Z12test_receiveiRN3MPI6StatusERNS_7RequestERi+0x116) [0x804f3dc]
[aculiar:23799] [ 7] bug_1.2(main+0x223) [0x804f6f5]
[aculiar:23799] [ 8] /lib/libc.so.6(__libc_start_main+0xc6) [0x402cae36]
[aculiar:23799] [ 9] bug_1.2(_ZN3MPI3Win8Set_attrEiPKv+0x121) [0x804f1f1]
[aculiar:23799] *** End of error message ***
mpirun noticed that job rank 0 with PID 23798 on node 127.0.0.1 exited on signal 15 (Terminated).
2 additional processes aborted (not shown)
------------------------------------------------------------
output of /lib/openmpi-1.1.2/bin/ompi_info
Open MPI: 1.1.2
Open MPI SVN revision: r12073
Open RTE: 1.1.2
Open RTE SVN revision: r12073
OPAL: 1.1.2
OPAL SVN revision: r12073
Prefix: /lib/openmpi-1.1.2
Configured architecture: i686-pc-linux-gnu
Configured by: webmgr
Configured on: Sun Oct 29 09:49:39 COT 2006
Configure host: aculiar.aculiar.com
Built by: webmgr
Built on: dom oct 29 12:15:26 COT 2006
Built host: aculiar.aculiar.com
C bindings: yes
C++ bindings: yes
Fortran77 bindings: no
Fortran90 bindings: no
Fortran90 bindings size: na
C compiler: gcc
C compiler absolute: /usr/bin/gcc
C++ compiler: g++
C++ compiler absolute: /usr/bin/g++
Fortran77 compiler: none
Fortran77 compiler abs: none
Fortran90 compiler: none
Fortran90 compiler abs: none
C profiling: yes
C++ profiling: yes
Fortran77 profiling: no
Fortran90 profiling: no
C++ exceptions: no
Thread support: posix (mpi: no, progress: no)
Internal debug support: no
MPI parameter check: runtime
Memory profiling support: no
Memory debugging support: no
libltdl support: yes
MCA memory: ptmalloc2 (MCA v1.0, API v1.0, Component v1.1.2)
MCA paffinity: linux (MCA v1.0, API v1.0, Component v1.1.2)
MCA maffinity: first_use (MCA v1.0, API v1.0, Component v1.1.2)
MCA timer: linux (MCA v1.0, API v1.0, Component v1.1.2)
MCA allocator: basic (MCA v1.0, API v1.0, Component v1.0)
MCA allocator: bucket (MCA v1.0, API v1.0, Component v1.0)
MCA coll: basic (MCA v1.0, API v1.0, Component v1.1.2)
MCA coll: hierarch (MCA v1.0, API v1.0, Component v1.1.2)
MCA coll: self (MCA v1.0, API v1.0, Component v1.1.2)
MCA coll: sm (MCA v1.0, API v1.0, Component v1.1.2)
MCA coll: tuned (MCA v1.0, API v1.0, Component v1.1.2)
MCA io: romio (MCA v1.0, API v1.0, Component v1.1.2)
MCA mpool: sm (MCA v1.0, API v1.0, Component v1.1.2)
MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.1.2)
MCA bml: r2 (MCA v1.0, API v1.0, Component v1.1.2)
MCA rcache: rb (MCA v1.0, API v1.0, Component v1.1.2)
MCA btl: self (MCA v1.0, API v1.0, Component v1.1.2)
MCA btl: sm (MCA v1.0, API v1.0, Component v1.1.2)
MCA btl: tcp (MCA v1.0, API v1.0, Component v1.0)
MCA topo: unity (MCA v1.0, API v1.0, Component v1.1.2)
MCA osc: pt2pt (MCA v1.0, API v1.0, Component v1.0)
MCA gpr: null (MCA v1.0, API v1.0, Component v1.1.2)
MCA gpr: proxy (MCA v1.0, API v1.0, Component v1.1.2)
MCA gpr: replica (MCA v1.0, API v1.0, Component v1.1.2)
MCA iof: proxy (MCA v1.0, API v1.0, Component v1.1.2)
MCA iof: svc (MCA v1.0, API v1.0, Component v1.1.2)
MCA ns: proxy (MCA v1.0, API v1.0, Component v1.1.2)
MCA ns: replica (MCA v1.0, API v1.0, Component v1.1.2)
MCA oob: tcp (MCA v1.0, API v1.0, Component v1.0)
MCA ras: dash_host (MCA v1.0, API v1.0, Component v1.1.2)
MCA ras: hostfile (MCA v1.0, API v1.0, Component v1.1.2)
MCA ras: localhost (MCA v1.0, API v1.0, Component v1.1.2)
MCA ras: poe (MCA v1.0, API v1.0, Component v1.1.2)
MCA ras: slurm (MCA v1.0, API v1.0, Component v1.1.2)
MCA rds: hostfile (MCA v1.0, API v1.0, Component v1.1.2)
MCA rds: resfile (MCA v1.0, API v1.0, Component v1.1.2)
MCA rmaps: round_robin (MCA v1.0, API v1.0, Component v1.1.2)
MCA rmgr: proxy (MCA v1.0, API v1.0, Component v1.1.2)
MCA rmgr: urm (MCA v1.0, API v1.0, Component v1.1.2)
MCA rml: oob (MCA v1.0, API v1.0, Component v1.1.2)
MCA pls: fork (MCA v1.0, API v1.0, Component v1.1.2)
MCA pls: rsh (MCA v1.0, API v1.0, Component v1.1.2)
MCA pls: slurm (MCA v1.0, API v1.0, Component v1.1.2)
MCA sds: env (MCA v1.0, API v1.0, Component v1.1.2)
MCA sds: pipe (MCA v1.0, API v1.0, Component v1.1.2)
MCA sds: seed (MCA v1.0, API v1.0, Component v1.1.2)
MCA sds: singleton (MCA v1.0, API v1.0, Component v1.1.2)
MCA sds: slurm (MCA v1.0, API v1.0, Component v1.1.2)
------------------------------------------------------------
output of /lib/openmpi-1.2/bin/ompi_info
Open MPI: 1.2
Open MPI SVN revision: r14027
Open RTE: 1.2
Open RTE SVN revision: r14027
OPAL: 1.2
OPAL SVN revision: r14027
Prefix: /lib/openmpi-1.2
Configured architecture: i686-pc-linux-gnu
Configured by: root
Configured on: Tue Apr 3 18:49:41 COT 2007
Configure host: aculiar.aculiar.com
Built by: root
Built on: Tue Apr 3 20:15:23 COT 2007
Built host: aculiar.aculiar.com
C bindings: yes
C++ bindings: yes
Fortran77 bindings: no
Fortran90 bindings: no
Fortran90 bindings size: na
C compiler: gcc
C compiler absolute: /usr/bin/gcc
C++ compiler: g++
C++ compiler absolute: /usr/bin/g++
Fortran77 compiler: none
Fortran77 compiler abs: none
Fortran90 compiler: none
Fortran90 compiler abs: none
C profiling: yes
C++ profiling: yes
Fortran77 profiling: no
Fortran90 profiling: no
C++ exceptions: no
Thread support: posix (mpi: no, progress: no)
Internal debug support: no
MPI parameter check: runtime
Memory profiling support: no
Memory debugging support: no
libltdl support: yes
Heterogeneous support: yes
mpirun default --prefix: no
MCA backtrace: execinfo (MCA v1.0, API v1.0, Component v1.2)
MCA memory: ptmalloc2 (MCA v1.0, API v1.0, Component v1.2)
MCA paffinity: linux (MCA v1.0, API v1.0, Component v1.2)
MCA maffinity: first_use (MCA v1.0, API v1.0, Component v1.2)
MCA timer: linux (MCA v1.0, API v1.0, Component v1.2)
MCA allocator: basic (MCA v1.0, API v1.0, Component v1.0)
MCA allocator: bucket (MCA v1.0, API v1.0, Component v1.0)
MCA coll: basic (MCA v1.0, API v1.0, Component v1.2)
MCA coll: self (MCA v1.0, API v1.0, Component v1.2)
MCA coll: sm (MCA v1.0, API v1.0, Component v1.2)
MCA coll: tuned (MCA v1.0, API v1.0, Component v1.2)
MCA io: romio (MCA v1.0, API v1.0, Component v1.2)
MCA mpool: sm (MCA v1.0, API v1.0, Component v1.2)
MCA pml: cm (MCA v1.0, API v1.0, Component v1.2)
MCA pml: ob1 (MCA v1.0, API v1.0, Component v1.2)
MCA bml: r2 (MCA v1.0, API v1.0, Component v1.2)
MCA rcache: rb (MCA v1.0, API v1.0, Component v1.2)
MCA rcache: vma (MCA v1.0, API v1.0, Component v1.2)
MCA btl: self (MCA v1.0, API v1.0.1, Component v1.2)
MCA btl: sm (MCA v1.0, API v1.0.1, Component v1.2)
MCA btl: tcp (MCA v1.0, API v1.0.1, Component v1.0)
MCA topo: unity (MCA v1.0, API v1.0, Component v1.2)
MCA osc: pt2pt (MCA v1.0, API v1.0, Component v1.2)
MCA errmgr: hnp (MCA v1.0, API v1.3, Component v1.2)
MCA errmgr: orted (MCA v1.0, API v1.3, Component v1.2)
MCA errmgr: proxy (MCA v1.0, API v1.3, Component v1.2)
MCA gpr: null (MCA v1.0, API v1.0, Component v1.2)
MCA gpr: proxy (MCA v1.0, API v1.0, Component v1.2)
MCA gpr: replica (MCA v1.0, API v1.0, Component v1.2)
MCA iof: proxy (MCA v1.0, API v1.0, Component v1.2)
MCA iof: svc (MCA v1.0, API v1.0, Component v1.2)
MCA ns: proxy (MCA v1.0, API v2.0, Component v1.2)
MCA ns: replica (MCA v1.0, API v2.0, Component v1.2)
MCA oob: tcp (MCA v1.0, API v1.0, Component v1.0)
MCA ras: dash_host (MCA v1.0, API v1.3, Component v1.2)
MCA ras: gridengine (MCA v1.0, API v1.3, Component v1.2)
MCA ras: localhost (MCA v1.0, API v1.3, Component v1.2)
MCA ras: slurm (MCA v1.0, API v1.3, Component v1.2)
MCA rds: hostfile (MCA v1.0, API v1.3, Component v1.2)
MCA rds: proxy (MCA v1.0, API v1.3, Component v1.2)
MCA rds: resfile (MCA v1.0, API v1.3, Component v1.2)
MCA rmaps: round_robin (MCA v1.0, API v1.3, Component v1.2)
MCA rmgr: proxy (MCA v1.0, API v2.0, Component v1.2)
MCA rmgr: urm (MCA v1.0, API v2.0, Component v1.2)
MCA rml: oob (MCA v1.0, API v1.0, Component v1.2)
MCA pls: gridengine (MCA v1.0, API v1.3, Component v1.2)
MCA pls: proxy (MCA v1.0, API v1.3, Component v1.2)
MCA pls: rsh (MCA v1.0, API v1.3, Component v1.2)
MCA pls: slurm (MCA v1.0, API v1.3, Component v1.2)
MCA sds: env (MCA v1.0, API v1.0, Component v1.2)
MCA sds: pipe (MCA v1.0, API v1.0, Component v1.2)
MCA sds: seed (MCA v1.0, API v1.0, Component v1.2)
MCA sds: singleton (MCA v1.0, API v1.0, Component v1.2)
MCA sds: slurm (MCA v1.0, API v1.0, Component v1.2)
*/
<config_1.2.log.gz>