Ok, I think I have found the problem.

During std::vector::push_back or emplace_back a reallocation can happen, and
then the memory locations that I had handed to MPI_Isend become invalid.

My loop now reads:

  std::vector<MPI_EventData> eventSendBuf(eventsSize); // Buffer to hold the MPI_EventData object

  for (int i = 0; i < eventsSize; ++i) {
    MPI_Request req;

    eventSendBuf.at(i).size = 5;

    cout << "Isending event " << i << endl;
    MPI_Isend(&eventSendBuf[i], 1, MPI_EVENTDATA, 0, 0, MPI_COMM_WORLD, &req);
    requests.push_back(req);
  }
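
Just for the record, I guess an alternative would have been to keep the
push_back and simply reserve the full capacity up front, so that no
reallocation can invalidate the element addresses. Untested sketch, using the
same variables as above:

  std::vector<MPI_EventData> eventSendBuf;
  eventSendBuf.reserve(eventsSize); // capacity fixed up front, push_back never reallocates

  for (int i = 0; i < eventsSize; ++i) {
    MPI_Request req;

    MPI_EventData eventdata;
    eventdata.size = 5;
    eventSendBuf.push_back(eventdata); // element address stays stable thanks to reserve()

    cout << "Isending event " << i << endl;
    MPI_Isend(&eventSendBuf.back(), 1, MPI_EVENTDATA, 0, 0, MPI_COMM_WORLD, &req);
    requests.push_back(req);
  }

Either way, the buffers handed to MPI_Isend have to stay at a fixed address
until MPI_Waitall has completed on the corresponding requests.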

Best,
Florian


On 19.02.2018 at 10:14, Florian Lindner wrote:
> Hello,
> 
> I am having problems understanding an error valgrind gives me. I tried to
> boil down the program as much as possible. The original program as well as
> the test example both work fine, but when I link the created library into
> another application I get segfaults. I think that this piece of code is to
> blame. I ran valgrind on it and got an invalid read.
> 
> The code can be seen at https://gist.github.com/floli/d62d16ce7cabb4522e2ae7e6b3cfda43 or below.
> 
> It's about 60 lines of C/C++ code.
> 
> I have also attached the valgrind report below the code.
> 
> The code registers a custom MPI datatype and sends it using an Isend. It
> does not crash or produce invalid data, but I fear that the invalid read
> message from valgrind is a hint of an existing memory corruption.
> 
> But I have no idea where that could happen.
> 
> OpenMPI 3.0.0 @ Arch
> 
> I am very thankful for any hints whatsoever!
> 
> Florian
> 
> 
> 
> 
> 
> // Compile and test with: mpicxx -std=c++11 -g -O0 mpitest.cpp && LD_PRELOAD=/usr/lib/valgrind/libmpiwrap-amd64-linux.so mpirun -n 1 valgrind --read-var-info=yes --leak-check=full ./a.out
> 
> #include <vector>
> #include <iostream>
> 
> #include <mpi.h>
> 
> using namespace std;
> 
> struct MPI_EventData
> {
>   int size;
> };
> 
> 
> void collect()
> {
>   // Register MPI datatype
>   MPI_Datatype MPI_EVENTDATA;
>   int blocklengths[] = {1};
>   MPI_Aint displacements[] = {offsetof(MPI_EventData, size) };
>   MPI_Datatype types[] = {MPI_INT};
>   MPI_Type_create_struct(1, blocklengths, displacements, types, &MPI_EVENTDATA);
>   MPI_Type_commit(&MPI_EVENTDATA);
> 
>   int rank, MPIsize;
>   MPI_Comm_rank(MPI_COMM_WORLD, &rank);
>   MPI_Comm_size(MPI_COMM_WORLD, &MPIsize);
> 
>   std::vector<MPI_Request> requests;
>   std::vector<int> eventsPerRank(MPIsize);
>   int eventsSize = 3; // each rank sends three events, the invalid read happens only if eventsSize > 1
>   MPI_Gather(&eventsSize, 1, MPI_INT, eventsPerRank.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
> 
>   std::vector<MPI_EventData> eventSendBuf; // Buffer to hold the MPI_EventData object
> 
>   for (int i = 0; i < eventsSize; ++i) {
>     MPI_EventData eventdata;
>     MPI_Request req;
> 
>     eventdata.size = 5;
>     eventSendBuf.push_back(eventdata);
> 
>     cout << "Isending event " << i << endl;
>     MPI_Isend(&eventSendBuf.back(), 1, MPI_EVENTDATA, 0, 0, MPI_COMM_WORLD, &req);
>     requests.push_back(req);
>   }
> 
>   if (rank == 0) {
>     for (int i = 0; i < MPIsize; ++i) {
>       for (int j = 0; j < eventsPerRank[i]; ++j) {
>         MPI_EventData ev;
>         MPI_Recv(&ev, 1, MPI_EVENTDATA, i, MPI_ANY_TAG, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
> 
>         cout << "Received Size = " << ev.size << endl;
>       }
>     }
>   }
>   MPI_Waitall(requests.size(), requests.data(), MPI_STATUSES_IGNORE);
>   MPI_Type_free(&MPI_EVENTDATA);
> }
> 
> 
> int main(int argc, char *argv[])
> {
>   MPI_Init(&argc, &argv);
> 
>   collect();
> 
>   MPI_Finalize();
> }
> 
> 
> /*
> 
>  % mpicxx -std=c++11 -g -O0 mpitest.cpp  && 
> LD_PRELOAD=/usr/lib/valgrind/libmpiwrap-amd64-linux.so mpirun -n 1 valgrind
> --read-var-info=yes --leak-check=full ./a.out
> ==13584== Memcheck, a memory error detector
> ==13584== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
> ==13584== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
> ==13584== Command: ./a.out
> ==13584==
> valgrind MPI wrappers 13584: Active for pid 13584
> valgrind MPI wrappers 13584: Try MPIWRAP_DEBUG=help for possible options
> ==13584== Thread 3:
> ==13584== Syscall param epoll_pwait(sigmask) points to unaddressable byte(s)
> ==13584==    at 0x61A0FE6: epoll_pwait (in /usr/lib/libc-2.26.so)
> ==13584==    by 0x677CDDC: ??? (in /usr/lib/openmpi/libopen-pal.so.40.0.0)
> ==13584==    by 0x6780EDA: opal_libevent2022_event_base_loop (in 
> /usr/lib/openmpi/libopen-pal.so.40.0.0)
> ==13584==    by 0x93100CE: ??? (in 
> /usr/lib/openmpi/openmpi/mca_pmix_pmix2x.so)
> ==13584==    by 0x5E9408B: start_thread (in /usr/lib/libpthread-2.26.so)
> ==13584==    by 0x61A0E7E: clone (in /usr/lib/libc-2.26.so)
> ==13584==  Address 0x0 is not stack'd, malloc'd or (recently) free'd
> ==13584==
> Isending event 0
> ==13584== Thread 1:
> ==13584== Invalid read of size 2
> ==13584==    at 0x4C33B20: memmove (vg_replace_strmem.c:1258)
> ==13584==    by 0x11A7BB: MPI_EventData* std::__copy_move<true, true,
> std::random_access_iterator_tag>::__copy_m<MPI_EventData>(MPI_EventData 
> const*, MPI_EventData const*, MPI_EventData*)
> (stl_algobase.h:368)
> ==13584==    by 0x11A70B: MPI_EventData* std::__copy_move_a<true, 
> MPI_EventData*, MPI_EventData*>(MPI_EventData*,
> MPI_EventData*, MPI_EventData*) (stl_algobase.h:386)
> ==13584==    by 0x11A62B: MPI_EventData* std::__copy_move_a2<true, 
> MPI_EventData*, MPI_EventData*>(MPI_EventData*,
> MPI_EventData*, MPI_EventData*) (stl_algobase.h:424)
> ==13584==    by 0x11A567: MPI_EventData* 
> std::copy<std::move_iterator<MPI_EventData*>,
> MPI_EventData*>(std::move_iterator<MPI_EventData*>, 
> std::move_iterator<MPI_EventData*>, MPI_EventData*) (stl_algobase.h:456)
> ==13584==    by 0x11A478: MPI_EventData*
> std::__uninitialized_copy<true>::__uninit_copy<std::move_iterator<MPI_EventData*>,
> MPI_EventData*>(std::move_iterator<MPI_EventData*>, 
> std::move_iterator<MPI_EventData*>, MPI_EventData*)
> (stl_uninitialized.h:101)
> ==13584==    by 0x11A306: MPI_EventData* 
> std::uninitialized_copy<std::move_iterator<MPI_EventData*>,
> MPI_EventData*>(std::move_iterator<MPI_EventData*>, 
> std::move_iterator<MPI_EventData*>, MPI_EventData*)
> (stl_uninitialized.h:134)
> ==13584==    by 0x11A05B: MPI_EventData* 
> std::__uninitialized_copy_a<std::move_iterator<MPI_EventData*>, 
> MPI_EventData*,
> MPI_EventData>(std::move_iterator<MPI_EventData*>, 
> std::move_iterator<MPI_EventData*>, MPI_EventData*,
> std::allocator<MPI_EventData>&) (stl_uninitialized.h:289)
> ==13584==    by 0x119AEC: MPI_EventData* 
> std::__uninitialized_move_if_noexcept_a<MPI_EventData*, MPI_EventData*,
> std::allocator<MPI_EventData> >(MPI_EventData*, MPI_EventData*, 
> MPI_EventData*, std::allocator<MPI_EventData>&)
> (stl_uninitialized.h:312)
> ==13584==    by 0x1190D2: void std::vector<MPI_EventData, 
> std::allocator<MPI_EventData>
>> ::_M_realloc_insert<MPI_EventData 
>> const&>(__gnu_cxx::__normal_iterator<MPI_EventData*, 
>> std::vector<MPI_EventData,
> std::allocator<MPI_EventData> > >, MPI_EventData const&) (vector.tcc:424)
> ==13584==    by 0x118B17: std::vector<MPI_EventData, 
> std::allocator<MPI_EventData> >::push_back(MPI_EventData const&)
> (stl_vector.h:948)
> ==13584==    by 0x113B6E: collect() (mpitest.cpp:42)
> ==13584==  Address 0xd315ca0 is 0 bytes inside a block of size 4 alloc'd
> ==13584==    at 0x4C2D54F: operator new(unsigned long) 
> (vg_replace_malloc.c:334)
> ==13584==    by 0x11A2BB: 
> __gnu_cxx::new_allocator<MPI_EventData>::allocate(unsigned long, void const*)
> (new_allocator.h:111)
> ==13584==    by 0x119FE2: std::allocator_traits<std::allocator<MPI_EventData>
>> ::allocate(std::allocator<MPI_EventData>&, unsigned long) 
>> (alloc_traits.h:436)
> ==13584==    by 0x119A87: std::_Vector_base<MPI_EventData, 
> std::allocator<MPI_EventData> >::_M_allocate(unsigned long)
> (stl_vector.h:172)
> ==13584==    by 0x119055: void std::vector<MPI_EventData, 
> std::allocator<MPI_EventData>
>> ::_M_realloc_insert<MPI_EventData 
>> const&>(__gnu_cxx::__normal_iterator<MPI_EventData*, 
>> std::vector<MPI_EventData,
> std::allocator<MPI_EventData> > >, MPI_EventData const&) (vector.tcc:406)
> ==13584==    by 0x118B17: std::vector<MPI_EventData, 
> std::allocator<MPI_EventData> >::push_back(MPI_EventData const&)
> (stl_vector.h:948)
> ==13584==    by 0x113B6E: collect() (mpitest.cpp:42)
> ==13584==    by 0x113DEE: main (mpitest.cpp:68)
> ==13584==
> Isending event 1
> Isending event 2
> Received Size = 5
> Received Size = 5
> Received Size = 5
> ==13584==
> ==13584== HEAP SUMMARY:
> ==13584==     in use at exit: 1,898 bytes in 44 blocks
> ==13584==   total heap usage: 18,037 allocs, 17,993 frees, 4,086,090 bytes 
> allocated
> ==13584==
> ==13584== 5 bytes in 1 blocks are definitely lost in loss record 1 of 44
> ==13584==    at 0x4C2CEDF: malloc (vg_replace_malloc.c:299)
> ==13584==    by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so)
> ==13584==    by 0x954B720: ???
> ==13584==    by 0x9312913: ???
> ==13584==    by 0x9313114: ???
> ==13584==    by 0x930FE31: ???
> ==13584==    by 0x92CA3AD: ???
> ==13584==    by 0x92A3ED7: ???
> ==13584==    by 0x867C876: ???
> ==13584==    by 0x647BBEA: orte_init (in 
> /usr/lib/openmpi/libopen-rte.so.40.0.0)
> ==13584==    by 0x52E3F77: ompi_mpi_init (in 
> /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==    by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==
> ==13584== 12 bytes in 1 blocks are definitely lost in loss record 2 of 44
> ==13584==    at 0x4C2CEDF: malloc (vg_replace_malloc.c:299)
> ==13584==    by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so)
> ==13584==    by 0x954FA92: ???
> ==13584==    by 0x9323720: ???
> ==13584==    by 0x92CA651: ???
> ==13584==    by 0x92A3ED7: ???
> ==13584==    by 0x867C876: ???
> ==13584==    by 0x647BBEA: orte_init (in 
> /usr/lib/openmpi/libopen-rte.so.40.0.0)
> ==13584==    by 0x52E3F77: ompi_mpi_init (in 
> /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==    by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==    by 0x4E4A2C0: PMPI_Init (libmpiwrap.c:2271)
> ==13584==    by 0x113DE9: main (mpitest.cpp:66)
> ==13584==
> ==13584== 35 bytes in 1 blocks are definitely lost in loss record 24 of 44
> ==13584==    at 0x4C2CEDF: malloc (vg_replace_malloc.c:299)
> ==13584==    by 0x6130B9A: strdup (in /usr/lib/libc-2.26.so)
> ==13584==    by 0x954B6B8: ???
> ==13584==    by 0x9312913: ???
> ==13584==    by 0x9313114: ???
> ==13584==    by 0x930FE31: ???
> ==13584==    by 0x92CA3AD: ???
> ==13584==    by 0x92A3ED7: ???
> ==13584==    by 0x867C876: ???
> ==13584==    by 0x647BBEA: orte_init (in 
> /usr/lib/openmpi/libopen-rte.so.40.0.0)
> ==13584==    by 0x52E3F77: ompi_mpi_init (in 
> /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==    by 0x530B78D: PMPI_Init (in /usr/lib/openmpi/libmpi.so.40.0.0)
> ==13584==
> ==13584== 1,608 (320 direct, 1,288 indirect) bytes in 1 blocks are definitely 
> lost in loss record 44 of 44
> ==13584==    at 0x4C2F0FF: realloc (vg_replace_malloc.c:785)
> ==13584==    by 0x92B1D7E: ???
> ==13584==    by 0x92B9FFE: ???
> ==13584==    by 0x92BB03E: ???
> ==13584==    by 0x6781AD8: opal_libevent2022_event_base_loop (in 
> /usr/lib/openmpi/libopen-pal.so.40.0.0)
> ==13584==    by 0x93100CE: ???
> ==13584==    by 0x5E9408B: start_thread (in /usr/lib/libpthread-2.26.so)
> ==13584==    by 0x61A0E7E: clone (in /usr/lib/libc-2.26.so)
> ==13584==
> ==13584== LEAK SUMMARY:
> ==13584==    definitely lost: 372 bytes in 4 blocks
> ==13584==    indirectly lost: 1,288 bytes in 34 blocks
> ==13584==      possibly lost: 0 bytes in 0 blocks
> ==13584==    still reachable: 238 bytes in 6 blocks
> ==13584==         suppressed: 0 bytes in 0 blocks
> ==13584== Reachable blocks (those to which a pointer was found) are not shown.
> ==13584== To see them, rerun with: --leak-check=full --show-leak-kinds=all
> ==13584==
> ==13584== For counts of detected and suppressed errors, rerun with: -v
> ==13584== ERROR SUMMARY: 44 errors from 6 contexts (suppressed: 0 from 0)
> 
> */
_______________________________________________
users mailing list
users@lists.open-mpi.org
https://lists.open-mpi.org/mailman/listinfo/users
