Hi, I was wondering whether anyone else has seen this issue, or whether it is specific to the particular cluster I am using. The problem is that the runs stall after a few hours: two processes (of 10-12) get killed due to a failed assertion within the MPI library (I'm using Open MPI on a QLogic InfiniPath IB network). From the backtrace, it looks like the recompose step is the one initiating the MPI call. Despite the name of the executable, this was actually run on a Sandy Bridge processor. The OS is CentOS 6, and I'm using icc version 13.1.1. The ET version is Orsted.
I saw this issue with many different runs. However, the backtrace and assert failure below came from runs that used the "ParitySymmetry" thorn and the associated changes to CarpetRegrid2. I included the patch to CarpetRegrid2 at the bottom. n015:3.0.Assertion failure at ptl.c:200: nbytes == msglen n018:3.0.Assertion failure at ptl.c:200: nbytes == msglen Backtrace from rank 2 pid 27135: 1. /lib64/libc.so.6() [0x32a4832920] 2. /lib64/libc.so.6(gsignal+0x35) [0x32a48328a5] 3. /lib64/libc.so.6(abort+0x175) [0x32a4834085] 4. /usr/lib64/libpsm_infinipath.so.1(+0x17b6d) [0x7f577e6c6b6d] 5. /usr/lib64/libpsm_infinipath.so.1(psmi_handle_error+0x261) [0x7f577e6c6dd1] 6. /usr/lib64/libpsm_infinipath.so.1(psmi_am_mq_handler_rtsmatch+0x17a) [0x7f577e6c1f6a] 7. /usr/lib64/libpsm_infinipath.so.1(+0xa832) [0x7f577e6b9832] 8. /usr/lib64/libpsm_infinipath.so.1(+0xd90f) [0x7f577e6bc90f] 9. /usr/lib64/libpsm_infinipath.so.1(psmi_poll_internal+0x29) [0x7f577e6df309] a. /usr/lib64/libpsm_infinipath.so.1(psm_mq_ipeek+0xa5) [0x7f577e6dde05] b. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/openmpi/mca_mtl_psm.so(+0x15f4) [0x7f577e9025f4] c. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libopen-pal.so.0(opal_progress+0x5a) [0x7f57815f30fa] d. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(+0x35685) [0x7f5782f8a685] e. /usr/mpi/gcc/openmpi-1.4.3-qlc/lib64/libmpi.so.0(PMPI_Waitall+0xa3) [0x7f5782fb6c73] f. comm_state::step() [./cactus_lazevnehalem(_ZN10comm_state4stepEv+0x4a2) [0x8826c2]] 10. dh::recompose(int, bool) [./cactus_lazevnehalem(_ZN2dh9recomposeEib+0x218) [0x89d8a8]] 11. gh::recompose(int, bool) [./cactus_lazevnehalem(_ZN2gh9recomposeEib+0x52) [0x8dc722]] 12. 
Carpet::Recompose(_cGH const*, int, bool) [./cactus_lazevnehalem(_ZN6Carpet9RecomposeEPK4_cGHib+0xea) [0x7db6ea]] diff --git a/Carpet/CarpetRegrid2/param.ccl b/Carpet/CarpetRegrid2/param.ccl index c7327d2..4843abe 100644 --- a/Carpet/CarpetRegrid2/param.ccl +++ b/Carpet/CarpetRegrid2/param.ccl @@ -62,6 +62,11 @@ BOOLEAN symmetry_rotating180 "Ensure a 180 degree rotating symmetry about the z { } no +BOOLEAN symmetry_parity "parity " +{ +} no + + BOOLEAN symmetry_periodic_x "Ensure a periodicity symmetry in the x direction" { } no diff --git a/Carpet/CarpetRegrid2/src/paramcheck.cc b/Carpet/CarpetRegrid2/src/paramcheck.cc index 5cc8978..679a562 100644 --- a/Carpet/CarpetRegrid2/src/paramcheck.cc +++ b/Carpet/CarpetRegrid2/src/paramcheck.cc @@ -25,7 +25,7 @@ namespace CarpetRegrid2 { DECLARE_CCTK_ARGUMENTS; DECLARE_CCTK_PARAMETERS; - enum sym_t { sym_unknown, sym_90, sym_180 }; + enum sym_t { sym_unknown, sym_90, sym_180, sym_parity }; int num_params = 0; sym_t params = sym_unknown; @@ -40,7 +40,13 @@ namespace CarpetRegrid2 { params = sym_180; param = "symmetry_rotating180"; } - + + if (symmetry_parity) { + ++num_params; + params = sym_parity; + param = "symmetry_parity"; + } + int num_thorns = 0; sym_t thorns = sym_unknown; char const* thorn = ""; @@ -59,13 +65,18 @@ namespace CarpetRegrid2 { thorns = sym_180; thorn = "RotatingSymmetry180"; } - + if (CCTK_IsThornActive ("ParitySymmetry")) { + ++num_thorns; + thorns = sym_parity; + thorn = "ParitySymmetry"; + } + if (num_params > 1) { - CCTK_PARAMWARN ("Too many of the symmetry parameters symmetry_rotating90 and symmetry_rotating180 are specified. (At most one of these can be specified.)"); + CCTK_PARAMWARN ("Too many of the symmetry parameters at least two of symmetry_rotating90, symmetry_rotating180, and parity_symmetry are specified. 
(At most one of these can be specified.)"); } if (num_thorns > 1) { - CCTK_PARAMWARN ("Too many of the symmetry thorns RotatingSymmetry90, RotatingSymmetry90r, and RotatingSymmetry180 are active. (At most one of these can be active.)"); + CCTK_PARAMWARN ("Too many of the symmetry thorns RotatingSymmetry90, RotatingSymmetry90r, RotatingSymmetry180, and ParitySymmetry are active. (At most one of these can be active.)"); } if (params != sym_unknown and thorns != sym_unknown and params != thorns) { diff --git a/Carpet/CarpetRegrid2/src/property.cc b/Carpet/CarpetRegrid2/src/property.cc index a568e82..c2a31a1 100644 --- a/Carpet/CarpetRegrid2/src/property.cc +++ b/Carpet/CarpetRegrid2/src/property.cc @@ -577,7 +577,121 @@ namespace CarpetRegrid2 { } } + ////////////////////////////////////////////////////////////////////////////// + // Make the boxes parity symmetric + ////////////////////////////////////////////////////////////////////////////// + + ibset parsym:: + symmetrised_regions (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset> const& regions, int const rl) + { + ibbox const& baseextent = hh.baseextent(0,rl); + + ibset symmetrised = regions.at(rl); + for (ibset::const_iterator + ibb = regions.at(rl).begin(); ibb != regions.at(rl).end(); ++ ibb) + { + ibbox const& bb = *ibb; + + bvect const lower_is_outside_lower = + bb.lower() - bnd.min_bnd_dist_away[0] * bb.stride() <= + bnd.level_physical_ilower; + + // Treat z direction + int const dir = 2; + if (lower_is_outside_lower[dir]) { + ivect const ilo = bb.lower(); + ivect const iup = bb.upper(); + ivect const istr = bb.stride(); + assert (istr[0] == istr[1]); + + // Origin + assert (hh.refcent == vertex_centered or all (istr % 2 == 0)); + rvect const axis ( (bnd.physical_lower[0] + bnd.physical_upper[0]) / 2, + (bnd.physical_lower[1] + bnd.physical_upper[1]) / 2, + bnd.physical_lower[2]); + ivect const iaxis0 = rpos2ipos (axis, bnd.origin, bnd.scale, hh, rl); + assert (all ((iaxis0 - 
baseextent.lower()) % istr == 0)); + ivect const iaxis1 = rpos2ipos1 (axis, bnd.origin, bnd.scale, hh, rl); + assert (all ((iaxis1 - baseextent.lower()) % istr == 0)); + ivect const offset = iaxis1 - iaxis0; + assert (all (offset % istr == 0)); + if (hh.refcent == vertex_centered) { + assert (all (offset >= 0 and offset < 2*istr)); + assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == 0)); + } else { + // The offset may be negative because both boundaries are + // shifted inwards by 1/2 grid spacing, and therefore iaxis0 + // < iaxis1 + istr + assert (all (offset >= -istr and offset < istr)); + assert (all ((iaxis0 + iaxis1 - offset) % (2*istr) == istr)); + assert (all (istr % 2 == 0)); + } + ivect const iaxis = (iaxis0 + iaxis1 - offset) / 2; + ivect const neg_ilo = (2*iaxis+offset) - ilo; + ivect const neg_iup = (2*iaxis+offset) - iup; + + // Rotate 180 degrees about z axis + ivect const new_ilo (neg_iup[0], neg_iup[1], neg_iup[2]); + ivect const new_iup (neg_ilo[0], neg_ilo[1], neg_ilo[2]); + ivect const new_istr (istr); + + ibbox const new_bb (new_ilo, new_iup, new_istr); + // Will be clipped later + // assert (new_bb.is_contained_in (baseextent)); + + // symmetrised |= new_bb & baseextent; + symmetrised |= new_bb; + } + } + + return symmetrised; + } + + bool parsym:: + test_impl (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset> const& regions, int const rl) + { + DECLARE_CCTK_PARAMETERS; + + if (not symmetry_parity) return true; + + ibset const symmetrised = symmetrised_regions (hh, dd, bnd, regions, rl); + + // We cannot test for equality, since the difference may be + // outside of the domain (and hence irrelevant) + // return regions.AT(rl) == symmetrised; + + // Test whether any part of the difference (i.e. that part of the + // level that would be added by symmetrising) is inside the + // domain. If the difference is outside, we can safely ignore it. 
+ ibbox const& baseextent = hh.baseextent(0,rl); + ibset const difference = symmetrised - regions.AT(rl); + return (difference & baseextent).empty(); + } + void parsym:: + enforce_impl (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset>& regions, int const rl) + { + DECLARE_CCTK_PARAMETERS; + + assert (symmetry_parity); + + if (veryverbose) { + cout << "Refinement level " << rl << ": making regions parity symmetric...\n"; + } + + regions.AT(rl) = symmetrised_regions (hh, dd, bnd, regions, rl); + + if (veryverbose) { + cout << " New regions are " << regions.at(rl) << "\n"; + } + } + ////////////////////////////////////////////////////////////////////////////// // Make the boxes periodic in one direction diff --git a/Carpet/CarpetRegrid2/src/property.hh b/Carpet/CarpetRegrid2/src/property.hh index d5540d6..b0080c7 100644 --- a/Carpet/CarpetRegrid2/src/property.hh +++ b/Carpet/CarpetRegrid2/src/property.hh @@ -112,6 +112,18 @@ namespace CarpetRegrid2 { vector<ibset>& regions, int rl); }; + // Make the boxes parity symmetric + class parsym: public property { + ibset symmetrised_regions (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset> const& regions, int rl); + bool test_impl (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset> const& regions, int rl); + void enforce_impl (gh const& hh, dh const& dd, + level_boundary const& bnd, + vector<ibset>& regions, int rl); + }; // Make the boxes rotating-180 symmetric diff --git a/Carpet/CarpetRegrid2/src/regrid.cc b/Carpet/CarpetRegrid2/src/regrid.cc index 427d8b0..5b32a32 100644 --- a/Carpet/CarpetRegrid2/src/regrid.cc +++ b/Carpet/CarpetRegrid2/src/regrid.cc @@ -329,6 +329,7 @@ namespace CarpetRegrid2 { properties.push_back (new snap_coarse()); properties.push_back (new rotsym90()); properties.push_back (new rotsym180()); + properties.push_back (new parsym()); properties.push_back (new periodic<0>()); properties.push_back (new periodic<1>()); 
properties.push_back (new periodic<2>()); _______________________________________________ Users mailing list Users@einsteintoolkit.org http://lists.einsteintoolkit.org/mailman/listinfo/users