Sorry about the 'idx' vs 'index' mix-up. In case Rolf is not correct about this being fixed, the dbx dump of the topology object is below. -Paul
(dbx) print *topology *topology = { nb_levels = 3736059629U next_group_depth = 3736059629U level_nbobjects = (3800392320U, 4294966655U, 1U, 1702192501U, 3792869728U, 4294966655U, 408U, 16777216U, 4428736U, 0, 5150816U, 0, 0, 0, 65U, 0, 0, 0, 842477617U, 859058744U, 808333362U, 1885566011U, 825175866U, 825111095U, 774909494U, 741356081U, 775042865U, 808335409U, 808595758U, 876164154U, 14390U, 0, 0, 0, 65U, 0, 0, 0, 5065504U, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65U, 0, 0, 0, 1651664214U, 1953067887U, 1701585017U, 543974774U, 544370534U, 543516788U, 1953853298U, 1713398885U, 1701667186U, 1802661751U, 540026912U, 1869488189U, 1919252000U, 1769172834U, 2718068U, 0, 65U, 0, 4219982788U, 4294967295U, 3736059629U, 3736059629U, 3800400896U, 4294966655U, 1U, 0, 3800248320U, 4294966655U, 81U, 0, 16370U, 1072824320U, 0, 0, 0, 0, 65U, 0, 0, 0, 1852404304U, 1869881460U, 1701344288U, 1684957472U, 1952539497U, 1713398885U, 543517801U, 1025518888U, 1953701950U, 1953853284U, 539697196U, 1931492925U, 1919247476U, 10610U, 1835363691U, 1667326815U, 65U, 0, 0, 0, 1651664214U, 1953067887U, 1701585017U, 543974774U, 544370534U, 543516788U, 1713398372U, 1701667186U, 1802661751U, 540026912U) levels = (0x726576206f6e203d, 0x29797469736f62, 0xfffffffffb87d42c, 0x41, (nil), 0x2072656874656857, 0x612070616d206f74, 0x206b6e617220646e, 0x65737365636f7270, 0x2d646e756f722073, 0x7962206e69626f72, 0x74690065726f6320, 0x2979, 0x41, 0x61636d5f75706367, 0x7469736f62726556, 0x206c6576656c2079, 0x2065687420726f66, 0x206d6d6f63707267, 0x726f77656d617266, 0x6e203d203028206b, 0x736f62726576206f, 0x29797469, 0x41, 0x180, 0x7469736f62726556, 0x206c6576656c2079, 0x2065687420726f66, 0x6d61726620736172, 0x3028206b726f7765, 0x6576206f6e203d20, 0x29797469736f6272, (nil), 0x41, 0x4d4dc0, 0x7469736f62726556, 0x206c6576656c2079, 0x2065687420726f66, 0x7266207370616d72, 0x206b726f77656d61, 0x206f6e203d203028, 0x7469736f62726576, 0x2979, 0x41, 0x6361635f6d656d6b, 0x2072656874656857, 0x6c70736964206f74, 
0x6669642061207961, 0x727020656c626166, 0x616d20737365636f, 0x2072657466612070, 0x6f63207369207469, 0x64657475706d, 0x41, 0xfffffffffb87d42c, 0x2072656874656857, 0x612070616d206f74, 0x206b6e617220646e, 0x65737365636f7270, 0x2d646e756f722073, 0x7962206e69626f72, 0x786900746f6c7320, (nil), 0x41, (nil), 0x2072656874656857, 0x612070616d206f74, 0x206b6e617220646e, 0x65737365636f7270, 0x2d646e756f722073, 0x7962206e69626f72, 0x65646f6e20, (nil), 0x41, (nil), 0x6f207265626d754e, 0x7420737570632066, 0x6f6620657375206f, 0x7220686361652072, 0x322d315b206b6e61, 0x6564282035312a2a, 0x29313d746c756166, 0x5d, 0x41, (nil), 0x6f207265626d754e, 0x7420737570632066, 0x6f6620657375206f, 0x7220686361652072, 0x322d315b206b6e61, 0x6564282035312a2a, 0x29313d746c756166, 0x6c6974736e72005d, 0x41, (nil), 0x2072656874656857, 0x6c70736964206f74, 0x7020656874207961, 0x6d20737365636f72, 0x7265746661207061, 0x6320736920746920, 0x64657475706d6f, 0x720, 0x41, 0xfffffffffba56f14, 0x70614d20414d414c, 0x7365636f7250203a, 0x74756f79616c2073, 0x6974617265746920, 0x726564726f206e6f, 0x6565532820676e69, 0x6e656d75636f6420, 0x296e6f69746174, 0x41, (nil), 0x17500000174, 0x17700000176, 0x17900000178, 0x17b0000017a, 0x650000017c, (nil), 0x100000000000000, 0x6361635f6d656d6b, 0x41, (nil), 0x6200000061, 0x6400000063, 0x6600000065) flags = 446676598887U type_depth = (105, 1935762796, 1702256485, 1869181810, -1306853266, -254, -74984404, -1, 65, 0, 0, 0) ignored_types = (842477617, 859058744, 808333362, 1885566011, 825175866, 825111095, 774909494, 741356081, 775042865, 808335409, 808595758, 876164154) is_thissystem = 1862285366 is_loaded = 2037672307 pid = 41 bridge_nbobjects = 0 bridge_level = 0x41 first_bridge = 0x6361635f64657263 last_bridge = 0x7469736f62726556 pcidev_nbobjects = 1701585017U pcidev_level = 0x2065687420726f66 first_pcidev = 0x61726620736c646f last_pcidev = 0x28206b726f77656d osdev_nbobjects = 540876848U osdev_level = 0x797469736f627265 first_osdev = 0x29 last_osdev = 0x41 binding_hooks = { 
set_thisproc_cpubind = 0x720 get_thisproc_cpubind = 0x3334323832373031 set_thisthread_cpubind = 0x7063743b302e3032 get_thisthread_cpubind = 0x312e3237312f2f3a set_proc_cpubind = 0x2c3032312e302e36 get_proc_cpubind = 0x302e38312e323731 set_thread_cpubind = 0x3439343a3032312e get_thread_cpubind = 0x3836 get_thisproc_last_cpu_location = (nil) get_thisthread_last_cpu_location = 0x41 get_proc_last_cpu_location = 0x4d51d0 set_thisproc_membind = 0x3334323832373031 get_thisproc_membind = 0x7063743b302e3032 set_thisthread_membind = 0x312e3237312f2f3a get_thisthread_membind = 0x2c3032312e302e36 set_proc_membind = 0x302e38312e323731 get_proc_membind = 0x3439343a3032312e set_area_membind = 0x6568003836 get_area_membind = (nil) alloc = 0x41 alloc_membind = 0x6361635f6d656d6b free_membind = 0x65706f2f706d742f } support = { discovery = 0x7365732d69706d6e cpubind = 0x68702d736e6f6973 membind = 0x7040766f72677261 } userdata_export_cb = 0x5f30322d6a2d7063 userdata_import_cb = 0x2f30373336312f30 first_osdist = 0x302f30 last_osdist = 0xfffffffffba56f14 backends = 0x41 } On Wed, Dec 17, 2014 at 12:54 PM, Rolf vandeVaart <rvandeva...@nvidia.com> wrote: > > I think this has already been fixed by Ralph this morning. I had > observed the same issue but is now gone. 
> > > > *From:* devel [mailto:devel-boun...@open-mpi.org] *On Behalf Of *Brice > Goglin > *Sent:* Wednesday, December 17, 2014 3:53 PM > *To:* de...@open-mpi.org > *Subject:* Re: [OMPI devel] Solaris/x86-64 SEGV with 1.8-latest > > > > Le 17/12/2014 21:43, Paul Hargrove a écrit : > > > > Dbx gives me > > t@1 (l@1) terminated by signal SEGV (no mapping at the fault address) > > Current function is opal_hwloc172_hwloc_get_obj_by_depth > > 74 return topology->levels[depth][idx]; > > (dbx) where > > current thread: t@1 > > =>[1] opal_hwloc172_hwloc_get_obj_by_depth(topology = 0x4d49e0, depth = 0, > idx = 0), line 74 in "traversal.c" > > [2] opal_hwloc172_hwloc_get_root_obj(topology = 0x4d49e0), line 118 in > "helper.h" > > [3] opal_hwloc_base_get_nbobjs_by_type(topo = 0x4d49e0, target = > OPAL_HWLOC172_hwloc_OBJ_CORE, cache_level = 0, rtype = '\003'), line 833 in > "hwloc_base_util.c" > > [4] orte_rmaps_rr_byobj(jdata = 0x43c940, app = 0x483fe0, node_list = > 0xfffffd7fffdff4b0, num_slots = 2, num_procs = 2U, target = > OPAL_HWLOC172_hwloc_OBJ_CORE, cache_level = 0), line 495 in > "rmaps_rr_mappers.c" > > [5] orte_rmaps_rr_map(jdata = 0x43c940), line 165 in "rmaps_rr.c" > > [6] orte_rmaps_base_map_job(fd = -1, args = 4, cbdata = 0x4a3300), line > 277 in "rmaps_base_map_job.c" > > [7] event_process_active_single_queue(0x0, 0x0, 0x0, 0x0, 0x0, 0x0), at > 0xfffffd7fe453afbc > > [8] event_process_active(0x0, 0x0, 0x0, 0x0, 0x0, 0x0), at > 0xfffffd7fe453b361 > > [9] opal_libevent2021_event_base_loop(0x0, 0x0, 0x0, 0x0, 0x0, 0x0), at > 0xfffffd7fe453bc79 > > [10] orterun(argc = 9, argv = 0xfffffd7fffdffa58), line 1081 in > "orterun.c" > > [11] main(argc = 9, argv = 0xfffffd7fffdffa58), line 13 in "main.c" > > (dbx) print depth > > depth = 0 > > (dbx) print index > > index = 0xfffffd7fff19c174 > > > > Pretty sure that index value is bogus. > > > > > I see "idx" instead of "index" in the code above. index may be a pointer > to the "index()" function in your standard library? 
> Anyway, depth=0 and idx=0 are totally valid, especially when called from > hwloc_get_root_obj(). Did something bad happen to the topology object? Can > you print the contents of topology and topology->nb_levels and > topology->levels? > > Brice > ------------------------------ > This email message is for the sole use of the intended recipient(s) and > may contain confidential information. Any unauthorized review, use, > disclosure or distribution is prohibited. If you are not the intended > recipient, please contact the sender by reply email and destroy all copies > of the original message. > ------------------------------ > > _______________________________________________ > devel mailing list > de...@open-mpi.org > Subscription: http://www.open-mpi.org/mailman/listinfo.cgi/devel > Link to this post: > http://www.open-mpi.org/community/lists/devel/2014/12/16652.php > -- Paul H. Hargrove phhargr...@lbl.gov Computer Languages & Systems Software (CLaSS) Group Computer Science Department Tel: +1-510-495-2352 Lawrence Berkeley National Laboratory Fax: +1-510-486-6900