Hi Gilles, > From the JVM logs, there is an alignment error in native_get_attr > but I could not find it by reading the source code. > > Could you please do > ulimit -c unlimited > mpiexec ... > and then > gdb <your path to java>/bin/java core > And run bt on all threads until you get a line number in native_get_attr
I found pmix_native.c:1131 in native_get_attr, attached gdb to the Java process and set a breakpoint to this line. From there I single stepped until I got SIGSEGV, so that you can see what happened. (gdb) b pmix_native.c:1131 No source file named pmix_native.c. Make breakpoint pending on future shared library load? (y or [n]) y Breakpoint 1 (pmix_native.c:1131) pending. (gdb) thread 14 [Switching to thread 14 (Thread 2 (LWP 2))] #0 0xffffffff7eadc6b0 in __pollsys () from /lib/sparcv9/libc.so.1 (gdb) f 3 #3 0xfffffffee5122230 in JNI_OnLoad (vm=0xffffffff7e57e9d8 <main_vm>, reserved=0x0) at ../../../../../openmpi-dev-178-ga16c1e4/ompi/mpi/java/c/mpi_MPI.c:128 128 while (_dbg) poll(NULL, 0, 1); (gdb) set _dbg=0 (gdb) c Continuing. [New LWP 13 ] Breakpoint 1, native_get_attr (attr=0xfffffffee2e05db0 "pmix.jobid", kv=0xffffffff7b4ff028) at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131 1131 OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, (gdb) s opal_proc_local_get () at ../../../openmpi-dev-178-ga16c1e4/opal/util/proc.c:80 80 return opal_proc_my_name; (gdb) 81 } (gdb) _process_name_print_for_opal (procname=14259803799433510912) at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:64 64 orte_process_name_t* rte_name = (orte_process_name_t*)&procname; (gdb) 65 return ORTE_NAME_PRINT(rte_name); (gdb) orte_util_print_name_args (name=0xffffffff7b4feb90) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:122 122 if (NULL == name) { (gdb) 142 job = orte_util_print_jobids(name->jobid); (gdb) orte_util_print_jobids (job=3320119297) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:170 170 ptr = get_print_name_buffer(); (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92 92 if (!fns_init) { (gdb) 101 ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr); (gdb) opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe8a0) at 
../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163 163 *valuep = pthread_getspecific(key); (gdb) 164 return OPAL_SUCCESS; (gdb) 165 } (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102 102 if (OPAL_SUCCESS != ret) return NULL; (gdb) 104 if (NULL == ptr) { (gdb) 113 return (orte_print_args_buffers_t*) ptr; (gdb) 114 } (gdb) orte_util_print_jobids (job=3320119297) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:172 172 if (NULL == ptr) { (gdb) 178 if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { (gdb) 179 ptr->cntr = 0; (gdb) 182 if (ORTE_JOBID_INVALID == job) { (gdb) 184 } else if (ORTE_JOBID_WILDCARD == job) { (gdb) 187 tmp1 = ORTE_JOB_FAMILY((unsigned long)job); (gdb) 188 tmp2 = ORTE_LOCAL_JOBID((unsigned long)job); (gdb) 189 snprintf(ptr->buffers[ptr->cntr++], (gdb) 193 return ptr->buffers[ptr->cntr-1]; (gdb) 194 } (gdb) orte_util_print_name_args (name=0xffffffff7b4feb90) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:143 143 vpid = orte_util_print_vpids(name->vpid); (gdb) orte_util_print_vpids (vpid=0) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:260 260 ptr = get_print_name_buffer(); (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92 92 if (!fns_init) { (gdb) 101 ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr); (gdb) opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe8b0) at ../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163 163 *valuep = pthread_getspecific(key); (gdb) 164 return OPAL_SUCCESS; (gdb) 165 } (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102 102 if (OPAL_SUCCESS != ret) return NULL; (gdb) 104 if (NULL == ptr) { (gdb) 113 return (orte_print_args_buffers_t*) ptr; (gdb) 114 } (gdb) orte_util_print_vpids (vpid=0) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:262 262 if (NULL == ptr) { (gdb) 268 if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { (gdb) 272 if (ORTE_VPID_INVALID == vpid) { 
(gdb) 274 } else if (ORTE_VPID_WILDCARD == vpid) { (gdb) 277 snprintf(ptr->buffers[ptr->cntr++], (gdb) 281 return ptr->buffers[ptr->cntr-1]; (gdb) 282 } (gdb) orte_util_print_name_args (name=0xffffffff7b4feb90) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:146 146 ptr = get_print_name_buffer(); (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:92 92 if (!fns_init) { (gdb) 101 ret = opal_tsd_getspecific(print_args_tsd_key, (void**)&ptr); (gdb) opal_tsd_getspecific (key=4, valuep=0xffffffff7b4fe970) at ../../openmpi-dev-178-ga16c1e4/opal/threads/tsd.h:163 163 *valuep = pthread_getspecific(key); (gdb) 164 return OPAL_SUCCESS; (gdb) 165 } (gdb) get_print_name_buffer () at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:102 102 if (OPAL_SUCCESS != ret) return NULL; (gdb) 104 if (NULL == ptr) { (gdb) 113 return (orte_print_args_buffers_t*) ptr; (gdb) 114 } (gdb) orte_util_print_name_args (name=0xffffffff7b4feb90) at ../../openmpi-dev-178-ga16c1e4/orte/util/name_fns.c:148 148 if (NULL == ptr) { (gdb) 154 if (ORTE_PRINT_NAME_ARG_NUM_BUFS == ptr->cntr) { (gdb) 158 snprintf(ptr->buffers[ptr->cntr++], (gdb) 162 return ptr->buffers[ptr->cntr-1]; (gdb) 163 } (gdb) _process_name_print_for_opal (procname=14259803799433510912) at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:66 66 } (gdb) Program received signal SIGSEGV, Segmentation fault. 
0xfffffffee3210bfc in native_get_attr (attr=0xfffffffee2e05db0 "pmix.jobid", kv=0xffffffff7b4ff028) at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131 1131 OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, (gdb) bt #0 0xfffffffee3210bfc in native_get_attr ( attr=0xfffffffee2e05db0 "pmix.jobid", kv=0xffffffff7b4ff028) at ../../../../../openmpi-dev-178-ga16c1e4/opal/mca/pmix/native/pmix_native.c:1131 #1 0xfffffffee2e033e4 in rte_init () at ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/pmi/ess_pmi_module.c:170 #2 0xfffffffee4a340c0 in orte_init (pargc=0x0, pargv=0x0, flags=32) at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_init.c:239 #3 0xfffffffee4d9a164 in ompi_mpi_init (argc=0, argv=0x1003f5850, requested=0, provided=0xffffffff7b4ff44c) at ../../openmpi-dev-178-ga16c1e4/ompi/runtime/ompi_mpi_init.c:480 #4 0xfffffffee4dfbb30 in PMPI_Init (argc=0xffffffff7b4ff554, argv=0xffffffff7b4ff548) at pinit.c:84 #5 0xfffffffee5122f6c in Java_mpi_MPI_Init_1jni (env=0x10010e9e0, clazz=0xffffffff7b4ff760, argv=0xffffffff7b4ff858) at ../../../../../openmpi-dev-178-ga16c1e4/ompi/mpi/java/c/mpi_MPI.c:271 #6 0xffffffff6b810738 in ?? () #7 0xffffffff6b810738 in ?? () Backtrace stopped: previous frame identical to this frame (corrupt stack?) (gdb) Hopefully the above output is helpful. Please let me know if you need something else. Kind regards Siegmar > Siegmar Gross <siegmar.gr...@informatik.hs-fulda.de> wrote: > >Hi, > > > >today I installed openmpi-dev-178-ga16c1e4 on Solaris 10 Sparc > >with gcc-4.9.1 and Java 8. Now a very simple Java program works > >as expected, but other Java programs still break. I removed the > >warnings about "shmem.jar" and used the following configure > >command. 
> > > >tyr openmpi-dev-178-ga16c1e4-SunOS.sparc.64_gcc 406 head config.log \ > > | grep openmpi > >$ ../openmpi-dev-178-ga16c1e4/configure > > --prefix=/usr/local/openmpi-1.9.0_64_gcc > > --libdir=/usr/local/openmpi-1.9.0_64_gcc/lib64 > > --with-jdk-bindir=/usr/local/jdk1.8.0/bin > > --with-jdk-headers=/usr/local/jdk1.8.0/include > > JAVA_HOME=/usr/local/jdk1.8.0 > > LDFLAGS=-m64 CC=gcc CXX=g++ FC=gfortran CFLAGS=-m64 -D_REENTRANT > > CXXFLAGS=-m64 FCFLAGS=-m64 CPP=cpp CXXCPP=cpp > > CPPFLAGS= -D_REENTRANT CXXCPPFLAGS= > > --enable-mpi-cxx --enable-cxx-exceptions --enable-mpi-java > > --enable-mpi-thread-multiple --with-threads=posix > > --with-hwloc=internal > > --without-verbs --with-wrapper-cflags=-std=c11 -m64 > > --with-wrapper-cxxflags=-m64 --enable-debug > > > > > >tyr java 290 ompi_info | grep -e "Open MPI repo revision:" -e "C compiler > >version:" > > Open MPI repo revision: dev-178-ga16c1e4 > > C compiler version: 4.9.1 > > > > > > > >> > regarding the BUS error reported by Siegmar, i also commited > >> > 62bde1fcb554079143030bb305512c236672386f > >> > in order to fix it (this is based on code review only, i have no sparc64 > >> > hardware to test it is enough) > >> > >> I'll test it, when a new nightly snapshot is available for the trunk. > > > > > >tyr java 291 mpijavac InitFinalizeMain.java > >tyr java 292 mpiexec -np 1 java InitFinalizeMain > >Hello! > > > >tyr java 293 mpijavac BcastIntMain.java > >tyr java 294 mpiexec -np 2 java BcastIntMain > ># > ># A fatal error has been detected by the Java Runtime Environment: > ># > ># SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24792, tid=2 > >... > > > > > > > >tyr java 296 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec > >... 
> >(gdb) run -np 2 java BcastIntMain > >Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 2 java > >BcastIntMain > >[Thread debugging using libthread_db enabled] > >[New Thread 1 (LWP 1)] > >[New LWP 2 ] > ># > ># A fatal error has been detected by the Java Runtime Environment: > ># > ># SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24814, tid=2 > ># > ># JRE version: Java(TM) SE Runtime Environment (8.0-b132) (build 1.8.0-b132) > ># Java VM: Java HotSpot(TM) 64-Bit Server VM (25.0-b70 mixed mode > >solaris-sparc compressed oops) > ># Problematic frame: > ># C [mca_pmix_native.so+0x10bfc] native_get_attr+0x3000 > ># > ># Failed to write core dump. Core dumps have been disabled. To enable core > >dumping, try "ulimit -c unlimited" before starting Java again > ># > ># An error report file with more information is saved as: > ># /home/fd1026/work/skripte/master/parallel/prog/mpi/java/hs_err_pid24814.log > ># > ># A fatal error has been detected by the Java Runtime Environment: > ># > ># SIGBUS (0xa) at pc=0xfffffffee3210bfc, pid=24812, tid=2 > ># > ># JRE version: Java(TM) SE Runtime Environment (8.0-b132) (build 1.8.0-b132) > ># Java VM: Java HotSpot(TM) 64-Bit Server VM (25.0-b70 mixed mode > >solaris-sparc compressed oops) > ># Problematic frame: > ># C [mca_pmix_native.so+0x10bfc] native_get_attr+0x3000 > ># > ># Failed to write core dump. Core dumps have been disabled. To enable core > >dumping, try "ulimit -c unlimited" before starting Java again > ># > ># An error report file with more information is saved as: > ># /home/fd1026/work/skripte/master/parallel/prog/mpi/java/hs_err_pid24812.log > ># > ># If you would like to submit a bug report, please visit: > ># http://bugreport.sun.com/bugreport/crash.jsp > ># The crash happened outside the Java Virtual Machine in native code. > ># See problematic frame for where to report the bug. 
> ># > >[tyr:24814] *** Process received signal *** > >[tyr:24814] Signal: Abort (6) > >[tyr:24814] Signal code: (-1) > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4 > >/lib/sparcv9/libc.so.1:0xd8b98 > >/lib/sparcv9/libc.so.1:0xcc70c > >/lib/sparcv9/libc.so.1:0xcc918 > >/lib/sparcv9/libc.so.1:0xdd2d0 [ Signal 6 (ABRT)] > >/lib/sparcv9/libc.so.1:_thr_sigsetmask+0x1c4 > >/lib/sparcv9/libc.so.1:sigprocmask+0x28 > >/lib/sparcv9/libc.so.1:_sigrelse+0x5c > >/lib/sparcv9/libc.so.1:abort+0xc0 > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xb3cb90 > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xd97a04 > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:JVM_handle_solaris_signal+0xc0c > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0xb44e84 > >/lib/sparcv9/libc.so.1:0xd8b98 > >/lib/sparcv9/libc.so.1:0xcc70c > >/lib/sparcv9/libc.so.1:0xcc918 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10bfc > > [ Signal 10 (BUS)] > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi_java.so.0.0.0:Java_mpi_MPI_Init_1jni+0x1a0 > >0xffffffff6b810730 > >0xffffffff6b8106d4 > >0xffffffff6b8078a8 > >0xffffffff6b8078a8 > >0xffffffff6b80024c > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0x6fd4e8 > >/export2/prog/SunOS_sparc/jdk1.8.0/jre/lib/sparcv9/server/libjvm.so:0x79331c > >/export2/prog/SunOS_sparc/jdk1.8.0/lib/sparcv9/jli/libjli.so:0x7290 > 
>/lib/sparcv9/libc.so.1:0xd8a6c > >[tyr:24814] *** End of error message *** > >-------------------------------------------------------------------------- > >mpiexec noticed that process rank 1 with PID 0 on node tyr exited on signal > >6 (Abort). > >-------------------------------------------------------------------------- > >[LWP 2 exited] > >[New Thread 2 ] > >[Switching to Thread 1 (LWP 1)] > >sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to > >satisfy query > >(gdb) bt > >#0 0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1 > >#1 0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1 > >#2 0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1 > >#3 0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1 > >#4 0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1 > >#5 0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1 > >#6 0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1 > >#7 0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1 > >#8 0xffffffff7ec87ca0 in vm_close () > > from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0 > >#9 0xffffffff7ec85274 in lt_dlclose () > > from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0 > >#10 0xffffffff7ecaa5dc in ri_destructor (obj=0x100187b70) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:382 > >#11 0xffffffff7eca8fd8 in opal_obj_run_destructors (object=0x100187b70) > > at ../../../../openmpi-dev-178-ga16c1e4/opal/class/opal_object.h:446 > >#12 0xffffffff7eca9eac in mca_base_component_repository_release ( > > component=0xffffffff7b1236f0 <mca_oob_tcp_component>) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:240 > >#13 0xffffffff7ecac17c in mca_base_component_unload ( > > component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1) > > at > > 
../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:47 > >#14 0xffffffff7ecac210 in mca_base_component_close ( > > component=0xffffffff7b1236f0 <mca_oob_tcp_component>, output_id=-1) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:60 > >#15 0xffffffff7ecac2e4 in mca_base_components_close (output_id=-1, > > components=0xffffffff7f14bc58 <orte_oob_base_framework+80>, skip=0x0) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:86 > >#16 0xffffffff7ecac24c in mca_base_framework_components_close ( > > framework=0xffffffff7f14bc08 <orte_oob_base_framework>, skip=0x0) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:66 > >#17 0xffffffff7efcaf80 in orte_oob_base_close () > > at > > ../../../../openmpi-dev-178-ga16c1e4/orte/mca/oob/base/oob_base_frame.c:112 > >#18 0xffffffff7ecc0d74 in mca_base_framework_close ( > > framework=0xffffffff7f14bc08 <orte_oob_base_framework>) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_framework.c:187 > >#19 0xffffffff7be07858 in rte_finalize () > > at > > ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/hnp/ess_hnp_module.c:857 > >#20 0xffffffff7ef338bc in orte_finalize () > > at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_finalize.c:66 > >#21 0x000000010000723c in orterun (argc=5, argv=0xffffffff7fffe0d8) > > at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/orterun.c:1103 > >#22 0x0000000100003e80 in main (argc=5, argv=0xffffffff7fffe0d8) > >---Type <return> to continue, or q <return> to quit--- > > at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/main.c:13 > >(gdb) > > > > > > > > > >I get the same error for C programs, if they use more than > >MPI_Init and MPI_Finalize. > > > >tyr small_prog 301 mpicc init_finalize.c > >tyr small_prog 302 mpiexec -np 1 a.out > >Hello! 
> >tyr small_prog 303 mpicc column_int.c > >tyr small_prog 306 /usr/local/gdb-7.6.1_64_gcc/bin/gdb mpiexec > >... > >(gdb) run -np 4 a.out > >Starting program: /usr/local/openmpi-1.9.0_64_gcc/bin/mpiexec -np 4 a.out > >[Thread debugging using libthread_db enabled] > >[New Thread 1 (LWP 1)] > >[New LWP 2 ] > >[tyr:24880] *** Process received signal *** > >[tyr:24880] Signal: Bus Error (10) > >[tyr:24880] Signal code: Invalid address alignment (1) > >[tyr:24880] Failing at address: ffffffff7bd1c10c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4 > >/lib/sparcv9/libc.so.1:0xd8b98 > >/lib/sparcv9/libc.so.1:0xcc70c > >/lib/sparcv9/libc.so.1:0xcc918 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10684 > > [ Signal 10 (BUS)] > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8 > >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:main+0x20 > >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:_start+0x7c > >[tyr:24880] *** End of error message *** > >[tyr:24876] *** Process received signal *** > >[tyr:24876] Signal: Bus Error (10) > >[tyr:24876] Signal code: Invalid address alignment (1) > >[tyr:24876] Failing at address: ffffffff7bd1c10c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:opal_backtrace_print+0x2c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0.0.0:0xdc2d4 > >/lib/sparcv9/libc.so.1:0xd8b98 > >/lib/sparcv9/libc.so.1:0xcc70c > >/lib/sparcv9/libc.so.1:0xcc918 > 
>/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_pmix_native.so:0x10684 > > [ Signal 10 (BUS)] > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/openmpi/mca_ess_pmi.so:0x33dc > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libopen-rte.so.0.0.0:orte_init+0x67c > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:ompi_mpi_init+0x374 > >/export2/prog/SunOS_sparc/openmpi-1.9.0_64_gcc/lib64/libmpi.so.0.0.0:PMPI_Init+0x2a8 > >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:main+0x20 > >/home/fd1026/work/skripte/master/parallel/prog/mpi/small_prog/a.out:_start+0x7c > >[tyr:24876] *** End of error message *** > >-------------------------------------------------------------------------- > >mpiexec noticed that process rank 2 with PID 0 on node tyr exited on signal > >10 (Bus Error). > >-------------------------------------------------------------------------- > >[LWP 2 exited] > >[New Thread 2 ] > >[Switching to Thread 1 (LWP 1)] > >sol_thread_fetch_registers: td_ta_map_id2thr: no thread can be found to > >satisfy query > >(gdb) bt > >#0 0xffffffff7f6173d0 in rtld_db_dlactivity () from /usr/lib/sparcv9/ld.so.1 > >#1 0xffffffff7f6175a8 in rd_event () from /usr/lib/sparcv9/ld.so.1 > >#2 0xffffffff7f618950 in lm_delete () from /usr/lib/sparcv9/ld.so.1 > >#3 0xffffffff7f6226bc in remove_so () from /usr/lib/sparcv9/ld.so.1 > >#4 0xffffffff7f624574 in remove_hdl () from /usr/lib/sparcv9/ld.so.1 > >#5 0xffffffff7f61d97c in dlclose_core () from /usr/lib/sparcv9/ld.so.1 > >#6 0xffffffff7f61d9d4 in dlclose_intn () from /usr/lib/sparcv9/ld.so.1 > >#7 0xffffffff7f61db0c in dlclose () from /usr/lib/sparcv9/ld.so.1 > >#8 0xffffffff7ec87ca0 in vm_close () > > from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0 > >#9 0xffffffff7ec85274 in lt_dlclose () > > from /usr/local/openmpi-1.9.0_64_gcc/lib64/libopen-pal.so.0 > >#10 0xffffffff7ecaa5dc in ri_destructor (obj=0x100187ae0) > > at > > 
../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:382 > >#11 0xffffffff7eca8fd8 in opal_obj_run_destructors (object=0x100187ae0) > > at ../../../../openmpi-dev-178-ga16c1e4/opal/class/opal_object.h:446 > >#12 0xffffffff7eca9eac in mca_base_component_repository_release ( > > component=0xffffffff7b0236f0 <mca_oob_tcp_component>) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_component_repository.c:240 > >#13 0xffffffff7ecac17c in mca_base_component_unload ( > > component=0xffffffff7b0236f0 <mca_oob_tcp_component>, output_id=-1) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:47 > >#14 0xffffffff7ecac210 in mca_base_component_close ( > > component=0xffffffff7b0236f0 <mca_oob_tcp_component>, output_id=-1) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:60 > >#15 0xffffffff7ecac2e4 in mca_base_components_close (output_id=-1, > > components=0xffffffff7f14bc58 <orte_oob_base_framework+80>, skip=0x0) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:86 > >#16 0xffffffff7ecac24c in mca_base_framework_components_close ( > > framework=0xffffffff7f14bc08 <orte_oob_base_framework>, skip=0x0) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_components_close.c:66 > >#17 0xffffffff7efcaf80 in orte_oob_base_close () > > at > > ../../../../openmpi-dev-178-ga16c1e4/orte/mca/oob/base/oob_base_frame.c:112 > >#18 0xffffffff7ecc0d74 in mca_base_framework_close ( > > framework=0xffffffff7f14bc08 <orte_oob_base_framework>) > > at > > ../../../../openmpi-dev-178-ga16c1e4/opal/mca/base/mca_base_framework.c:187 > >#19 0xffffffff7bd07858 in rte_finalize () > > at > > ../../../../../openmpi-dev-178-ga16c1e4/orte/mca/ess/hnp/ess_hnp_module.c:857 > >#20 0xffffffff7ef338bc in orte_finalize () > > at ../../openmpi-dev-178-ga16c1e4/orte/runtime/orte_finalize.c:66 > >#21 0x000000010000723c in orterun 
(argc=4, argv=0xffffffff7fffe0e8) > > at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/orterun.c:1103 > >#22 0x0000000100003e80 in main (argc=4, argv=0xffffffff7fffe0e8) > > at ../../../../openmpi-dev-178-ga16c1e4/orte/tools/orterun/main.c:13 > >(gdb) > > > > > > > >Do you need any other information? > > > > > >Kind regards > > > >Siegmar >