Please find below the requested (huge) trace.

COORDINATOR:
[root@acme11 dmtcp]# dmtcp_coordinator [23875] TRACE at dmtcp_coordinator.cpp:1844 in main; REASON='New DMTCP coordinator starting.' UniquePid::ThisProcess() = 1d64b124afe30f29-23875-56253d4e dmtcp_coordinator starting... Host: acme11.ciemat.es (172.17.31.157) Port: 7779 Checkpoint Interval: disabled (checkpoint manually instead) Exit on last client: 0 Type '?' for help. [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 5 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' [23875] TRACE at dmtcp_coordinator.cpp:1263 in validateNewWorkerProcess; REASON='First process connected. Creating new computation group.' compId = 1d64b124afe30f29-40000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:1661 in updateCheckpointInterval; REASON='CheckpointInterval updated (for this computation only)' oldInterval = 0 theCheckpointInterval = 0 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-23920-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 1 [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = mpirun_rsh msg.from = 1d64b124afe30f29-40000-56253d73 client->identity() = 1d64b124afe30f29-23920-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-40000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 6 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-40000-56253d73 client->virtualPid() = 41000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-40000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 2 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 7 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' [23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-40000-56253d73 client->virtualPid() = 42000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-40000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 3 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = mpirun_rsh_(forked) msg.from = 1d64b124afe30f29-41000-56253d73 client->identity() = 1d64b124afe30f29-40000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = mpirun_rsh_(forked) msg.from = 1d64b124afe30f29-42000-56253d73 client->identity() = 1d64b124afe30f29-40000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 8 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-41000-56253d73 client->virtualPid() = 43000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-41000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 4 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = dmtcp_ssh_(forked) msg.from = 1d64b124afe30f29-43000-56253d73 client->identity() = 1d64b124afe30f29-41000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 9 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' [23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-42000-56253d73 client->virtualPid() = 44000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-42000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 5 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = dmtcp_ssh_(forked) msg.from = 1d64b124afe30f29-44000-56253d73 client->identity() = 1d64b124afe30f29-42000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-43000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-44000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process 
Information after exec()' progname = dmtcp_ssh msg.from = 1d64b124afe30f29-41000-56253d73 client->identity() = 1d64b124afe30f29-41000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-41000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = dmtcp_ssh msg.from = 1d64b124afe30f29-42000-56253d73 client->identity() = 1d64b124afe30f29-42000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-42000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 8 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-23944-56253d73 client->virtualPid() = 45000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-23944-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 4 [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = dmtcp_sshd msg.from = 1d64b124afe30f29-45000-56253d73 client->identity() = 1d64b124afe30f29-23944-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-45000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 9 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-45000-56253d73 client->virtualPid() = 46000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-45000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 5 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = dmtcp_sshd_(forked) msg.from = 1d64b124afe30f29-46000-56253d73 client->identity() = 1d64b124afe30f29-45000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 10 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' [23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1b69d09fb3238b30-15072-56253d73 client->virtualPid() = 47000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1b69d09fb3238b30-15072-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 6 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 11 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1d64b124afe30f29-46000-56253d73 client->virtualPid() = 48000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1d64b124afe30f29-46000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 7 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme11.ciemat.es client->progname() = mpispawn_(forked) msg.from = 1d64b124afe30f29-48000-56253d73 client->identity() = 1d64b124afe30f29-46000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = env msg.from = 1d64b124afe30f29-46000-56253d73 client->identity() = 1d64b124afe30f29-46000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-46000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = mpispawn msg.from = 1d64b124afe30f29-46000-56253d73 client->identity() = 1d64b124afe30f29-46000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-46000-56253d73 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = helloWorldMPI msg.from = 1d64b124afe30f29-48000-56253d73 client->identity() = 1d64b124afe30f29-48000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1d64b124afe30f29-48000-56253d73 msg.state = WorkerState::RUNNING newState = 
WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = dmtcp_sshd msg.from = 1b69d09fb3238b30-47000-56253d74 client->identity() = 1b69d09fb3238b30-15072-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1b69d09fb3238b30-47000-56253d74 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 12 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' [23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1b69d09fb3238b30-47000-56253d74 client->virtualPid() = 49000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1b69d09fb3238b30-47000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 8 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme12.ciemat.es client->progname() = dmtcp_sshd_(forked) msg.from = 1b69d09fb3238b30-49000-56253d74 client->identity() = 1b69d09fb3238b30-47000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:962 in onConnect; REASON='accepting new connection' remote.sockfd() = 13 (strerror((*__errno_location ()))) = Success [23875] TRACE at dmtcp_coordinator.cpp:971 in onConnect; REASON='Reading from incoming connection...' 
[23875] TRACE at dmtcp_coordinator.cpp:1266 in validateNewWorkerProcess; REASON='New process connected' hello_remote.from = 1b69d09fb3238b30-49000-56253d74 client->virtualPid() = 50000 [23875] NOTE at dmtcp_coordinator.cpp:1079 in onConnect; REASON='worker connected' hello_remote.from = 1b69d09fb3238b30-49000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:1084 in onConnect; REASON='END' clients.size() = 9 [23875] NOTE at dmtcp_coordinator.cpp:858 in onData; REASON='Updating process Information after fork()' client->hostname() = acme12.ciemat.es client->progname() = mpispawn_(forked) msg.from = 1b69d09fb3238b30-50000-56253d74 client->identity() = 1b69d09fb3238b30-49000-56253d74 [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = env msg.from = 1b69d09fb3238b30-49000-56253d74 client->identity() = 1b69d09fb3238b30-49000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1b69d09fb3238b30-49000-56253d74 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = mpispawn msg.from = 1b69d09fb3238b30-49000-56253d74 client->identity() = 1b69d09fb3238b30-49000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1b69d09fb3238b30-49000-56253d74 msg.state = WorkerState::RUNNING newState = WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:867 in onData; REASON='Updating process Information after exec()' progname = helloWorldMPI msg.from = 1b69d09fb3238b30-50000-56253d74 client->identity() = 1b69d09fb3238b30-50000-56253d74 [23875] TRACE at dmtcp_coordinator.cpp:789 in onData; REASON='got DMT_OK message' oldState = WorkerState::RUNNING msg.from = 1b69d09fb3238b30-50000-56253d74 msg.state = WorkerState::RUNNING newState = 
WorkerState::RUNNING [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-48000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1b69d09fb3238b30-50000-56253d74 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-46000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-45000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1b69d09fb3238b30-49000-56253d74 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1b69d09fb3238b30-47000-56253d74 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-41000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-42000-56253d73 [23875] NOTE at dmtcp_coordinator.cpp:917 in onDisconnect; REASON='client disconnected' client->identity() = 1d64b124afe30f29-40000-56253d73 [23875] TRACE at dmtcp_coordinator.cpp:892 in removeStaleSharedAreaFile; REASON='Removing sharedArea file.' 
o.str() = /tmp/dmtcp-r...@acme11.ciemat.es/dmtcpSharedArea.1d64b124afe30f29-40000-56253d73.56253d731 EXECUTION: [root@acme11 tests]# dmtcp_launch -h acme11 -p 7779 --ib mpirun_rsh -n 2 acme11 acme12 ./helloWorldMPI [23920] TRACE at dmtcp_launch.cpp:440 in main; REASON='dmtcp_launch starting new program:' argv[0] = mpirun_rsh [23920] TRACE at dmtcp_launch.cpp:454 in main; REASON='setting DMTCP_CHECKPOINT_DIR' ckptDir = /home/slurm/tests [23920] TRACE at coordinatorapi.cpp:536 in connectToCoordOnStartup; REASON='sending coordinator handshake' UniquePid::ThisProcess() = 1d64b124afe30f29-23920-56253d73 [23920] TRACE at coordinatorapi.cpp:543 in connectToCoordOnStartup; REASON='Got virtual pid from coordinator' hello_remote.virtualPid = 40000 [23920] TRACE at shareddata.cpp:193 in initialize; REASON='Shared area mapped' sharedDataHeader = 0x7f8f3a61b000 [23920] TRACE at dmtcp_launch.cpp:769 in setLDPreloadLibs; REASON='getting value of LD_PRELOAD' getenv("LD_PRELOAD") = /home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: preloadLibs = /home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: preloadLibs32 = libdmtcp_infiniband.so:libdmtcp_alloc.so:libdmtcp_dl.so:libdmtcp_ipc.so:libdmtcp_svipc.so:libdmtcp_timer.so:libdmtcp.so:libdmtcp_pid.so: [40000] TRACE at shareddata.cpp:193 in initialize; REASON='Shared 
area mapped' sharedDataHeader = 0x7ff0faceb000 [40000] TRACE at dmtcpworker.cpp:260 in prepareLogAndProcessdDataFromSerialFile; REASON='Root of processes tree' [40000] TRACE at dmtcpworker.cpp:315 in DmtcpWorker; REASON='libdmtcp.so: Running ' jalib::Filesystem::GetProgramName() = mpirun_rsh getenv ("LD_PRELOAD") = /home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: [40000] TRACE at dmtcpworker.cpp:111 in restoreUserLDPRELOAD; REASON='LD_PRELOAD' preload = userPreload = [40000] TRACE at coordinatorapi.cpp:118 in init; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-40000-56253d73 [40000] TRACE at processinfo.cpp:180 in growStack; REASON='Original stack area' (void*)area.addr = 0x7fff8681d000 area.size = 135168 [40000] TRACE at processinfo.cpp:218 in growStack; REASON='New stack size' (void*)area.addr = 0x7fff86057000 area.size = 8286208 [40000] TRACE at fileconnlist.cpp:385 in scanForPreExisting; REASON='scanning pre-existing device' fd = 0 device = /dev/pts/1 [40000] TRACE at fileconnection.cpp:251 in PtyConnection; REASON='creating CTTY connection' _ptsName = /dev/pts/1 _virtPtsName = /dev/pts/v0 [40000] TRACE at fileconnlist.cpp:385 in scanForPreExisting; REASON='scanning pre-existing device' fd = 1 device = /dev/pts/1 [40000] TRACE at fileconnlist.cpp:385 in scanForPreExisting; REASON='scanning pre-existing device' fd = 2 device = /dev/pts/1 [40000] TRACE at socketconnlist.cpp:172 in scanForPreExisting; REASON='scanning pre-existing device' fd = 0 device = /dev/pts/1 [40000] TRACE at socketconnlist.cpp:172 in scanForPreExisting; REASON='scanning pre-existing device' fd = 1 device = 
/dev/pts/1 [40000] TRACE at socketconnlist.cpp:172 in scanForPreExisting; REASON='scanning pre-existing device' fd = 2 device = /dev/pts/1 [40000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23920 th->virtual_tid = 40000 [40000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [40000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23923 [40000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 40002 [40000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23923 th->virtual_tid = 40002 [40000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 40002 [40000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23923 curThread->virtual_tid = 40002 curThread->saved_sp = 0x7ff0fa2e52e0 [40000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [40000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [40000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [40000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [40000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23924 [40000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 40004 [40000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23924 th->virtual_tid = 40004 [40000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 40004 [40000] TRACE at socketwrappers.cpp:59 in socket; REASON='socket created' ret = 3 domain = 2 type = 1 protocol = 6 [40000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' 
id() = 1d64b124afe30f29-40000-56253d73(99002) domain = 2 type = 1 protocol = 6 [40000] TRACE at socketwrappers.cpp:141 in bind; REASON='bind' sockfd = 3 con->id() = 1d64b124afe30f29-40000-56253d73(99002) [40000] TRACE at socketwrappers.cpp:159 in listen; REASON='listen' sockfd = 3 con->id() = 1d64b124afe30f29-40000-56253d73(99002) backlog = 2 [40000] TRACE at coordinatorapi.cpp:575 in createNewConnectionBeforeFork; REASON='Got virtual pid from coordinator' hello_remote.virtualPid = 41000 [40000] TRACE at processinfo.cpp:393 in insertChild; REASON='Creating new virtualPid -> realPid mapping.' pid = 41000 uniquePid = 1d64b124afe30f29-41000-56253d73 [40000] TRACE at execwrappers.cpp:205 in fork; REASON='fork()ed [PARENT] done' child = 1d64b124afe30f29-41000-56253d73 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 5 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 [40000] TRACE at coordinatorapi.cpp:575 in createNewConnectionBeforeFork; REASON='Got virtual pid from coordinator' hello_remote.virtualPid = 42000 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 5 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 [41000] TRACE at uniquepid.cpp:178 in resetOnFork; REASON='Explicitly setting process UniquePid' newId = 1d64b124afe30f29-41000-56253d73 [40000] TRACE at processinfo.cpp:393 in insertChild; REASON='Creating new virtualPid -> realPid mapping.' 
pid = 42000 uniquePid = 1d64b124afe30f29-42000-56253d73 [40000] TRACE at execwrappers.cpp:205 in fork; REASON='fork()ed [PARENT] done' child = 1d64b124afe30f29-42000-56253d73 [40000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [40000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23927 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 [40000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 40006 [41000] TRACE at execwrappers.cpp:157 in pthread_atfork_child; REASON='fork()ed [CHILD]' child = 1d64b124afe30f29-41000-56253d73 parent = 1d64b124afe30f29-40000-56253d73 [40000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23927 th->virtual_tid = 40006 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 [41000] TRACE at coordinatorapi.cpp:140 in resetOnFork; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-41000-56253d73 [40000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 40006 [42000] TRACE at uniquepid.cpp:178 in resetOnFork; REASON='Explicitly setting process UniquePid' newId = 1d64b124afe30f29-42000-56253d73 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 5 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 [42000] TRACE at execwrappers.cpp:157 in pthread_atfork_child; REASON='fork()ed [CHILD]' child = 1d64b124afe30f29-42000-56253d73 parent = 
1d64b124afe30f29-40000-56253d73 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 5 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 [42000] TRACE at coordinatorapi.cpp:140 in resetOnFork; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-42000-56253d73 [41000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [41000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23924 [41000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23923 [41000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23920 [41000] TRACE at execwrappers.cpp:199 in fork; REASON='fork() done [CHILD]' child = 1d64b124afe30f29-41000-56253d73 parent = 1d64b124afe30f29-40000-56253d73 [41000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23925 th->virtual_tid = 41000 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 [41000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 [41000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23928 [41000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 41002 [42000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [41000] TRACE at 
threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23928 th->virtual_tid = 41002 [42000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23924 [41000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 41002 [42000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23923 [42000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23920 [41000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23928 curThread->virtual_tid = 41002 curThread->saved_sp = 0x7ff0f9ae42e0 [42000] TRACE at execwrappers.cpp:199 in fork; REASON='fork() done [CHILD]' child = 1d64b124afe30f29-42000-56253d73 parent = 1d64b124afe30f29-40000-56253d73 [41000] TRACE at execwrappers.cpp:533 in execv; REASON='execv() wrapper, calling execve with environ' path = /usr/bin/ssh [41000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [42000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23926 th->virtual_tid = 42000 [41000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [41000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [42000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [42000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23929 [42000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 42002 [42000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23929 th->virtual_tid = 42002 [42000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 42002 [42000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' 
curThread->tid = 23929 curThread->virtual_tid = 42002 curThread->saved_sp = 0x7ff0f9ae42e0 [42000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [42000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [42000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [41000] TRACE at ssh.cpp:314 in prepareForExec; REASON='Prefix' prefix = /home/localsoft/dmtcp/bin/dmtcp_launch --coord-host 172.17.29.173 --coord-port 7779 --ckptdir /home/slurm/tests --infiniband /home/localsoft/dmtcp/bin/dmtcp_sshd [41000] NOTE at ssh.cpp:369 in prepareForExec; REASON='New ssh command' newCommand = /home/localsoft/dmtcp/bin/dmtcp_ssh /home/localsoft/dmtcp/bin/dmtcp_nocheckpoint /usr/bin/ssh -q acme11 cd /home/slurm/tests;/home/localsoft/dmtcp/bin/dmtcp_launch --coord-host 172.17.29.173 --coord-port 7779 --ckptdir /home/slurm/tests --infiniband /home/localsoft/dmtcp/bin/dmtcp_sshd /usr/bin/env MPISPAWN_MPIRUN_MPD=0 USE_LINEAR_SSH=1 MPISPAWN_MPIRUN_HOST=acme11.ciemat.es MPISPAWN_MPIRUN_HOSTIP=172.17.29.173 MPIRUN_RSH_LAUNCH=1 MPISPAWN_CHECKIN_PORT=35785 MPISPAWN_MPIRUN_PORT=35785 MPISPAWN_NNODES=2 MPISPAWN_GLOBAL_NPROCS=2 MPISPAWN_MPIRUN_ID=40000 MPISPAWN_ARGC=1 MPDMAN_KVS_TEMPLATE=kvs_885_acme11.ciemat.es_40000 MPISPAWN_LOCAL_NPROCS=1 MPISPAWN_ARGV_0='./helloWorldMPI' MPISPAWN_ARGC=1 MPISPAWN_GENERIC_ENV_COUNT=0 MPISPAWN_ID=0 MPISPAWN_WORKING_DIR=/home/slurm/tests MPISPAWN_MPIRUN_RANK_0=0 /usr/local/bin/mpispawn 0 [41000] TRACE at execwrappers.cpp:506 in execve; REASON='execve() wrapper' filename = /home/localsoft/dmtcp/bin/dmtcp_ssh [42000] TRACE at execwrappers.cpp:533 in execv; REASON='execv() wrapper, calling execve with environ' path = /usr/bin/ssh [41000] TRACE at execwrappers.cpp:266 in dmtcpPrepareForExec; REASON='Preparing for Exec' path = /home/localsoft/dmtcp/bin/dmtcp_ssh [41000] TRACE at pidwrappers.cpp:188 in tcgetpgrp; REASON='tcgetpgrp return value' fd = 4 
retval = 40000 [42000] TRACE at ssh.cpp:314 in prepareForExec; REASON='Prefix' prefix = /home/localsoft/dmtcp/bin/dmtcp_launch --coord-host 172.17.29.173 --coord-port 7779 --ckptdir /home/slurm/tests --infiniband /home/localsoft/dmtcp/bin/dmtcp_sshd [41000] TRACE at processinfo.cpp:523 in refresh; REASON='CHECK GROUP PID' _gid = 40000 _fgid = 40000 _ppid = 40000 _pid = 41000 [42000] NOTE at ssh.cpp:369 in prepareForExec; REASON='New ssh command' newCommand = /home/localsoft/dmtcp/bin/dmtcp_ssh /home/localsoft/dmtcp/bin/dmtcp_nocheckpoint /usr/bin/ssh -q acme12 cd /home/slurm/tests;/home/localsoft/dmtcp/bin/dmtcp_launch --coord-host 172.17.29.173 --coord-port 7779 --ckptdir /home/slurm/tests --infiniband /home/localsoft/dmtcp/bin/dmtcp_sshd /usr/bin/env MPISPAWN_MPIRUN_MPD=0 USE_LINEAR_SSH=1 MPISPAWN_MPIRUN_HOST=acme11.ciemat.es MPISPAWN_MPIRUN_HOSTIP=172.17.29.173 MPIRUN_RSH_LAUNCH=1 MPISPAWN_CHECKIN_PORT=35785 MPISPAWN_MPIRUN_PORT=35785 MPISPAWN_NNODES=2 MPISPAWN_GLOBAL_NPROCS=2 MPISPAWN_MPIRUN_ID=40000 MPISPAWN_ARGC=1 MPDMAN_KVS_TEMPLATE=kvs_885_acme11.ciemat.es_40000 MPISPAWN_LOCAL_NPROCS=1 MPISPAWN_ARGV_0='./helloWorldMPI' MPISPAWN_ARGC=1 MPISPAWN_GENERIC_ENV_COUNT=0 MPISPAWN_ID=1 MPISPAWN_WORKING_DIR=/home/slurm/tests MPISPAWN_MPIRUN_RANK_0=1 /usr/local/bin/mpispawn 0 [42000] TRACE at execwrappers.cpp:506 in execve; REASON='execve() wrapper' filename = /home/localsoft/dmtcp/bin/dmtcp_ssh [41000] TRACE at processinfo.cpp:554 in serialize; REASON='Serialized process information' _sid = 23885 _ppid = 40000 _gid = 40000 _fgid = 40000 _isRootOfProcessTree = 0 _procname = mpirun_rsh _hostname = acme11.ciemat.es _launchCWD = /home/slurm/tests _ckptCWD = /home/slurm/tests _upid = 1d64b124afe30f29-41000-56253d73 _uppid = 1d64b124afe30f29-40000-56253d73 _compGroup = 0-0-0 _numPeers = 0 _noCoordinator = 0 _argvSize = 46 _envSize = 3567 _elfType = 1 [42000] TRACE at execwrappers.cpp:266 in dmtcpPrepareForExec; REASON='Preparing for Exec' path = 
/home/localsoft/dmtcp/bin/dmtcp_ssh [41000] TRACE at processinfo.cpp:565 in serialize; REASON='Serializing ChildPid Table' _childTable.size() = 0 o.filename() = [41000] TRACE at fileconnection.cpp:492 in serializeSubClass; REASON='Serializing PtyConn.' _ptsName = /dev/pts/1 _virtPtsName = /dev/pts/v0 [42000] TRACE at pidwrappers.cpp:188 in tcgetpgrp; REASON='tcgetpgrp return value' fd = 5 retval = 40000 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVShm Maps Virtual -> Real [42000] TRACE at processinfo.cpp:523 in refresh; REASON='CHECK GROUP PID' _gid = 40000 _fgid = 40000 _ppid = 40000 _pid = 42000 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVSem Maps Virtual -> Real [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVMsq Maps Virtual -> Real [42000] TRACE at processinfo.cpp:554 in serialize; REASON='Serialized process information' _sid = 23885 _ppid = 40000 _gid = 40000 _fgid = 40000 _isRootOfProcessTree = 0 _procname = mpirun_rsh _hostname = acme11.ciemat.es _launchCWD = /home/slurm/tests _ckptCWD = /home/slurm/tests _upid = 1d64b124afe30f29-42000-56253d73 _uppid = 1d64b124afe30f29-40000-56253d73 _compGroup = 0-0-0 _numPeers = 0 _noCoordinator = 0 _argvSize = 46 _envSize = 3567 _elfType = 1 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 [42000] TRACE at processinfo.cpp:565 in serialize; REASON='Serializing ChildPid Table' _childTable.size() = 0 o.filename() = [41000] TRACE at execwrappers.cpp:336 in dmtcpPrepareForExec; REASON='Will exec filename instead of path' path = /home/localsoft/dmtcp/bin/dmtcp_ssh *filename = 
/home/localsoft/dmtcp/bin/dmtcp_ssh [41000] TRACE at execwrappers.cpp:348 in dmtcpPrepareForExec; REASON='Prepared for Exec' getenv("LD_PRELOAD") = [42000] TRACE at fileconnection.cpp:492 in serializeSubClass; REASON='Serializing PtyConn.' _ptsName = /dev/pts/1 _virtPtsName = /dev/pts/v0 [42000] TRACE at fileconnection.cpp:936 in serializeSubClass; REASON='Serializing FileConn.' _path = /dev/null _rel_path = dmtcp_get_ckpt_files_subdir() = /home/slurm/tests/ckpt_mpirun_rsh_1d64b124afe30f29-42000-56253d73_files _ckpted_file = 858923069 _fcntlFlags = -1 [41000] TRACE at execwrappers.cpp:469 in patchUserEnv; REASON='Creating a copy of (non-DMTCP) user env vars...' out.str() = non-DMTCP env vars: addenv[user]:XDG_SESSION_ID=32 addenv[user]:HOSTNAME=acme11.ciemat.es addenv[user]:TERM=xterm-256color addenv[user]:SHELL=/bin/bash addenv[user]:HISTSIZE=1000 addenv[user]:SSH_CLIENT=192.101.161.229 55755 22 addenv[user]:SSH_TTY=/dev/pts/1 addenv[user]:USER=root addenv[user]:LS_COLORS=rs=0:di=38;5;27:ln=38;5;51:mh=44;38;5;15:pi=40;38;5;11:so=38;5;13:do=38;5;5:bd=48;5;232;38;5;11:cd=48;5;232;38;5;3:or=48;5;232;38;5;9:mi=05;48;5;232;38;5;15:su=48;5;196;38;5;15:sg=48;5;11;38;5;16:ca=48;5;196;38;5;226:tw=48;5;10;38;5;16:ow=48;5;10;38;5;21:st=48;5;21;38;5;15:ex=38;5;34:*.tar=38;5;9:*.tgz=38;5;9:*.arc=38;5;9:*.arj=38;5;9:*.taz=38;5;9:*.lha=38;5;9:*.lz4=38;5;9:*.lzh=38;5;9:*.lzma=38;5;9:*.tlz=38;5;9:*.txz=38;5;9:*.tzo=38;5;9:*.t7z=38;5;9:*.zip=38;5;9:*.z=38;5;9:*.Z=38;5;9:*.dz=38;5;9:*.gz=38;5;9:*.lrz=38;5;9:*.lz=38;5;9:*.lzo=38;5;9:*.xz=38;5;9:*.bz2=38;5;9:*.bz=38;5;9:*.tbz=38;5;9:*.tbz2=38;5;9:*.tz=38;5;9:*.deb=38;5;9:*.rpm=38;5;9:*.jar=38;5;9:*.war=38;5;9:*.ear=38;5;9:*.sar=38;5;9:*.rar=38;5;9:*.alz=38;5;9:*.ace=38;5;9:*.zoo=38;5;9:*.cpio=38;5;9:*.7z=38;5;9:*.rz=38;5;9:*.cab=38;5;9:*.jpg=38;5;13:*.jpeg=38;5;13:*.gif=38;5;13:*.bmp=38;5;13:*.pbm=38;5;13:*.pgm=38;5;13:*.ppm=38;5;13:*.tga=38;5;13:*.xbm=38;5;13:*.xpm=38;5;13:*.tif=38;5;13:*.tiff=38;5;13:*.png=38;5;13:*.svg=38;5;13:*.svg
z=38;5;13:*.mng=38;5;13:*.pcx=38;5;13:*.mov=38;5;13:*.mpg=38;5;13:*.mpeg=38;5;13:*.m2v=38;5;13:*.mkv=38;5;13:*.webm=38;5;13:*.ogm=38;5;13:*.mp4=38;5;13:*.m4v=38;5;13:*.mp4v=38;5;13:*.vob=38;5;13:*.qt=38;5;13:*.nuv=38;5;13:*.wmv=38;5;13:*.asf=38;5;13:*.rm=38;5;13:*.rmvb=38;5;13:*.flc=38;5;13:*.avi=38;5;13:*.fli=38;5;13:*.flv=38;5;13:*.gl=38;5;13:*.dl=38;5;13:*.xcf=38;5;13:*.xwd=38;5;13:*.yuv=38;5;13:*.cgm=38;5;13:*.emf=38;5;13:*.axv=38;5;13:*.anx=38;5;13:*.ogv=38;5;13:*.ogx=38;5;13:*.aac=38;5;45:*.au=38;5;45:*.flac=38;5;45:*.mid=38;5;45:*.midi=38;5;45:*.mka=38;5;45:*.mp3=38;5;45:*.mpc=38;5;45:*.ogg=38;5;45:*.ra=38;5;45:*.wav=38;5;45:*.axa=38;5;45:*.oga=38;5;45:*.spx=38;5;45:*.xspf=38;5;45: addenv[user]:MAIL=/var/spool/mail/root addenv[user]:PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/home/localsoft/dmtcp/bin/:/home/localsoft/mvapich/bin/:/home/localsoft/slurm/soft/bin/:/home/localsoft/slurm/soft/sbin/:/root/bin addenv[user]:PWD=/home/slurm/tests addenv[user]:LANG=es_ES.UTF-8 addenv[user]:HISTCONTROL=ignoredups addenv[user]:SHLVL=1 addenv[user]:HOME=/root addenv[user]:LOGNAME=root addenv[user]:SSH_CONNECTION=192.101.161.229 55755 172.17.29.173 22 addenv[user]:LESSOPEN=||/usr/bin/lesspipe.sh %s addenv[user]:XDG_RUNTIME_DIR=/run/user/0 addenv[user]:OLDPWD=/root addenv[user]:_=/home/localsoft/dmtcp/bin/dmtcp_launch skipping: DMTCP_QUIET=0 skipping: DMTCP_CHECKPOINT_DIR=/home/slurm/tests skipping: DMTCP_VIRTUAL_PID=41000:40000:23920:############################################################# skipping: DMTCP_DLSYM_OFFSET=-1776 skipping: DMTCP_DLSYM_OFFSET_M32=0 skipping: 
DMTCP_HIJACK_LIBS=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: skipping: DMTCP_HIJACK_LIBS_M32=libdmtcp_infiniband.so:libdmtcp_alloc.so:libdmtcp_dl.so:libdmtcp_ipc.so:libdmtcp_svipc.so:libdmtcp_timer.so:libdmtcp.so:libdmtcp_pid.so: [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVShm Maps Virtual -> Real [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVSem Maps Virtual -> Real [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVMsq Maps Virtual -> Real [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 7 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 [41000] TRACE at execwrappers.cpp:494 in patchUserEnv; REASON='Patched user envp...' 
out.str() = addenv[dmtcp]:DMTCP_HIJACK_LIBS=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: addenv[dmtcp]:DMTCP_HIJACK_LIBS_M32=libdmtcp_infiniband.so:libdmtcp_alloc.so:libdmtcp_dl.so:libdmtcp_ipc.so:libdmtcp_svipc.so:libdmtcp_timer.so:libdmtcp.so:libdmtcp_pid.so: addenv[dmtcp]:DMTCP_CHECKPOINT_DIR=/home/slurm/tests addenv[dmtcp]:DMTCP_QUIET=0 addenv[dmtcp]:DMTCP_DLSYM_OFFSET=-1776 addenv[dmtcp]:DMTCP_DLSYM_OFFSET_M32=0 addenv[dmtcp]:DMTCP_VIRTUAL_PID=41000:40000:23920:############################################################# addenv[dmtcp]:LD_PRELOAD=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so [42000] TRACE at execwrappers.cpp:336 in dmtcpPrepareForExec; REASON='Will exec filename instead of path' path = /home/localsoft/dmtcp/bin/dmtcp_ssh *filename = /home/localsoft/dmtcp/bin/dmtcp_ssh [42000] TRACE at execwrappers.cpp:348 in dmtcpPrepareForExec; REASON='Prepared for Exec' getenv("LD_PRELOAD") = [42000] TRACE at execwrappers.cpp:469 in patchUserEnv; REASON='Creating a copy of (non-DMTCP) user env vars...' 
out.str() = non-DMTCP env vars: addenv[user]:XDG_SESSION_ID=32 addenv[user]:HOSTNAME=acme11.ciemat.es addenv[user]:TERM=xterm-256color addenv[user]:SHELL=/bin/bash addenv[user]:HISTSIZE=1000 addenv[user]:SSH_CLIENT=192.101.161.229 55755 22 addenv[user]:SSH_TTY=/dev/pts/1 addenv[user]:USER=root addenv[user]:LS_COLORS=rs=0:di=38;5;27:ln=38;5;51:mh=44;38;5;15:pi=40;38;5;11:so=38;5;13:do=38;5;5:bd=48;5;232;38;5;11:cd=48;5;232;38;5;3:or=48;5;232;38;5;9:mi=05;48;5;232;38;5;15:su=48;5;196;38;5;15:sg=48;5;11;38;5;16:ca=48;5;196;38;5;226:tw=48;5;10;38;5;16:ow=48;5;10;38;5;21:st=48;5;21;38;5;15:ex=38;5;34:*.tar=38;5;9:*.tgz=38;5;9:*.arc=38;5;9:*.arj=38;5;9:*.taz=38;5;9:*.lha=38;5;9:*.lz4=38;5;9:*.lzh=38;5;9:*.lzma=38;5;9:*.tlz=38;5;9:*.txz=38;5;9:*.tzo=38;5;9:*.t7z=38;5;9:*.zip=38;5;9:*.z=38;5;9:*.Z=38;5;9:*.dz=38;5;9:*.gz=38;5;9:*.lrz=38;5;9:*.lz=38;5;9:*.lzo=38;5;9:*.xz=38;5;9:*.bz2=38;5;9:*.bz=38;5;9:*.tbz=38;5;9:*.tbz2=38;5;9:*.tz=38;5;9:*.deb=38;5;9:*.rpm=38;5;9:*.jar=38;5;9:*.war=38;5;9:*.ear=38;5;9:*.sar=38;5;9:*.rar=38;5;9:*.alz=38;5;9:*.ace=38;5;9:*.zoo=38;5;9:*.cpio=38;5;9:*.7z=38;5;9:*.rz=38;5;9:*.cab=38;5;9:*.jpg=38;5;13:*.jpeg=38;5;13:*.gif=38;5;13:*.bmp=38;5;13:*.pbm=38;5;13:*.pgm=38;5;13:*.ppm=38;5;13:*.tga=38;5;13:*.xbm=38;5;13:*.xpm=38;5;13:*.tif=38;5;13:*.tiff=38;5;13:*.png=38;5;13:*.svg=38;5;13:*.svgz=38;5;13:*.mng=38;5;13:*.pcx=38;5;13:*.mov=38;5;13:*.mpg=38;5;13:*.mpeg=38;5;13:*.m2v=38;5;13:*.mkv=38;5;13:*.webm=38;5;13:*.ogm=38;5;13:*.mp4=38;5;13:*.m4v=38;5;13:*.mp4v=38;5;13:*.vob=38;5;13:*.qt=38;5;13:*.nuv=38;5;13:*.wmv=38;5;13:*.asf=38;5;13:*.rm=38;5;13:*.rmvb=38;5;13:*.flc=38;5;13:*.avi=38;5;13:*.fli=38;5;13:*.flv=38;5;13:*.gl=38;5;13:*.dl=38;5;13:*.xcf=38;5;13:*.xwd=38;5;13:*.yuv=38;5;13:*.cgm=38;5;13:*.emf=38;5;13:*.axv=38;5;13:*.anx=38;5;13:*.ogv=38;5;13:*.ogx=38;5;13:*.aac=38;5;45:*.au=38;5;45:*.flac=38;5;45:*.mid=38;5;45:*.midi=38;5;45:*.mka=38;5;45:*.mp3=38;5;45:*.mpc=38;5;45:*.ogg=38;5;45:*.ra=38;5;45:*.wav=38;5;45:*.axa=38;5;45:*.oga=38;5;45:*.
spx=38;5;45:*.xspf=38;5;45: addenv[user]:MAIL=/var/spool/mail/root addenv[user]:PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/home/localsoft/dmtcp/bin/:/home/localsoft/mvapich/bin/:/home/localsoft/slurm/soft/bin/:/home/localsoft/slurm/soft/sbin/:/root/bin addenv[user]:PWD=/home/slurm/tests addenv[user]:LANG=es_ES.UTF-8 addenv[user]:HISTCONTROL=ignoredups addenv[user]:SHLVL=1 addenv[user]:HOME=/root addenv[user]:LOGNAME=root addenv[user]:SSH_CONNECTION=192.101.161.229 55755 172.17.29.173 22 addenv[user]:LESSOPEN=||/usr/bin/lesspipe.sh %s addenv[user]:XDG_RUNTIME_DIR=/run/user/0 addenv[user]:OLDPWD=/root addenv[user]:_=/home/localsoft/dmtcp/bin/dmtcp_launch skipping: DMTCP_QUIET=0 skipping: DMTCP_CHECKPOINT_DIR=/home/slurm/tests skipping: DMTCP_VIRTUAL_PID=42000:40000:23920:############################################################# skipping: DMTCP_DLSYM_OFFSET=-1776 skipping: DMTCP_DLSYM_OFFSET_M32=0 skipping: DMTCP_HIJACK_LIBS=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: skipping: DMTCP_HIJACK_LIBS_M32=libdmtcp_infiniband.so:libdmtcp_alloc.so:libdmtcp_dl.so:libdmtcp_ipc.so:libdmtcp_svipc.so:libdmtcp_timer.so:libdmtcp.so:libdmtcp_pid.so: [42000] TRACE at execwrappers.cpp:494 in patchUserEnv; REASON='Patched user envp...' 
out.str() = addenv[dmtcp]:DMTCP_HIJACK_LIBS=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so: addenv[dmtcp]:DMTCP_HIJACK_LIBS_M32=libdmtcp_infiniband.so:libdmtcp_alloc.so:libdmtcp_dl.so:libdmtcp_ipc.so:libdmtcp_svipc.so:libdmtcp_timer.so:libdmtcp.so:libdmtcp_pid.so: addenv[dmtcp]:DMTCP_CHECKPOINT_DIR=/home/slurm/tests addenv[dmtcp]:DMTCP_QUIET=0 addenv[dmtcp]:DMTCP_DLSYM_OFFSET=-1776 addenv[dmtcp]:DMTCP_DLSYM_OFFSET_M32=0 addenv[dmtcp]:DMTCP_VIRTUAL_PID=42000:40000:23920:############################################################# addenv[dmtcp]:LD_PRELOAD=/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so [41000] TRACE at shareddata.cpp:193 in initialize; REASON='Shared area mapped' sharedDataHeader = 0x7f571f82a000 [41000] TRACE at processinfo.cpp:554 in serialize; REASON='Serialized process information' _sid = 23885 _ppid = 40000 _gid = 40000 _fgid = 40000 _isRootOfProcessTree = 0 _procname = mpirun_rsh _hostname = acme11.ciemat.es _launchCWD = /home/slurm/tests _ckptCWD = /home/slurm/tests _upid = 1d64b124afe30f29-41000-56253d73 _uppid = 1d64b124afe30f29-40000-56253d73 _compGroup = 0-0-0 _numPeers = 0 _noCoordinator = 0 _argvSize = 46 _envSize = 3567 _elfType = 1 [41000] TRACE at processinfo.cpp:565 in serialize; REASON='Serializing ChildPid Table' _childTable.size() = 0 o.filename() = 
[41000] TRACE at fileconnection.cpp:492 in serializeSubClass; REASON='Serializing PtyConn.' _ptsName = /dev/pts/1 _virtPtsName = /dev/pts/v0 [42000] TRACE at shareddata.cpp:193 in initialize; REASON='Shared area mapped' sharedDataHeader = 0x7f1cfc013000 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVShm Maps Virtual -> Real [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVSem Maps Virtual -> Real [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVMsq Maps Virtual -> Real [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 [41000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 6 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 [42000] TRACE at processinfo.cpp:554 in serialize; REASON='Serialized process information' _sid = 23885 _ppid = 40000 _gid = 40000 _fgid = 40000 _isRootOfProcessTree = 0 _procname = mpirun_rsh _hostname = acme11.ciemat.es _launchCWD = /home/slurm/tests _ckptCWD = /home/slurm/tests _upid = 1d64b124afe30f29-42000-56253d73 _uppid = 1d64b124afe30f29-40000-56253d73 _compGroup = 0-0-0 _numPeers = 0 _noCoordinator = 0 _argvSize = 46 _envSize = 3567 _elfType = 1 [41000] TRACE at dmtcpworker.cpp:315 in DmtcpWorker; REASON='libdmtcp.so: Running ' jalib::Filesystem::GetProgramName() = dmtcp_ssh getenv ("LD_PRELOAD") = 
/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so [42000] TRACE at processinfo.cpp:565 in serialize; REASON='Serializing ChildPid Table' _childTable.size() = 0 o.filename() = [41000] TRACE at dmtcpworker.cpp:111 in restoreUserLDPRELOAD; REASON='LD_PRELOAD' preload = userPreload = [41000] TRACE at coordinatorapi.cpp:118 in init; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-41000-56253d73 [42000] TRACE at fileconnection.cpp:492 in serializeSubClass; REASON='Serializing PtyConn.' _ptsName = /dev/pts/1 _virtPtsName = /dev/pts/v0 [42000] TRACE at fileconnection.cpp:936 in serializeSubClass; REASON='Serializing FileConn.' 
_path = /dev/null _rel_path = dmtcp_get_ckpt_files_subdir() = /home/slurm/tests/ckpt_dmtcp_ssh_1d64b124afe30f29-42000-56253d73_files _ckpted_file = 858923069 _fcntlFlags = -1 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVShm Maps Virtual -> Real [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVSem Maps Virtual -> Real [41000] TRACE at processinfo.cpp:180 in growStack; REASON='Original stack area' (void*)area.addr = 0x7ffe133b2000 area.size = 139264 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 0 out.str() = SysVMsq Maps Virtual -> Real [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 7 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 [42000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 7 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 [42000] TRACE at dmtcpworker.cpp:315 in DmtcpWorker; REASON='libdmtcp.so: Running ' jalib::Filesystem::GetProgramName() = dmtcp_ssh getenv ("LD_PRELOAD") = /home/localsoft/dmtcp/lib/dmtcp/libdmtcp_infiniband.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_alloc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_dl.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_ipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_svipc.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_timer.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp.so:/home/localsoft/dmtcp/lib/dmtcp/libdmtcp_pid.so [42000] TRACE at dmtcpworker.cpp:111 in restoreUserLDPRELOAD; REASON='LD_PRELOAD' preload = userPreload = [42000] TRACE at coordinatorapi.cpp:118 in init; REASON='Informing coordinator 
of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-42000-56253d73 [42000] TRACE at processinfo.cpp:180 in growStack; REASON='Original stack area' (void*)area.addr = 0x7ffc8f372000 area.size = 139264 [41000] TRACE at processinfo.cpp:218 in growStack; REASON='New stack size' (void*)area.addr = 0x7ffe12bef000 area.size = 8278016 [41000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23925 th->virtual_tid = 41000 [41000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [41000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23930 [41000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 41003 [41000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23930 th->virtual_tid = 41003 [41000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 41003 [41000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23930 curThread->virtual_tid = 41003 curThread->saved_sp = 0x7f571ee242e0 [41000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [41000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [41000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [41000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [41000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 3 sv[1] = 4 [41000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' 
id() = 1d64b124afe30f29-41000-56253d73(99004) domain = 1 type = 1 protocol = 0 [41000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [41000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 5 sv[1] = 6 [41000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-41000-56253d73(99006) domain = 1 type = 1 protocol = 0 [41000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [41000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 7 sv[1] = 8 [41000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-41000-56253d73(99008) domain = 1 type = 1 protocol = 0 [41000] TRACE at socketwrappers.cpp:59 in socket; REASON='socket created' ret = 9 domain = 2 type = 1 protocol = 0 [42000] TRACE at processinfo.cpp:218 in growStack; REASON='New stack size' (void*)area.addr = 0x7ffc8ebad000 area.size = 8286208 [41000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' 
id() = 1d64b124afe30f29-41000-56253d73(99010) domain = 2 type = 1 protocol = 0 [41000] TRACE at socketwrappers.cpp:141 in bind; REASON='bind' sockfd = 9 con->id() = 1d64b124afe30f29-41000-56253d73(99010) [42000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23926 th->virtual_tid = 42000 [41000] TRACE at socketwrappers.cpp:159 in listen; REASON='listen' sockfd = 9 con->id() = 1d64b124afe30f29-41000-56253d73(99010) backlog = 1 [42000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [42000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23931 [42000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 42003 [42000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23931 th->virtual_tid = 42003 [41000] TRACE at coordinatorapi.cpp:575 in createNewConnectionBeforeFork; REASON='Got virtual pid from coordinator' hello_remote.virtualPid = 43000 [42000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 42003 [42000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23931 curThread->virtual_tid = 42003 curThread->saved_sp = 0x7f1cfb60d2e0 [42000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [42000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [42000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [41000] TRACE at processinfo.cpp:393 in insertChild; REASON='Creating new virtualPid -> realPid mapping.' 
pid = 43000 uniquePid = 1d64b124afe30f29-43000-56253d73 [41000] TRACE at execwrappers.cpp:205 in fork; REASON='fork()ed [PARENT] done' child = 1d64b124afe30f29-43000-56253d73 [43000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 8 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 41003 -> 23930 43000 -> 23932 [42000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [43000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 8 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 41003 -> 23930 43000 -> 23932 [42000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 3 sv[1] = 4 [43000] TRACE at uniquepid.cpp:178 in resetOnFork; REASON='Explicitly setting process UniquePid' newId = 1d64b124afe30f29-43000-56253d73 [42000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-42000-56253d73(99005) domain = 1 type = 1 protocol = 0 [42000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [42000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 5 sv[1] = 6 [42000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-42000-56253d73(99007) domain = 1 type = 1 protocol = 0 [42000] TRACE at miscwrappers.cpp:127 in pipe; REASON='promoting pipe() to socketpair()' [42000] TRACE at socketwrappers.cpp:271 in socketpair; REASON='socketpair()' sv[0] = 7 sv[1] = 8 [42000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' 
id() = 1d64b124afe30f29-42000-56253d73(99009) domain = 1 type = 1 protocol = 0 [42000] TRACE at socketwrappers.cpp:59 in socket; REASON='socket created' ret = 9 domain = 2 type = 1 protocol = 0 [43000] TRACE at execwrappers.cpp:157 in pthread_atfork_child; REASON='fork()ed [CHILD]' child = 1d64b124afe30f29-43000-56253d73 parent = 1d64b124afe30f29-41000-56253d73 [42000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-42000-56253d73(99011) domain = 2 type = 1 protocol = 0 [43000] TRACE at coordinatorapi.cpp:140 in resetOnFork; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-43000-56253d73 [42000] TRACE at socketwrappers.cpp:141 in bind; REASON='bind' sockfd = 9 con->id() = 1d64b124afe30f29-42000-56253d73(99011) [42000] TRACE at socketwrappers.cpp:159 in listen; REASON='listen' sockfd = 9 con->id() = 1d64b124afe30f29-42000-56253d73(99011) backlog = 1 [43000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 8 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 41003 -> 23930 43000 -> 23932 [43000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 8 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 41002 -> 23928 41003 -> 23930 43000 -> 23932 [42000] TRACE at coordinatorapi.cpp:575 in createNewConnectionBeforeFork; REASON='Got virtual pid from coordinator' hello_remote.virtualPid = 44000 [43000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [43000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23930 [43000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23925 [43000] TRACE at 
execwrappers.cpp:199 in fork; REASON='fork() done [CHILD]' child = 1d64b124afe30f29-43000-56253d73 parent = 1d64b124afe30f29-41000-56253d73 [43000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23932 th->virtual_tid = 43000 [43000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [43000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23934 [42000] TRACE at processinfo.cpp:393 in insertChild; REASON='Creating new virtualPid -> realPid mapping.' pid = 44000 uniquePid = 1d64b124afe30f29-44000-56253d73 [42000] TRACE at execwrappers.cpp:205 in fork; REASON='fork()ed [PARENT] done' child = 1d64b124afe30f29-44000-56253d73 [43000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 43002 [43000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23934 th->virtual_tid = 43002 [43000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 43002 [44000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 9 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 42003 -> 23931 44000 -> 23933 [43000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23934 curThread->virtual_tid = 43002 curThread->saved_sp = 0x7f571ee242e0 [44000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 9 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 42003 -> 23931 44000 -> 23933 [43000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [43000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [44000] TRACE at 
uniquepid.cpp:178 in resetOnFork; REASON='Explicitly setting process UniquePid' newId = 1d64b124afe30f29-44000-56253d73 [43000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [44000] TRACE at execwrappers.cpp:157 in pthread_atfork_child; REASON='fork()ed [CHILD]' child = 1d64b124afe30f29-44000-56253d73 parent = 1d64b124afe30f29-42000-56253d73 [44000] TRACE at coordinatorapi.cpp:140 in resetOnFork; REASON='Informing coordinator of new process' UniquePid::ThisProcess() = 1d64b124afe30f29-44000-56253d73 [44000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 9 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 42003 -> 23931 44000 -> 23933 [44000] TRACE at virtualidtable.h:182 in printMaps; REASON='Virtual To Real Mappings:' _idMapTable.size() = 9 out.str() = Pid Maps Virtual -> Real 23885 -> 23885 40000 -> 23920 40002 -> 23923 40004 -> 23924 41000 -> 23925 42000 -> 23926 42002 -> 23929 42003 -> 23931 44000 -> 23933 [44000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [44000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23931 [44000] TRACE at threadlist.cpp:866 in threadIsDead; REASON='Putting thread on freelist' thread->tid = 23926 [44000] TRACE at execwrappers.cpp:199 in fork; REASON='fork() done [CHILD]' child = 1d64b124afe30f29-44000-56253d73 parent = 1d64b124afe30f29-42000-56253d73 [44000] TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23933 th->virtual_tid = 44000 [44000] TRACE at pid_miscwrappers.cpp:148 in __clone; REASON='Calling libc:__clone' [44000] TRACE at pid_miscwrappers.cpp:157 in __clone; REASON='New thread created' tid = 23935 [44000] TRACE at pid_miscwrappers.cpp:108 in clone_start; REASON='Calling user function' virtualTid = 44002 [44000] 
TRACE at threadlist.cpp:228 in updateTid; REASON='starting thread' th->tid = 23935 th->virtual_tid = 44002 [44000] TRACE at threadwrappers.cpp:67 in clone_start; REASON='Calling user function' dmtcp_gettid() = 44002 [44000] TRACE at threadlist.cpp:329 in checkpointhread; REASON='after sigsetjmp/getcontext' curThread->tid = 23935 curThread->virtual_tid = 44002 curThread->saved_sp = 0x7f1cfb60d2e0 [44000] TRACE at threadlist.cpp:348 in checkpointhread; REASON='before callbackSleepBetweenCheckpoint(0)' [44000] TRACE at dmtcpworker.cpp:509 in waitForStage1Suspend; REASON='running' [44000] TRACE at dmtcpworker.cpp:468 in waitForCoordinatorMsg; REASON='waiting for SUSPEND message' [41000] TRACE at socketwrappers.cpp:184 in process_accept; REASON='accepted incoming connection' sockfd = 9 con->id() = 1d64b124afe30f29-41000-56253d73(99011) [40000] TRACE at socketwrappers.cpp:184 in process_accept; REASON='accepted incoming connection' sockfd = 3 con->id() = 1d64b124afe30f29-40000-56253d73(99004) [42000] TRACE at socketwrappers.cpp:184 in process_accept; REASON='accepted incoming connection' sockfd = 9 con->id() = 1d64b124afe30f29-42000-56253d73(99012) [40000] TRACE at socketwrappers.cpp:184 in process_accept; REASON='accepted incoming connection' sockfd = 3 con->id() = 1d64b124afe30f29-40000-56253d73(99005) [40000] TRACE at socketwrappers.cpp:59 in socket; REASON='socket created' ret = 4 domain = 2 type = 1 protocol = 6 [40000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' id() = 1d64b124afe30f29-40000-56253d73(99006) domain = 2 type = 1 protocol = 6 [40000] TRACE at socketwrappers.cpp:59 in socket; REASON='socket created' ret = 5 domain = 2 type = 1 protocol = 6 [40000] TRACE at socketconnection.cpp:195 in TcpConnection; REASON='Creating TcpConnection.' 
id() = 1d64b124afe30f29-40000-56253d73(99008) domain = 2 type = 1 protocol = 6 [40000] TRACE at socketwrappers.cpp:121 in connect; REASON='connected' sockfd = 5 con->id() = 1d64b124afe30f29-40000-56253d73(99008) Process 0 of 2 is on acme11.ciemat.es Hello world from process 0 of 2 Goodbye world from process 0 of 2 [41000] TRACE at coordinatorapi.cpp:61 in dmtcp_CoordinatorAPI_EventHook; REASON='exit() in progress, disconnecting from dmtcp coordinator' [41000] TRACE at threadlist.cpp:241 in killCkpthread; REASON='Kill checkpinthread' ckptThread->tid = 23930 [41000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [42000] TRACE at coordinatorapi.cpp:61 in dmtcp_CoordinatorAPI_EventHook; REASON='exit() in progress, disconnecting from dmtcp coordinator' [42000] TRACE at threadlist.cpp:241 in killCkpthread; REASON='Kill checkpinthread' ckptThread->tid = 23931 [42000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' [40000] TRACE at coordinatorapi.cpp:61 in dmtcp_CoordinatorAPI_EventHook; REASON='exit() in progress, disconnecting from dmtcp coordinator' [40000] TRACE at threadlist.cpp:241 in killCkpthread; REASON='Kill checkpinthread' ckptThread->tid = 23923 [40000] TRACE at dmtcpworker.cpp:389 in cleanupWorker; REASON='disconnecting from dmtcp coordinator' -- Dr. Manuel Rodríguez-Pascual skype: manuel.rodriguez.pascual phone: (+34) 913466173 // (+34) 679925108 CIEMAT-Moncloa Edificio 22, desp. 1.25 Avenida Complutense, 40 28040- MADRID SPAIN ------------------------------------------------------------------------------ _______________________________________________ Dmtcp-forum mailing list Dmtcp-forum@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/dmtcp-forum