Hi,
Open MPI's signal handler (show_stackframe function defined in
opal/util/stacktrace.c) calls non-async-signal-safe functions
and it causes a problem.
See attached mpisigabrt.c. Passing corrupted memory to realloc(3)
will cause SIGABRT and show_stackframe function will be invoked.
But invoked show_stackframe function deadlocks in backtrace_symbols(3)
on some systems because backtrace_symbols(3) calls malloc(3)
internally and a deadlock of realloc/malloc mutex occurs.
Attached mpisigabrt.gstack.txt shows the stacktrace gotten
by gdb in this deadlock situation on Ubuntu 12.04 LTS (precise)
x86_64. Though I could not reproduce this behavior on RHEL 5/6,
I can reproduce it also on K computer and its successor PRIMEHPC FX10.
Passing non-heap memory to free(3) and double-free also cause
this deadlock.
malloc (and backtrace_symbols) is not marked as async-signal-safe
in POSIX and current glibc, though it seems to have been marked
in old glibc. So we should not call it in the signal handler now.
http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04
http://cygwin.com/ml/libc-help/2013-06/msg00005.html
I wrote a patch to address this issue. See the attached
async-signal-safe-stacktrace.patch.
This patch calls backtrace_symbols_fd(3) instead of backtrace_symbols(3).
Though backtrace_symbols_fd is not declared as async-signal-safe,
it is described not to call malloc internally in its man. So it
should be rather safer.
Output format of show_stackframe function is not changed by
this patch. But the opal_backtrace_print function (backtrace
framework) interface is changed for the output format compatibility.
This requires changes in some additional files (ompi_mpi_abort.c
etc.).
This patch also removes unnecessary fflush(3) calls, which are
meaningless for write(2) system call but might cause a similar
problem.
What do you think about this patch?
Takahiro Kawashima,
MPI development team,
Fujitsu
Index: opal/mca/backtrace/backtrace.h
===================================================================
--- opal/mca/backtrace/backtrace.h (revision 29841)
+++ opal/mca/backtrace/backtrace.h (working copy)
@@ -34,11 +34,12 @@
/*
- * print back trace to FILE file
+ * Print back trace to FILE file with a prefix for each line.
+ * First strip lines are not printed.
*
* \note some attempts made to be signal safe.
*/
-OPAL_DECLSPEC void opal_backtrace_print(FILE *file);
+OPAL_DECLSPEC int opal_backtrace_print(FILE *file, char *prefix, int strip);
/*
* Return back trace in buffer. buffer will be allocated by the
Index: opal/mca/backtrace/execinfo/backtrace_execinfo.c
===================================================================
--- opal/mca/backtrace/execinfo/backtrace_execinfo.c (revision 29841)
+++ opal/mca/backtrace/execinfo/backtrace_execinfo.c (working copy)
@@ -20,6 +20,10 @@
#include "opal_config.h"
#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#ifdef HAVE_EXECINFO_H
#include <execinfo.h>
#endif
@@ -27,23 +31,31 @@
#include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h"
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
{
- int i;
+ int i, fd, len;
int trace_size;
void * trace[32];
- char ** messages = (char **)NULL;
+ char buf[6];
+ fd = fileno (file);
+ if (-1 == fd) {
+ return OPAL_ERR_BAD_PARAM;
+ }
+
trace_size = backtrace (trace, 32);
- messages = backtrace_symbols (trace, trace_size);
- for (i = 0; i < trace_size; i++) {
- fprintf(file, "[%d] func:%s\n", i, messages[i]);
- fflush(file);
+ for (i = strip; i < trace_size; i++) {
+ if (NULL != prefix) {
+ write (fd, prefix, strlen (prefix));
+ }
+ len = snprintf (buf, sizeof(buf), "[%2d] ", i - strip);
+ write (fd, buf, len);
+ backtrace_symbols_fd (&trace[i], 1, fd);
}
- free(messages);
+ return OPAL_SUCCESS;
}
Index: opal/mca/backtrace/printstack/backtrace_printstack.c
===================================================================
--- opal/mca/backtrace/printstack/backtrace_printstack.c (revision 29841)
+++ opal/mca/backtrace/printstack/backtrace_printstack.c (working copy)
@@ -24,10 +24,12 @@
#include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h"
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
{
printstack(fileno(file));
+
+ return OPAL_SUCCESS;
}
Index: opal/mca/backtrace/none/backtrace_none.c
===================================================================
--- opal/mca/backtrace/none/backtrace_none.c (revision 29841)
+++ opal/mca/backtrace/none/backtrace_none.c (working copy)
@@ -23,9 +23,10 @@
#include "opal/constants.h"
#include "opal/mca/backtrace/backtrace.h"
-void
-opal_backtrace_print(FILE *file)
+int
+opal_backtrace_print(FILE *file, char *prefix, int strip)
{
+ return OPAL_ERR_NOT_IMPLEMENTED;
}
Index: opal/util/stacktrace.c
===================================================================
--- opal/util/stacktrace.c (revision 29841)
+++ opal/util/stacktrace.c (working copy)
@@ -53,12 +53,10 @@
/**
* This function is being called as a signal-handler in response
* to a user-specified signal (e.g. SIGFPE or SIGSEGV).
- * For Linux/Glibc, it then uses backtrace and backtrace_symbols
- * to figure the current stack and then prints that out to stdout.
+ * For Linux/Glibc, it then uses backtrace and backtrace_symbols_fd
+ * to figure the current stack and print that out to stderr.
* Where available, the BSD libexecinfo is used to provide Linux/Glibc
- * compatible backtrace and backtrace_symbols functions.
- * Yes, printf and malloc are not signal-safe per se, but should be
- * on Linux?
+ * compatible backtrace and backtrace_symbols_fd functions.
*
* @param signo with the signal number raised
* @param info with information regarding the reason/send of the signal
@@ -72,9 +70,8 @@
char print_buffer[1024];
char * tmp = print_buffer;
int size = sizeof (print_buffer);
- int ret, traces_size;
+ int ret;
char *si_code_str = "";
- char **traces;
/* write out the footer information */
memset (print_buffer, 0, sizeof (print_buffer));
@@ -82,18 +79,8 @@
HOSTFORMAT "*** Process received signal ***\n",
stacktrace_hostname, getpid());
write(fileno(stderr), print_buffer, ret);
- fflush(stderr);
- /*
- * Yes, we are doing printf inside a signal-handler.
- * However, backtrace itself calls malloc (which may not be signal-safe,
- * under linux, printf and malloc are)
- *
- * We could use backtrace_symbols_fd and write directly into an
- * filedescriptor, however, without formatting -- also this fd
- * should be opened in a sensible way...
- */
memset (print_buffer, 0, sizeof (print_buffer));
#ifdef HAVE_STRSIGNAL
@@ -342,28 +329,14 @@
/* write out the signal information generated above */
write(fileno(stderr), print_buffer, sizeof(print_buffer)-size);
- fflush(stderr);
/* print out the stack trace */
- ret = opal_backtrace_buffer(&traces, &traces_size);
- if (OPAL_SUCCESS == ret) {
- int i;
- /* since we have the opportunity, strip off the bottom two
- function calls, which will be this function and
- opal_backtrace_buffer(). */
- for (i = 2 ; i < traces_size ; ++i) {
- ret = snprintf(print_buffer, sizeof(print_buffer),
- HOSTFORMAT "[%2d] %s\n",
- stacktrace_hostname, getpid(), i - 2, traces[i]);
- if (ret > 0) {
- write(fileno(stderr), print_buffer, ret);
- } else {
- write(fileno(stderr), unable_to_print_msg,
- strlen(unable_to_print_msg));
- }
- }
- } else {
- opal_backtrace_print(stderr);
+ snprintf(print_buffer, sizeof(print_buffer), HOSTFORMAT,
+ stacktrace_hostname, getpid());
+ print_buffer[sizeof(print_buffer) - 1] = '\0';
+ ret = opal_backtrace_print(stderr, print_buffer, 2);
+ if (OPAL_SUCCESS != ret) {
+ write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
}
/* write out the footer information */
@@ -376,7 +349,6 @@
} else {
write(fileno(stderr), unable_to_print_msg, strlen(unable_to_print_msg));
}
- fflush(stderr);
}
#endif /* OPAL_WANT_PRETTY_PRINT_STACKTRACE */
@@ -393,12 +365,12 @@
int i;
/* since we have the opportunity, strip off the bottom two
function calls, which will be this function and
- opa_backtrace_buffer(). */
+ opal_backtrace_buffer(). */
for (i = 2; i < traces_size; ++i) {
opal_output(stream, "%s", traces[i]);
}
} else {
- opal_backtrace_print(stderr);
+ opal_backtrace_print(stderr, NULL, 2);
}
}
Index: ompi/runtime/ompi_mpi_abort.c
===================================================================
--- ompi/runtime/ompi_mpi_abort.c (revision 29841)
+++ ompi/runtime/ompi_mpi_abort.c (working copy)
@@ -87,7 +87,7 @@
/* This will print an message if it's unable to print the
backtrace, so we don't need an additional "else" clause
if opal_backtrace_print() is not supported. */
- opal_backtrace_print(stderr);
+ opal_backtrace_print(stderr, NULL, 1);
}
}
Index: orte/mca/oob/tcp/oob_tcp_common.c
===================================================================
--- orte/mca/oob/tcp/oob_tcp_common.c (revision 29841)
+++ orte/mca/oob/tcp/oob_tcp_common.c (working copy)
@@ -79,7 +79,7 @@
int optval;
optval = 1;
if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) {
- opal_backtrace_print(stderr);
+ opal_backtrace_print(stderr, NULL, 1);
opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)",
__FILE__, __LINE__,
strerror(opal_socket_errno),
#include <stdlib.h>
#include <execinfo.h>
#include <mpi.h>
int main(int argc, char *argv[])
{
void *p;
{
/* This code is not interest of us. Just avoiding hang in backtrace(3)
which is called in realloc(3) failure message.
https://sourceware.org/bugzilla/show_bug.cgi?id=956
https://sourceware.org/bugzilla/show_bug.cgi?id=16159
*/
void *b;
backtrace(&b, 0);
}
MPI_Init(NULL, NULL);
p = malloc(100);
*((size_t *)p - 1) = 0x10; /* simulate buffer overrun */
p = realloc(p, 100);
MPI_Finalize();
return 0;
}
Thread 2 (Thread 0x7f2a74605700 (LWP 16294)):
#0 0x00007f2a7585ea43 in __GI___poll (fds=<optimized out>,
#1 0x00007f2a7529fa3a in poll_dispatch ()
#2 0x00007f2a75294336 in opal_libevent2021_event_base_loop ()
#3 0x00007f2a7551711e in orte_progress_thread_engine ()
#4 0x00007f2a75b3de9a in start_thread (arg=0x7f2a74605700)
#5 0x00007f2a7586a3fd in clone ()
#6 0x0000000000000000 in ?? ()
Thread 1 (Thread 0x7f2a76240700 (LWP 16293)):
#0 __lll_lock_wait_private ()
#1 0x00007f2a757fb231 in _L_lock_10574 () at malloc.c:5241
#2 0x00007f2a757f8f87 in __GI___libc_malloc (bytes=139820340016928)
#3 0x00007f2a7588131a in __backtrace_symbols (array=0x7fff4a14a970, size=12)
#4 0x00007f2a7528e165 in opal_backtrace_buffer ()
#5 0x00007f2a7528b8e6 in show_stackframe ()
#6 <signal handler called>
#7 0x00007f2a757ac425 in __GI_raise (sig=<optimized out>)
#8 0x00007f2a757afb8b in __GI_abort () at abort.c:91
#9 0x00007f2a757ea39e in __libc_message (do_abort=2,
#10 0x00007f2a757f4b96 in malloc_printerr (action=3,
#11 0x00007f2a757f7ee7 in _int_realloc (av=0x7f2a75b2f720, oldp=0x172d050,
#12 0x00007f2a757f96fe in __GI___libc_realloc (oldmem=0x172d060, bytes=100)
#13 0x00000000004007b1 in main (argc=1, argv=0x7fff4a14bfb8) at mpisigabrt.c:23