You mentioned you're using openais (not corosync).
Try the attached patch and report if it resolves the problem for you.
Regards
-steve
On Tue, 2009-12-15 at 10:06 -0700, hj lee wrote:
> Sorry, forgot to attach the stacktrace.
>
> hj
>
> On Tue, Dec 15, 2009 at 10:05 AM, hj lee <[email protected]> wrote:
> Hi,
>
> Even after disabling the timestamp in syslog, I still have this
> issue. I attached the stack trace of the aisexec process. I found
> two issues in the openais code.
> 1. openais has log_mode_mutex to allow only one
> thread to call syslog at a time. This is used with
> pthread_atfork() to block two simultaneous syslog calls. But
> clearly, as we can see from the trace, two
> threads (thread 1 and 2) called log_printf_worker_fn()
> at the same time, so two threads called syslog() at
> the same time, which caused the seg fault. If
> log_mode_mutex is moved from _log_printf to
> log_printf_worker_fn(), this can be prevented, so no seg fault.
> 2. When thread 2 had a seg fault, it called the seg fault
> signal handler sigsegv_handler(). The signal handler
> calls syslog again and it never returns. I read an
> article saying that syslog is not a safe function for
> signal handlers and should not be used inside a signal
> handler. So if syslog is removed from the signal
> handler, this signal handler can return. Then at
> least the deadlock may be avoided.
> This is a serious issue for our product development using
> pacemaker/openais, so I want to resolve this issue as soon as
> possible.
>
> Thank you
> hj
>
>
>
> On Mon, Dec 7, 2009 at 2:21 AM, Fabio M. Di Nitto
> <[email protected]> wrote:
> Jan Friesse wrote:
> > Fabio M. Di Nitto wrote:
> >> Jan Friesse wrote:
> >>> Included is patch removing strftime and replace
> it:
> >>> - first version with ISO style date format
> (2009-11-04 10:40:40 ...)
> >>> - second version with output similar to what we
> have now (Dec 04 ...)
> >>>
> >>> Choose what you prefer more.
> >>
> >> we need to be consistent with the current format.
> There is no choice there.
> >>
> >> Fabio
> >
> > we *Need*? Why?
> >
>
>
> because we need to be consistent as that same format
> is used by all
> applications in the cluster stack and not just
> corosync.
>
> Fabio
>
> _______________________________________________
> Openais mailing list
> [email protected]
> https://lists.linux-foundation.org/mailman/listinfo/openais
>
>
>
>
>
> --
> Dream with longterm vision!
> kerdosa
>
>
>
> --
> Dream with longterm vision!
> kerdosa
>
>
>
>
>
>
> plain text
> document
> attachment
> (aisexec-stacktrace.txt)
>
> 0x00350402 in __kernel_vsyscall ()
> (gdb) bt
> #0 0x00350402 in __kernel_vsyscall ()
> #1 0x00c432d3 in __lll_lock_wait_private ()
> from /lib/i686/nosegneg/libc.so.6
> #2 0x00be78ab in _L_lock_1790 () from /lib/i686/nosegneg/libc.so.6
> #3 0x00be7634 in __tz_convert () from /lib/i686/nosegneg/libc.so.6
> #4 0x00be5c6c in localtime_r () from /lib/i686/nosegneg/libc.so.6
> #5 0x00c326df in __vsyslog_chk () from /lib/i686/nosegneg/libc.so.6
> #6 0x00c32c5a in syslog () from /lib/i686/nosegneg/libc.so.6
> #7 0x08061d66 in log_printf_worker_fn ()
> #8 0x08062029 in _log_printf ()
> #9 0x0806222c in internal_log_printf2 ()
> #10 0x00cd7abd in pcmk_plugin_init ()
> from /usr/libexec/lcrso/pacemaker.lcrso
> #11 0x00cd85d3 in pcmk_startup ()
> from /usr/libexec/lcrso/pacemaker.lcrso
> #12 0x08063d92 in openais_service_link_and_init ()
> #13 0x08063ff5 in openais_service_defaults_link_and_init ()
> #14 0x08061524 in main ()
> (gdb) info thread
> 3 Thread 0xb7f4d230 (LWP 3897) 0x00350402 in __kernel_vsyscall ()
> 2 Thread 0xb7f34b90 (LWP 3899) 0x00350402 in __kernel_vsyscall ()
> * 1 Thread 0xb7f4e6c0 (LWP 3896) 0x00350402 in __kernel_vsyscall ()
> (gdb) thread 2
> [Switching to thread 2 (Thread 0xb7f34b90 (LWP 3899))]#0 0x00350402
> in __kernel_vsyscall ()
> (gdb) bt
> #0 0x00350402 in __kernel_vsyscall ()
> #1 0x00c432d3 in __lll_lock_wait_private ()
> from /lib/i686/nosegneg/libc.so.6
> #2 0x00be78ab in _L_lock_1790 () from /lib/i686/nosegneg/libc.so.6
> #3 0x00be7634 in __tz_convert () from /lib/i686/nosegneg/libc.so.6
> #4 0x00be5c6c in localtime_r () from /lib/i686/nosegneg/libc.so.6
> #5 0x00c326df in __vsyslog_chk () from /lib/i686/nosegneg/libc.so.6
> #6 0x00c32c5a in syslog () from /lib/i686/nosegneg/libc.so.6
> #7 0x08061d66 in log_printf_worker_fn ()
> #8 0x08060aa3 in worker_thread_group_atsegv ()
> #9 0x08062390 in log_flush ()
> #10 0x08060dcb in sigsegv_handler ()
> #11 <signal handler called>
> #12 0x00b8e7ee in getenv () from /lib/i686/nosegneg/libc.so.6
> #13 0x00be6cf0 in tzset_internal () from /lib/i686/nosegneg/libc.so.6
> #14 0x00be780d in tzset () from /lib/i686/nosegneg/libc.so.6
> #15 0x00bebf96 in strftime_l () from /lib/i686/nosegneg/libc.so.6
> #16 0x00bec8a4 in strftime_l () from /lib/i686/nosegneg/libc.so.6
> #17 0x00c3270b in __vsyslog_chk () from /lib/i686/nosegneg/libc.so.6
> #18 0x00c32c5a in syslog () from /lib/i686/nosegneg/libc.so.6
> #19 0x08061d66 in log_printf_worker_fn ()
> #20 0x080607ed in worker_thread ()
> #21 0x00ae74d2 in start_thread ()
> from /lib/i686/nosegneg/libpthread.so.0
> #22 0x00c3648e in clone () from /lib/i686/nosegneg/libc.so.6
> (gdb) thread 3
> [Switching to thread 3 (Thread 0xb7f4d230 (LWP 3897))]#0 0x00350402
> in __kernel_vsyscall ()
> (gdb) bt
> #0 0x00350402 in __kernel_vsyscall ()
> #1 0x00c2c563 in poll () from /lib/i686/nosegneg/libc.so.6
> #2 0x080660ce in prioritized_timer_thread ()
> #3 0x00ae74d2 in start_thread ()
> from /lib/i686/nosegneg/libpthread.so.0
> #4 0x00c3648e in clone () from /lib/i686/nosegneg/libc.so.6
> (gdb)
Index: exec/ckpt.c
===================================================================
--- exec/ckpt.c (revision 2093)
+++ exec/ckpt.c (working copy)
@@ -506,19 +506,19 @@
.lib_handler_fn = message_handler_req_lib_ckpt_sectioniterationinitialize,
.response_size = sizeof (struct res_lib_ckpt_sectioniterationinitialize),
.response_id = MESSAGE_RES_CKPT_SECTIONITERATIONINITIALIZE,
- .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED
+ .flow_control = OPENAIS_FLOW_CONTROL_REQUIRED
},
{ /* 15 */
.lib_handler_fn = message_handler_req_lib_ckpt_sectioniterationfinalize,
.response_size = sizeof (struct res_lib_ckpt_sectioniterationfinalize),
.response_id = MESSAGE_RES_CKPT_SECTIONITERATIONFINALIZE,
- .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED
+ .flow_control = OPENAIS_FLOW_CONTROL_REQUIRED
},
{ /* 16 */
.lib_handler_fn = message_handler_req_lib_ckpt_sectioniterationnext,
.response_size = sizeof (struct res_lib_ckpt_sectioniterationnext),
.response_id = MESSAGE_RES_CKPT_SECTIONITERATIONNEXT,
- .flow_control = OPENAIS_FLOW_CONTROL_NOT_REQUIRED
+ .flow_control = OPENAIS_FLOW_CONTROL_REQUIRED
}
};
Index: exec/print.c
===================================================================
--- exec/print.c (revision 2093)
+++ exec/print.c (working copy)
@@ -76,6 +76,11 @@
static unsigned int dropped_log_entries = 0;
+static const char log_month_name[][4] = {
+ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+ "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+
#ifndef MAX_LOGGERS
#define MAX_LOGGERS 32
#endif
@@ -186,6 +191,7 @@
int len;
struct log_data log_data;
unsigned int res = 0;
+ struct tm tm_res;
assert (id < MAX_LOGGERS);
@@ -202,8 +208,11 @@
if (((logmode & LOG_MODE_FILE) || (logmode & LOG_MODE_STDERR)) &&
(logmode & LOG_MODE_TIMESTAMP)) {
gettimeofday (&tv, NULL);
- strftime (char_time, sizeof (char_time), "%b %e %k:%M:%S",
- localtime (&tv.tv_sec));
+ (void)localtime_r ((time_t *)&tv.tv_sec, &tm_res);
+ snprintf (char_time, sizeof (char_time),
+ "%s %02d %02d:%02d:%02d",
+ log_month_name[tm_res.tm_mon], tm_res.tm_mday,
+ tm_res.tm_hour, tm_res.tm_min, tm_res.tm_sec);
i = sprintf (newstring, "%s.%06ld ", char_time, (long)tv.tv_usec);
}
_______________________________________________
Openais mailing list
[email protected]
https://lists.linux-foundation.org/mailman/listinfo/openais