[devel] [PATCH 1/1] base: Place TraceLog instance on heap memory V3 [#2860]
When a process calls exit(), the exit_handler trigger __do_global_dtor_aux then calls TraceLog destructor. One of process thread is calling TraceLog::Log while the destructor is called. This leads to a coredump. The process (which could by any applications) calling exit() first is responsible to make exit() thread-safe. However, the TraceLog should also avoid the coredump occurring at OpenSAF side. Runtime allocate TraceLog pointer so that its memory is located in heap. This allocation to avoid its desctructor to be called as part of __do_global_dtor_aux. --- src/base/logtrace.cc| 21 - src/base/logtrace_client.cc | 5 + src/base/logtrace_client.h | 2 ++ src/mds/mds_log.cc | 15 --- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/src/base/logtrace.cc b/src/base/logtrace.cc index fd3d829..3d64e4d 100644 --- a/src/base/logtrace.cc +++ b/src/base/logtrace.cc @@ -43,8 +43,8 @@ bool enable_osaf_log = false; } // namespace global -static TraceLog gl_trace; -static TraceLog gl_osaflog; +TraceLog* gl_trace = nullptr; +TraceLog* gl_osaflog = nullptr; static pid_t gettid() { return syscall(SYS_gettid); } @@ -88,8 +88,8 @@ void trace_output(const char *file, unsigned line, unsigned priority, if (strncmp(file, "src/", 4) == 0) file += 4; snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", gettid(), file, line, global::prefix_name[priority + category], format); - gl_trace.Log(static_cast(priority), preamble, - ap); + TraceLog::Log(gl_trace, static_cast(priority), + preamble, ap); } void log_output(const char *file, unsigned line, unsigned priority, @@ -101,8 +101,8 @@ void log_output(const char *file, unsigned line, unsigned priority, if (strncmp(file, "src/", 4) == 0) file += 4; snprintf(preamble, sizeof(preamble), "%d:%s:%u %s %s", gettid(), file, line, global::prefix_name[priority + category], format); - gl_osaflog.Log(static_cast(priority), preamble, - ap); + TraceLog::Log(gl_osaflog, static_cast(priority), + preamble, ap); } void logtrace_log(const char 
*file, unsigned line, int priority, @@ -176,11 +176,13 @@ int logtrace_init(const char *, const char *pathname, unsigned mask) { global::msg_id = nullptr; } if (result && mask != 0) { -result = gl_trace.Init(global::msg_id, TraceLog::kBlocking); +if (!gl_trace) gl_trace = new TraceLog(); +result = gl_trace->Init(global::msg_id, TraceLog::kBlocking); } if (base::GetEnv("OSAF_LOCAL_NODE_LOG", uint32_t{0}) == 1) { global::enable_osaf_log = true; -gl_osaflog.Init(global::osaf_log_file, TraceLog::kBlocking); +if (!gl_osaflog) gl_osaflog = new TraceLog(); +gl_osaflog->Init(global::osaf_log_file, TraceLog::kBlocking); } if (result) { syslog(LOG_INFO, "logtrace: trace enabled to file '%s', mask=0x%x", @@ -221,7 +223,8 @@ int trace_category_set(unsigned mask) { if (global::category_mask == 0) { syslog(LOG_INFO, "logtrace: trace disabled"); } else { -gl_trace.Init(global::msg_id, TraceLog::kBlocking); +if (!gl_trace) gl_trace = new TraceLog(); +gl_trace->Init(global::msg_id, TraceLog::kBlocking); syslog(LOG_INFO, "logtrace: trace enabled to file %s, mask=0x%x", global::msg_id, global::category_mask); } diff --git a/src/base/logtrace_client.cc b/src/base/logtrace_client.cc index 0dac6d3..a9d82e2 100644 --- a/src/base/logtrace_client.cc +++ b/src/base/logtrace_client.cc @@ -81,6 +81,11 @@ bool TraceLog::Init(const char *msg_id, WriteMode mode) { return true; } +void TraceLog::Log(TraceLog* tracelog, base::LogMessage::Severity severity, + const char *fmt, va_list ap) { + if (tracelog != nullptr) tracelog->Log(severity, fmt, ap); +} + void TraceLog::Log(base::LogMessage::Severity severity, const char *fmt, va_list ap) { if (log_socket_ != nullptr && mutex_ != nullptr) { diff --git a/src/base/logtrace_client.h b/src/base/logtrace_client.h index eac31d0..21c7d2e 100644 --- a/src/base/logtrace_client.h +++ b/src/base/logtrace_client.h @@ -32,6 +32,8 @@ class TraceLog { kNonblocking = base::UnixSocket::Mode::kNonblocking, }; bool Init(const char *msg_id, WriteMode mode); + static 
void Log(TraceLog* tracelog, base::LogMessage::Severity severity, + const char *fmt, va_list ap); void Log(base::LogMessage::Severity severity, const char *fmt, va_list ap); TraceLog(); diff --git a/src/mds/mds_log.cc b/src/mds/mds_log.cc index 0792975..24bc398 100644 --- a/src/mds/mds_log.cc +++ b/src/mds/mds_log.cc @@ -29,7 +29,7 @@ #include "mds/mds_papi.h" int gl_mds_log_level = 3; -static TraceLog gl_mds_log; +TraceLog* gl_mds_log = nullptr; /*** * Funtion Name :mds_log_init @@ -40,7 +40,8 @@ static TraceLog gl_mds_log; *
[devel] [PATCH 0/1] Review Request for base: Place TraceLog instance on heap memory V3 [#2860]
Summary: base: Place TraceLog instance on heap memory V3 [#2860] Review request for Ticket(s): 2860 Peer Reviewer(s): Hans, Anders, Ravi Pull request to: *** LIST THE PERSON WITH PUSH ACCESS HERE *** Affected branch(es): develop Development branch: ticket-2860 Base revision: e0bcf786e0b3417d31b767073bb789ef150eb2ad Personal repository: git://git.code.sf.net/u/minh-chau/review Impacted area Impact y/n Docs n Build system n RPM/packaging n Configuration files n Startup scripts n SAF services n OpenSAF services n Core libraries y Samples n Tests n Other n Comments (indicate scope for each "y" above): - *** EXPLAIN/COMMENT THE PATCH SERIES HERE *** revision 4ce641a69ba7bb5938cfce5f915476fbf6e22b07 Author: Minh Chau Date: Fri, 25 May 2018 13:51:38 +1000 base: Place TraceLog instance on heap memory V3 [#2860] When a process calls exit(), the exit_handler triggers __do_global_dtor_aux, which then calls the TraceLog destructor. One of the process's threads is calling TraceLog::Log while the destructor is called. This leads to a coredump. The process (which could be any application) calling exit() first is responsible for making exit() thread-safe. However, TraceLog should also avoid the coredump occurring on the OpenSAF side. Allocate the TraceLog instance at runtime so that its memory is located on the heap. This allocation prevents its destructor from being called as part of __do_global_dtor_aux. 
Complete diffstat: -- src/base/logtrace.cc| 21 - src/base/logtrace_client.cc | 5 + src/base/logtrace_client.h | 2 ++ src/mds/mds_log.cc | 15 --- 4 files changed, 27 insertions(+), 16 deletions(-) Testing Commands: - *** LIST THE COMMAND LINE TOOLS/STEPS TO TEST YOUR CHANGES *** Testing, Expected Results: -- *** PASTE COMMAND OUTPUTS / TEST RESULTS *** Conditions of Submission: - *** HOW MANY DAYS BEFORE PUSHING, CONSENSUS ETC *** Arch Built StartedLinux distro --- mipsn n mips64 n n x86 n n x86_64 y y powerpc n n powerpc64 n n Reviewer Checklist: --- [Submitters: make sure that your review doesn't trigger any checkmarks!] Your checkin has not passed review because (see checked entries): ___ Your RR template is generally incomplete; it has too many blank entries that need proper data filled in. ___ You have failed to nominate the proper persons for review and push. ___ Your patches do not have proper short+long header ___ You have grammar/spelling in your header that is unacceptable. ___ You have exceeded a sensible line length in your headers/comments/text. ___ You have failed to put in a proper Trac Ticket # into your commits. ___ You have incorrectly put/left internal data in your comments/files (i.e. internal bug tracking tool IDs, product names etc) ___ You have not given any evidence of testing beyond basic build tests. Demonstrate some level of runtime or other sanity testing. ___ You have ^M present in some of your files. These have to be removed. ___ You have needlessly changed whitespace or added whitespace crimes like trailing spaces, or spaces before tabs. ___ You have mixed real technical changes with whitespace and other cosmetic code cleanup changes. These have to be separate commits. ___ You need to refactor your submission into logical chunks; there is too much content into a single commit. 
___ You have extraneous garbage in your review (merge commits etc) ___ You have giant attachments which should never have been sent; instead you should place your content in a public tree to be pulled. ___ You have too many commits attached to an e-mail; resend as threaded commits, or place in a public tree for a pull. ___ You have resent this content multiple times without a clear indication of what has changed between each re-send. ___ You have failed to adequately and individually address all of the comments and change requests that were proposed in the initial review. ___ You have a misconfigured ~/.gitconfig file (i.e. user.name, user.email etc) ___ Your computer has a badly configured date and time; confusing the threaded patch review. ___ Your changes affect IPC mechanism, and you don't present any results for in-service upgradability test. ___ Your changes affect user manual and documentation, your patch series do not contain the patch that updates the Doxygen manual. -- Check out the vibrant tech
Re: [devel] [PATCH 1/1] base: Improve backtrace print in daemon.c [#2853]
Hi Hans Ack Thanks Gary On 16/5/18, 5:27 pm, "Hans Nordeback"wrote: --- src/base/daemon.c | 52 ++--- tools/cluster_sim_uml/build_uml | 1 + 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/base/daemon.c b/src/base/daemon.c index 361dd8dd6..2ad0dcd2d 100644 --- a/src/base/daemon.c +++ b/src/base/daemon.c @@ -608,14 +608,18 @@ static void fatal_signal_handler(int sig, siginfo_t *siginfo, void *ctx) const int BT_ARRAY_SIZE = 20; void *bt_array[BT_ARRAY_SIZE]; size_t bt_size; - int fd; char bt_header[40]; + char cmd_buf[200]; + char addr2line_buf[120]; + Dl_info dl_info; + FILE *fp; - if ((fd = open(bt_filename, O_RDWR | O_CREAT, 0644)) < 0) { + int fd = open(bt_filename, O_RDWR | O_CREAT, 0644); + + if (fd < 0) goto done; - } - snprintf(bt_header, sizeof(bt_header), "signal: %d pid: %u uid: %u\n", + snprintf(bt_header, sizeof(bt_header), "signal: %d pid: %u uid: %u\n\n", sig, siginfo->si_pid, siginfo->si_uid); if (write(fd, bt_header, strlen(bt_header)) < 0) { @@ -624,6 +628,45 @@ static void fatal_signal_handler(int sig, siginfo_t *siginfo, void *ctx) } bt_size = plibc_backtrace(bt_array, BT_ARRAY_SIZE); + + if (system("which addr2line") == 0) { + for (int i = 0; i < bt_size; ++i) { + memset(_info, 0, sizeof(dl_info)); + dladdr(bt_array[i], _info); + ptrdiff_t offset = bt_array[i] - dl_info.dli_fbase; + + snprintf(cmd_buf, sizeof(cmd_buf), +"addr2line %tx -p -f -e %s", +offset, dl_info.dli_fname); + + fp = popen(cmd_buf, "r"); + if (fp == NULL) { + syslog(LOG_ERR, + "popen failed: %s", strerror(errno)); + } else { + if (fgets(addr2line_buf, + sizeof(addr2line_buf), + fp) != NULL) { + snprintf(cmd_buf, sizeof(cmd_buf), +"# %d %s", +i, addr2line_buf); + if (write(fd, cmd_buf, + strlen(cmd_buf)) < 0) { + syslog(LOG_ERR, + "write failed: %s", + strerror(errno)); + } + } + pclose(fp); + } + } + } + + if (write(fd, "\n", 1) < 0) { + syslog(LOG_ERR, + "write failed: %s", strerror(errno)); + } + plibc_backtrace_symbols_fd(bt_array, bt_size, fd); close(fd); 
@@ -677,6 +720,7 @@ static void install_fatal_signal_handlers(void) time_string, getpid()); struct sigaction action; + memset(, 0, sizeof(action)); action.sa_sigaction = fatal_signal_handler; sigfillset(_mask); diff --git a/tools/cluster_sim_uml/build_uml b/tools/cluster_sim_uml/build_uml index b9f224360..16d49d03e 100755 --- a/tools/cluster_sim_uml/build_uml +++ b/tools/cluster_sim_uml/build_uml @@ -176,6 +176,7 @@ cmd_create_rootfs() test -e /usr/bin/lsof && install /usr/bin/lsof usr/bin test -e /bin/pidof && install /bin/pidof usr/bin test -e /usr/sbin/tcpdump && install /usr/sbin/tcpdump usr/sbin +test -e /usr/bin/addr2line && install /usr/bin/addr2line usr/bin if test -e /usr/bin/gdb; then install /usr/bin/gdb usr/bin if test -d /usr/share/gdb; then -- 2.17.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel
Re: [devel] [PATCH 1/1] base: Destructor of TraceLog causes coredump V2 [#2860]
Hi Hans, Place it on heap so destructor won't be called as part of __do_global_dtor_aux I guess, it should be equivalent to the previous of TraceLog regarding this issue. I think you can make "if (mutex_ && mutex_->good())", it can pass the check mutex_ against null but not sure after that. I will try the shared_ptr and test it to see if it is good to go. Thanks, Minh On 24/05/18 16:26, Hans Nordeback wrote: Hi Minh, yes gl_trace/gl_log can be put on the heap, you mean without any owner? But in this case when destructors for one of the threads has been run other threads should notice that some states may not be valid anymore, e.g. that mutex_->good() returns false, but if mutex_ has been released it should be if (mutex_ && mutex_->good()) instead. /Thanks HansN On 05/24/2018 06:20 AM, Hans Nordebäck wrote: Hi Minh, yes you are right about the possibility for a segv, but using a std::shared_ptr instead of the naked ptr may be an option? /Thanks Hans Från: Minh Hon Chau Skickat: den 24 maj 2018 02:34:13 Till: Hans Nordebäck; Anders Widell; Gary Lee Kopia: opensaf-devel@lists.sourceforge.net Ämne: Re: [PATCH 1/1] base: Destructor of TraceLog causes coredump V2 [#2860] Hi Hans, It is good to give an option to Mutex class not to abort. We can avoid the abort in mutex_unlock (as reported in coredump), but I feel the issue is still there. We may hit a problem (segv?) with "mutex_->good()" since the other thread is wiping out the mutex_ in destructor, it is a matter of timing to happen I guess. As we don't have (and don't want to have) any protection between two threads for the TraceLog, so the good one (I hope) is making one of those threads not to touch the TraceLog. If you don't like to remove the destructor, another way is locating the gl_trace/gl_log to the HEAP? 
Thanks, Minh On 23/05/18 20:50, Hans Nordeback wrote: Change Mutex class to make it possible for caller to decide if abort --- src/base/logtrace_client.cc | 5 - src/base/mutex.cc | 2 +- src/base/mutex.h | 22 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/base/logtrace_client.cc b/src/base/logtrace_client.cc index 0dac6d389..f597c1ae3 100644 --- a/src/base/logtrace_client.cc +++ b/src/base/logtrace_client.cc @@ -76,7 +76,7 @@ bool TraceLog::Init(const char *msg_id, WriteMode mode) { msg_id_ = base::LogMessage::MsgId{msg_id}; log_socket_ = new base::UnixClientSocket{Osaflog::kServerSocketPath, static_cast(mode)}; - mutex_ = new base::Mutex{}; + mutex_ = new base::Mutex{false}; return true; } @@ -91,6 +91,9 @@ void TraceLog::Log(base::LogMessage::Severity severity, const char *fmt, void TraceLog::LogInternal(base::LogMessage::Severity severity, const char *fmt, va_list ap) { base::Lock lock(*mutex_); + + if (!mutex_->good()) return; + uint32_t id = sequence_id_; sequence_id_ = id < kMaxSequenceId ? 
id + 1 : 1; buffer_.clear(); diff --git a/src/base/mutex.cc b/src/base/mutex.cc index 5fa6ac55a..1627ac20b 100644 --- a/src/base/mutex.cc +++ b/src/base/mutex.cc @@ -20,7 +20,7 @@ namespace base { -Mutex::Mutex() : mutex_{} { +Mutex::Mutex(bool abort) : abort_{abort}, mutex_{}, result_{0} { pthread_mutexattr_t attr; int result = pthread_mutexattr_init(); if (result != 0) osaf_abort(result); diff --git a/src/base/mutex.h b/src/base/mutex.h index 7b3cee187..e3c54a711 100644 --- a/src/base/mutex.h +++ b/src/base/mutex.h @@ -31,30 +31,34 @@ namespace base { class Mutex { public: using NativeHandleType = pthread_mutex_t*; - Mutex(); + Mutex(bool abort = true); ~Mutex(); void Lock() { - int result = pthread_mutex_lock(_); - if (result != 0) osaf_abort(result); + result_ = pthread_mutex_lock(_); + if (abort_ && result_ != 0) osaf_abort(result_); } bool TryLock() { - int result = pthread_mutex_trylock(_); - if (result == 0) { + result_ = pthread_mutex_trylock(_); + if (result_ == 0) { return true; - } else if (result == EBUSY) { + } else if (result_ == EBUSY) { return false; } else { - osaf_abort(result); + if (abort_) osaf_abort(result_); + return false; } } void Unlock() { - int result = pthread_mutex_unlock(_); - if (result != 0) osaf_abort(result); + result_ = pthread_mutex_unlock(_); + if (abort_ && result_ != 0) osaf_abort(result_); } NativeHandleType native_handle() { return _; } + bool good() const {return result_ == 0;}; private: + bool abort_; pthread_mutex_t mutex_; + int result_; DELETE_COPY_AND_MOVE_OPERATORS(Mutex); }; -- Check out the vibrant tech community on one of the world's most engaging tech sites,
Re: [devel] [PATCH 1/1] base: Destructor of TraceLog causes coredump V2 [#2860]
Hi Minh, yes gl_trace/gl_log can be put on the heap, you mean without any owner? But in this case when destructors for one of the threads has been run other threads should notice that some states may not be valid anymore, e.g. that mutex_->good() returns false, but if mutex_ has been released it should be if (mutex_ && mutex_->good()) instead. /Thanks HansN On 05/24/2018 06:20 AM, Hans Nordebäck wrote: Hi Minh, yes you are right about the possibility for a segv, but using a std::shared_ptr instead of the naked ptr may be an option? /Thanks Hans Från: Minh Hon Chau Skickat: den 24 maj 2018 02:34:13 Till: Hans Nordebäck; Anders Widell; Gary Lee Kopia: opensaf-devel@lists.sourceforge.net Ämne: Re: [PATCH 1/1] base: Destructor of TraceLog causes coredump V2 [#2860] Hi Hans, It is good to give an option to Mutex class not to abort. We can avoid the abort in mutex_unlock (as reported in coredump), but I feel the issue is still there. We may hit a problem (segv?) with "mutex_->good()" since the other thread is wiping out the mutex_ in destructor, it is a matter of timing to happen I guess. As we don't have (and don't want to have) any protection between two threads for the TraceLog, so the good one (I hope) is making one of those threads not to touch the TraceLog. If you don't like to remove the destructor, another way is locating the gl_trace/gl_log to the HEAP? 
Thanks, Minh On 23/05/18 20:50, Hans Nordeback wrote: Change Mutex class to make it possible for caller to decide if abort --- src/base/logtrace_client.cc | 5 - src/base/mutex.cc | 2 +- src/base/mutex.h| 22 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/base/logtrace_client.cc b/src/base/logtrace_client.cc index 0dac6d389..f597c1ae3 100644 --- a/src/base/logtrace_client.cc +++ b/src/base/logtrace_client.cc @@ -76,7 +76,7 @@ bool TraceLog::Init(const char *msg_id, WriteMode mode) { msg_id_ = base::LogMessage::MsgId{msg_id}; log_socket_ = new base::UnixClientSocket{Osaflog::kServerSocketPath, static_cast(mode)}; - mutex_ = new base::Mutex{}; + mutex_ = new base::Mutex{false}; return true; } @@ -91,6 +91,9 @@ void TraceLog::Log(base::LogMessage::Severity severity, const char *fmt, void TraceLog::LogInternal(base::LogMessage::Severity severity, const char *fmt, va_list ap) { base::Lock lock(*mutex_); + + if (!mutex_->good()) return; + uint32_t id = sequence_id_; sequence_id_ = id < kMaxSequenceId ? 
id + 1 : 1; buffer_.clear(); diff --git a/src/base/mutex.cc b/src/base/mutex.cc index 5fa6ac55a..1627ac20b 100644 --- a/src/base/mutex.cc +++ b/src/base/mutex.cc @@ -20,7 +20,7 @@ namespace base { -Mutex::Mutex() : mutex_{} { +Mutex::Mutex(bool abort) : abort_{abort}, mutex_{}, result_{0} { pthread_mutexattr_t attr; int result = pthread_mutexattr_init(); if (result != 0) osaf_abort(result); diff --git a/src/base/mutex.h b/src/base/mutex.h index 7b3cee187..e3c54a711 100644 --- a/src/base/mutex.h +++ b/src/base/mutex.h @@ -31,30 +31,34 @@ namespace base { class Mutex { public: using NativeHandleType = pthread_mutex_t*; - Mutex(); + Mutex(bool abort = true); ~Mutex(); void Lock() { -int result = pthread_mutex_lock(_); -if (result != 0) osaf_abort(result); +result_ = pthread_mutex_lock(_); +if (abort_ && result_ != 0) osaf_abort(result_); } bool TryLock() { -int result = pthread_mutex_trylock(_); -if (result == 0) { +result_ = pthread_mutex_trylock(_); +if (result_ == 0) { return true; -} else if (result == EBUSY) { +} else if (result_ == EBUSY) { return false; } else { - osaf_abort(result); + if (abort_) osaf_abort(result_); + return false; } } void Unlock() { -int result = pthread_mutex_unlock(_); -if (result != 0) osaf_abort(result); +result_ = pthread_mutex_unlock(_); +if (abort_ && result_ != 0) osaf_abort(result_); } NativeHandleType native_handle() { return _; } + bool good() const {return result_ == 0;}; private: + bool abort_; pthread_mutex_t mutex_; + int result_; DELETE_COPY_AND_MOVE_OPERATORS(Mutex); }; -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ Opensaf-devel mailing list Opensaf-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/opensaf-devel -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot