common/SigUtil.cpp | 2 +- kit/ForKit.cpp | 36 ++++++++++++++++++++++++++++++++++-- wsd/Admin.cpp | 5 +++++ wsd/Admin.hpp | 1 + wsd/AdminModel.cpp | 6 ++++++ wsd/AdminModel.hpp | 4 ++++ wsd/LOOLWSD.cpp | 22 +++++++++++++++++++--- 7 files changed, 70 insertions(+), 6 deletions(-)
New commits: commit 0ac330f599b8daaeae5ba057fddb6994fd078e73 Author: Gabriel Masei <[email protected]> AuthorDate: Wed Apr 8 00:21:17 2020 +0300 Commit: Michael Meeks <[email protected]> CommitDate: Wed Apr 8 15:06:39 2020 +0200 Add segmentation fault metric for Kit processes Change-Id: Ifb0de004274213ef512f601e4419f98f456c7288 Reviewed-on: https://gerrit.libreoffice.org/c/online/+/91857 Tested-by: Jenkins CollaboraOffice <[email protected]> Reviewed-by: Michael Meeks <[email protected]> diff --git a/common/SigUtil.cpp b/common/SigUtil.cpp index 944a8d376..cd4c779dc 100644 --- a/common/SigUtil.cpp +++ b/common/SigUtil.cpp @@ -260,7 +260,7 @@ namespace SigUtil dumpBacktrace(); // let default handler process the signal - kill(getpid(), signal); + ::raise(signal); } void dumpBacktrace() diff --git a/kit/ForKit.cpp b/kit/ForKit.cpp index 8b18ef34e..bea72f4d5 100644 --- a/kit/ForKit.cpp +++ b/kit/ForKit.cpp @@ -36,6 +36,9 @@ #include <Unit.hpp> #include <Util.hpp> #include <WebSocketHandler.hpp> +#if !MOBILEAPP +#include <Admin.hpp> +#endif #include <common/FileUtil.hpp> #include <common/Seccomp.hpp> @@ -226,7 +229,7 @@ static void cleanupChildren() { std::vector<std::string> jails; Process::PID exitedChildPid; - int status; + int status, segFaultCount = 0; // Reap quickly without doing slow cleanup so WSD can spawn more rapidly. while ((exitedChildPid = waitpid(-1, &status, WUNTRACED | WNOHANG)) > 0) @@ -242,13 +245,42 @@ static void cleanupChildren() // We ran out of kits and we aren't terminating. LOG_WRN("No live Kits exist, and we are not terminating yet."); } + + if (WIFSIGNALED(status) && (WTERMSIG(status) == SIGSEGV || WTERMSIG(status) == SIGBUS)) + { + segFaultCount ++; + } } else { LOG_ERR("Unknown child " << exitedChildPid << " has exited"); } } - + + if (segFaultCount) + { +#ifdef KIT_IN_PROCESS +#if !MOBILEAPP + Admin::instance().addSegFaultCount(segFaultCount); +#endif +#else + if (WSHandler) + { + std::stringstream stream; + stream << "segfaultcount " << segFaultCount << "\n"; + int ret = WSHandler->sendMessage(stream.str()); + if (ret == -1) + { + LOG_WRN("Could not send 'segfaultcount' message through websocket"); + } + else + { + LOG_WRN("Successfully sent 'segfaultcount' message " << stream.str()); + } + } +#endif + } + // Now delete the jails. for (const auto& path : jails) { diff --git a/wsd/Admin.cpp b/wsd/Admin.cpp index 88b9268df..3fc0b6955 100644 --- a/wsd/Admin.cpp +++ b/wsd/Admin.cpp @@ -593,6 +593,11 @@ void Admin::setDocWopiUploadDuration(const std::string& docKey, const std::chron addCallback([=]{ _model.setDocWopiUploadDuration(docKey, uploadDuration); }); } +void Admin::addSegFaultCount(unsigned segFaultCount) +{ + addCallback([=]{ _model.addSegFaultCount(segFaultCount); }); +} + void Admin::notifyForkit() { std::ostringstream oss; diff --git a/wsd/Admin.hpp b/wsd/Admin.hpp index e39d89fe7..6287d38bc 100644 --- a/wsd/Admin.hpp +++ b/wsd/Admin.hpp @@ -129,6 +129,7 @@ public: void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration); void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration); void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds uploadDuration); + void addSegFaultCount(unsigned segFaultCount); void getMetrics(std::ostringstream &metrics); diff --git a/wsd/AdminModel.cpp b/wsd/AdminModel.cpp index 22e402a83..f19d6e367 100644 --- a/wsd/AdminModel.cpp +++ b/wsd/AdminModel.cpp @@ -771,6 +771,11 @@ void AdminModel::setDocWopiUploadDuration(const std::string& docKey, const std:: it->second.setWopiUploadDuration(wopiUploadDuration); } +void AdminModel::addSegFaultCount(unsigned segFaultCount) +{ + _segFaultCount += segFaultCount; +} + int filterNumberName(const struct dirent *dir) { return !fnmatch("[0-9]*", dir->d_name, 0); @@ -993,6 +998,7 @@ void AdminModel::getMetrics(std::ostringstream &oss) oss << "kit_count " << kitStats.unassignedCount + kitStats.assignedCount << std::endl; oss << "kit_unassigned_count " << kitStats.unassignedCount << std::endl; oss << "kit_assigned_count " << kitStats.assignedCount << std::endl; + oss << "kit_segfault_count " << _segFaultCount << std::endl; PrintKitAggregateMetrics(oss, "thread_count", "", kitStats._threadCount); PrintKitAggregateMetrics(oss, "memory_used", "bytes", docStats._kitUsedMemory._all); PrintKitAggregateMetrics(oss, "cpu_time", "seconds", kitStats._cpuTime); diff --git a/wsd/AdminModel.hpp b/wsd/AdminModel.hpp index be5d9dd0f..b097289ef 100644 --- a/wsd/AdminModel.hpp +++ b/wsd/AdminModel.hpp @@ -249,6 +249,7 @@ class AdminModel public: AdminModel() : + _segFaultCount(0), _owner(std::this_thread::get_id()) { LOG_INF("AdminModel ctor."); @@ -316,6 +317,7 @@ public: void setViewLoadDuration(const std::string& docKey, const std::string& sessionId, std::chrono::milliseconds viewLoadDuration); void setDocWopiDownloadDuration(const std::string& docKey, std::chrono::milliseconds wopiDownloadDuration); void setDocWopiUploadDuration(const std::string& docKey, const std::chrono::milliseconds wopiUploadDuration); + void addSegFaultCount(unsigned segFaultCount); void setForKitPid(pid_t pid) { _forKitPid = pid; } void getMetrics(std::ostringstream &oss); @@ -360,6 +362,8 @@ private: uint64_t _sentBytesTotal; uint64_t _recvBytesTotal; + uint64_t _segFaultCount; + pid_t _forKitPid; /// We check the owner even in the release builds, needs to be always correct. diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp index e9e17b86f..c732169c7 100644 --- a/wsd/LOOLWSD.cpp +++ b/wsd/LOOLWSD.cpp @@ -835,9 +835,25 @@ void ForKitProcWSHandler::handleMessage(const std::vector<char> &data) const std::string firstLine = LOOLProtocol::getFirstLine(&data[0], data.size()); const StringVector tokens = LOOLProtocol::tokenize(firstLine.data(), firstLine.size()); - // Just add here the processing of specific received messages - - LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]); + if (tokens.equals(0, "segfaultcount")) + { + int count = std::stoi(tokens[1]); + if (count >= 0) + { +#if !MOBILEAPP + Admin::instance().addSegFaultCount(count); +#endif + LOG_INF(count << " loolkit processes crashed with segmentation fault."); + } + else + { + LOG_WRN("Invalid 'segfaultcount' message received."); + } + } + else + { + LOG_ERR("ForKitProcWSHandler: unknown command: " << tokens[0]); + } } LOOLWSD::LOOLWSD() _______________________________________________ Libreoffice-commits mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
