KUDU-2291 (part 2): Add a /stacks page. This adds a simple /stacks web page which dumps the stack traces of the currently running threads in a plain text format.
Since we often have a lot of idle threadpool threads sitting around in the "wait for work" state, the output collapses all threads with matching stacks and only displays the stack once, making it more suitable for human consumption. Example output from a local kudu-master: https://gist.github.com/b64739ee5fb146ea1953380f57b996c4 Longer term we may want to integrate stack-tracing capability into the /threadz view as well, but for now I left this as a low-level utility which doesn't access the thread manager, etc. I left a few TODOs for further enhancements, but I've already found this helpful for understanding some perf anomalies while playing with YCSB, so let's get it committed and improve as we go. Change-Id: I8b8f6d50d44e40fd51357fdbfd8f9ba2ebaa724b Reviewed-on: http://gerrit.cloudera.org:8080/9253 Tested-by: Todd Lipcon <[email protected]> Reviewed-by: Mike Percy <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/270dd999 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/270dd999 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/270dd999 Branch: refs/heads/master Commit: 270dd999683e930d89cfa7865a1f85239b525183 Parents: c182d62 Author: Todd Lipcon <[email protected]> Authored: Wed Feb 7 19:30:39 2018 -0800 Committer: Mike Percy <[email protected]> Committed: Wed Feb 21 00:07:25 2018 +0000 ---------------------------------------------------------------------- .../integration-tests/linked_list-test-util.h | 14 +-- src/kudu/server/default_path_handlers.cc | 93 ++++++++++++++++++++ src/kudu/util/debug-util.cc | 6 +- 3 files changed, 105 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/270dd999/src/kudu/integration-tests/linked_list-test-util.h ---------------------------------------------------------------------- diff --git 
a/src/kudu/integration-tests/linked_list-test-util.h b/src/kudu/integration-tests/linked_list-test-util.h index 71c6758..3f873ed 100644 --- a/src/kudu/integration-tests/linked_list-test-util.h +++ b/src/kudu/integration-tests/linked_list-test-util.h @@ -312,22 +312,24 @@ class PeriodicWebUIChecker { // List of master and ts web pages to fetch std::vector<std::string> master_pages, ts_pages; - master_pages.emplace_back("/metrics"); + master_pages.emplace_back("/dump-entities"); master_pages.emplace_back("/masters"); + master_pages.emplace_back("/mem-trackers"); + master_pages.emplace_back("/metrics"); + master_pages.emplace_back("/stacks"); master_pages.emplace_back("/tables"); - master_pages.emplace_back("/dump-entities"); master_pages.emplace_back("/tablet-servers"); - master_pages.emplace_back("/mem-trackers"); + ts_pages.emplace_back("/maintenance-manager"); + ts_pages.emplace_back("/mem-trackers"); ts_pages.emplace_back("/metrics"); + ts_pages.emplace_back("/scans"); + ts_pages.emplace_back("/stacks"); ts_pages.emplace_back("/tablets"); if (!tablet_id.empty()) { ts_pages.push_back(strings::Substitute("/transactions?tablet_id=$0", tablet_id)); } - ts_pages.emplace_back("/maintenance-manager"); - ts_pages.emplace_back("/mem-trackers"); - ts_pages.emplace_back("/scans"); // Generate list of urls for each master and tablet server for (int i = 0; i < cluster.num_masters(); i++) { http://git-wip-us.apache.org/repos/asf/kudu/blob/270dd999/src/kudu/server/default_path_handlers.cc ---------------------------------------------------------------------- diff --git a/src/kudu/server/default_path_handlers.cc b/src/kudu/server/default_path_handlers.cc index 8322017..3ecc0fe 100644 --- a/src/kudu/server/default_path_handlers.cc +++ b/src/kudu/server/default_path_handlers.cc @@ -18,13 +18,17 @@ #include "kudu/server/default_path_handlers.h" #include <sys/stat.h> +#include <sys/types.h> #include <cstddef> #include <cstdint> #include <fstream> +#include <iterator> +#include <map> 
#include <memory> #include <string> #include <unordered_map> +#include <utility> #include <vector> #include <boost/algorithm/string/predicate.hpp> @@ -44,10 +48,13 @@ #include "kudu/gutil/strings/human_readable.h" #include "kudu/gutil/strings/numbers.h" #include "kudu/gutil/strings/split.h" +#include "kudu/gutil/strings/strip.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/server/pprof_path_handlers.h" #include "kudu/server/webserver.h" +#include "kudu/util/debug-util.h" #include "kudu/util/easy_json.h" +#include "kudu/util/env.h" #include "kudu/util/faststring.h" #include "kudu/util/flag_tags.h" #include "kudu/util/flags.h" @@ -55,6 +62,7 @@ #include "kudu/util/logging.h" #include "kudu/util/mem_tracker.h" #include "kudu/util/metrics.h" +#include "kudu/util/monotime.h" #include "kudu/util/process_memory.h" #include "kudu/util/status.h" #include "kudu/util/web_callback_registry.h" @@ -144,6 +152,87 @@ static void FlagsHandler(const Webserver::WebRequest& req, << tags.end_pre_tag; } +// Registered to handle "/stacks". +// +// Prints out the current stack trace of all threads in the process. +static void StacksHandler(const Webserver::WebRequest& /*req*/, + Webserver::PrerenderedWebResponse* resp) { + std::ostringstream* output = resp->output; + vector<pid_t> tids; + Status s = ListThreads(&tids); + if (!s.ok()) { + *output << "Failed to list threads: " << s.ToString(); + return; + } + struct Info { + pid_t tid; + Status status; + string thread_name; + StackTrace stack; + }; + std::multimap<string, Info> grouped_infos; + vector<Info> failed; + + // Capture all the stacks without symbolization initially so that + // the stack traces come from as close together in time as possible. + // + // TODO(todd): would be good to actually send the dump signal to all + // threads and then wait for them all to collect their traces, to get + // an even tighter snapshot. 
+ MonoTime start = MonoTime::Now(); + for (int i = 0; i < tids.size(); i++) { + Info info; + info.tid = tids[i]; + + // Get the thread's name by reading proc. + // TODO(todd): should we have the dumped thread fill in its own name using + // prctl to avoid having to open and read /proc? Or maybe we should use the + // Kudu ThreadMgr to get the thread names for the cases where we are using + // the kudu::Thread wrapper at least. + faststring buf; + Status s = ReadFileToString(Env::Default(), + Substitute("/proc/self/task/$0/comm", info.tid), + &buf); + if (!s.ok()) { + info.thread_name = "<unknown name>"; + } else { + info.thread_name = buf.ToString(); + StripTrailingNewline(&info.thread_name); + } + + info.status = GetThreadStack(info.tid, &info.stack); + if (info.status.ok()) { + grouped_infos.emplace(info.stack.ToHexString(), std::move(info)); + } else { + failed.emplace_back(std::move(info)); + } + } + MonoDelta dur = MonoTime::Now() - start; + + *output << "Collected stacks from " << grouped_infos.size() << " threads in " + << dur.ToString() << "\n"; + if (!failed.empty()) { + *output << "Failed to collect stacks from " << failed.size() << " threads " + << "(they may have exited while we were iterating over the threads)\n"; + } + *output << "\n"; + for (auto it = grouped_infos.begin(); it != grouped_infos.end();) { + auto end_group = grouped_infos.equal_range(it->first).second; + const auto& stack = it->second.stack; + int num_in_group = std::distance(it, end_group); + if (num_in_group > 1) { + *output << num_in_group << " threads with same stack:\n"; + } + + while (it != end_group) { + const auto& info = it->second; + *output << "TID " << info.tid << "(" << info.thread_name << "):\n"; + ++it; + } + *output << stack.Symbolize() << "\n\n"; + } +} + // Registered to handle "/memz", and prints out memory allocation statistics. 
static void MemUsageHandler(const Webserver::WebRequest& req, Webserver::PrerenderedWebResponse* resp) { @@ -263,6 +352,10 @@ void AddDefaultPathHandlers(Webserver* webserver) { webserver->RegisterPathHandler("/config", "Configuration", ConfigurationHandler, styled, on_nav_bar); + webserver->RegisterPrerenderedPathHandler("/stacks", "Stacks", StacksHandler, + /*is_styled=*/false, + /*is_on_nav_bar=*/false); + AddPprofPathHandlers(webserver); } http://git-wip-us.apache.org/repos/asf/kudu/blob/270dd999/src/kudu/util/debug-util.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/debug-util.cc b/src/kudu/util/debug-util.cc index bd382a9..c8f077f 100644 --- a/src/kudu/util/debug-util.cc +++ b/src/kudu/util/debug-util.cc @@ -310,7 +310,9 @@ string DumpThreadStack(int64_t tid) { Status ListThreads(vector<pid_t> *tids) { -#if defined(__linux__) +#ifndef __linux__ + return Status::NotSupported("unable to list threads on this platform"); +#else DIR *dir = opendir("/proc/self/task/"); if (dir == NULL) { return Status::IOError("failed to open task dir", ErrnoToString(errno), errno); @@ -327,8 +329,8 @@ Status ListThreads(vector<pid_t> *tids) { } } closedir(dir); -#endif // defined(__linux__) return Status::OK(); +#endif // __linux__ } string GetStackTrace() {
