Remove tcmalloc_spinlock_contention metric Spinlock contention was removed from tcmalloc upstream, so this metric has been set to 0 since we upgraded to tcmalloc 2.6. I looked briefly at reverting its removal upstream, but the revert would be fairly large, so I think it's best to just accept the loss of this instrumentation.
Change-Id: I2c29b81a2c692d5d876d2dd5381ba2c295684324 Reviewed-on: http://gerrit.cloudera.org:8080/9595 Tested-by: Todd Lipcon <t...@apache.org> Reviewed-by: Adar Dembo <a...@cloudera.com> (cherry picked from commit 4889a9156a7e1f51b076f4bc9f44621a1b2f46e4) Reviewed-on: http://gerrit.cloudera.org:8080/9608 Tested-by: Grant Henke <granthe...@gmail.com> Reviewed-by: Grant Henke <granthe...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/9e20ca67 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/9e20ca67 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/9e20ca67 Branch: refs/heads/branch-1.7.x Commit: 9e20ca67020e3ec308fa831b9ca6f1475ce59255 Parents: 7eb1a9f Author: Todd Lipcon <t...@apache.org> Authored: Mon Mar 12 17:26:19 2018 -0700 Committer: Grant Henke <granthe...@gmail.com> Committed: Tue Mar 13 21:18:27 2018 +0000 ---------------------------------------------------------------------- docs/release_notes.adoc | 2 ++ src/kudu/scripts/parse_metrics_log.py | 1 - src/kudu/util/spinlock_profiling-test.cc | 10 ------ src/kudu/util/spinlock_profiling.cc | 47 --------------------------- src/kudu/util/spinlock_profiling.h | 4 --- src/kudu/util/trace.h | 26 ++------------- src/kudu/util/trace_metrics.h | 20 ++---------- 7 files changed, 7 insertions(+), 103 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/docs/release_notes.adoc ---------------------------------------------------------------------- diff --git a/docs/release_notes.adoc b/docs/release_notes.adoc index 916282c..482dc83 100644 --- a/docs/release_notes.adoc +++ b/docs/release_notes.adoc @@ -40,6 +40,8 @@ [[rn_1.7.0_obsoletions]] == Obsoletions +* The `tcmalloc_contention_time` metric, which previously tracked the amount + of time spent in memory allocator lock contention, has been removed. [[rn_1.7.0_deprecations]] == Deprecations http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/scripts/parse_metrics_log.py ---------------------------------------------------------------------- diff --git a/src/kudu/scripts/parse_metrics_log.py b/src/kudu/scripts/parse_metrics_log.py index 3b58991..75406ad 100644 --- a/src/kudu/scripts/parse_metrics_log.py +++ b/src/kudu/scripts/parse_metrics_log.py @@ -63,7 +63,6 @@ RATE_METRICS = [ # ("server.voluntary_context_switches", "vol_cs"), # ("tablet.rows_inserted", "inserts_per_sec"), # ("tablet.rows_upserted", "upserts_per_sec"), - # ("server.tcmalloc_contention_time", "tcmalloc_contention_time") ] # These metrics will be extracted as percentile metrics into the TSV. http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/util/spinlock_profiling-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/spinlock_profiling-test.cc b/src/kudu/util/spinlock_profiling-test.cc index 5a9fd6b..28089df 100644 --- a/src/kudu/util/spinlock_profiling-test.cc +++ b/src/kudu/util/spinlock_profiling-test.cc @@ -43,10 +43,6 @@ namespace gutil { extern void SubmitSpinLockProfileData(const void *, int64); } // namespace gutil -namespace base { -extern void SubmitSpinLockProfileData(const void *, int64); -} // namespace base - namespace kudu { class SpinLockProfilingTest : public KuduTest {}; @@ -82,10 +78,4 @@ TEST_F(SpinLockProfilingTest, TestStackCollection) { ASSERT_EQ(0, dropped); } -TEST_F(SpinLockProfilingTest, TestTcmallocContention) { - StartSynchronizationProfiling(); - base::SubmitSpinLockProfileData(nullptr, 12345); - ASSERT_GE(GetTcmallocContentionMicros(), 0); -} - } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/util/spinlock_profiling.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/spinlock_profiling.cc b/src/kudu/util/spinlock_profiling.cc index b6abf59..5660760 100644 --- a/src/kudu/util/spinlock_profiling.cc +++ b/src/kudu/util/spinlock_profiling.cc @@ -38,7 +38,6 @@ #include "kudu/util/metrics.h" #include "kudu/util/striped64.h" #include "kudu/util/trace.h" -#include "kudu/util/trace_metrics.h" DEFINE_int32(lock_contention_trace_threshold_cycles, 2000000, // 2M cycles should be about 1ms @@ -53,12 +52,6 @@ METRIC_DEFINE_gauge_uint64(server, spinlock_contention_time, "started. If this increases rapidly, it may indicate a performance issue in Kudu " "internals triggered by a particular workload and warrant investigation.", kudu::EXPOSE_AS_COUNTER); -METRIC_DEFINE_gauge_uint64(server, tcmalloc_contention_time, - "TCMalloc Contention Time", kudu::MetricUnit::kMicroseconds, - "Amount of time consumed by contention on tcmalloc's locks since the server " - "started. If this increases rapidly, it may indicate a performance issue in Kudu " - "internals triggered by a particular workload and warrant investigation.", - kudu::EXPOSE_AS_COUNTER); using base::SpinLock; @@ -72,18 +65,6 @@ static LongAdder* g_contended_cycles = nullptr; namespace { -// We can't use LongAdder for tcmalloc contention, because its -// implementation can allocate memory, and doing so is prohibited -// in the tcmalloc contention callback. -// -// We pad and align this struct to two cachelines to avoid any false -// sharing with the g_contended_cycles counter or any other globals. -struct PaddedAtomic64 { - Atomic64 val; - char padding[CACHELINE_SIZE * 2 - sizeof(Atomic64)]; -} CACHELINE_ALIGNED; -static PaddedAtomic64 g_tcmalloc_contention; - // Implements a very simple linear-probing hashtable of stack traces with // a fixed number of entries. // @@ -291,10 +272,6 @@ void RegisterSpinLockContentionMetrics(const scoped_refptr<MetricEntity>& entity entity->NeverRetire( METRIC_spinlock_contention_time.InstantiateFunctionGauge( entity, Bind(&GetSpinLockContentionMicros))); - entity->NeverRetire( - METRIC_tcmalloc_contention_time.InstantiateFunctionGauge( - entity, Bind(&GetTcmallocContentionMicros))); - } uint64_t GetSpinLockContentionMicros() { @@ -304,13 +281,6 @@ uint64_t GetSpinLockContentionMicros() { return implicit_cast<int64_t>(micros); } -uint64_t GetTcmallocContentionMicros() { - int64_t wait_cycles = base::subtle::NoBarrier_Load(&g_tcmalloc_contention.val); - double micros = static_cast<double>(wait_cycles) / base::CyclesPerSecond() - * kMicrosPerSecond; - return implicit_cast<int64_t>(micros); -} - void StartSynchronizationProfiling() { InitSpinLockContentionProfiling(); base::subtle::Barrier_AtomicIncrement(&g_profiling_enabled, 1); @@ -335,20 +305,3 @@ void SubmitSpinLockProfileData(const void *contendedlock, int64_t wait_cycles) { kudu::SubmitSpinLockProfileData(contendedlock, wait_cycles); } } // namespace gutil - -// tcmalloc's internal spinlocks also support submitting contention metrics -// using the base::SubmitSpinLockProfileData weak symbol. However, this function might be -// called while tcmalloc's heap lock is held. Thus, we cannot allocate memory here or else -// we risk a deadlock. So, this implementation just does the bare minimum to expose -// tcmalloc contention. -namespace base { -void SubmitSpinLockProfileData(const void* /* contendedlock */, int64_t wait_cycles) { -#if !defined(__APPLE__) - auto t = kudu::Trace::CurrentTrace(); - if (t) { - t->metrics()->IncrementTcmallocContentionCycles(wait_cycles); - } -#endif // !defined(__APPLE__) - base::subtle::NoBarrier_AtomicIncrement(&kudu::g_tcmalloc_contention.val, wait_cycles); -} -} // namespace base http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/util/spinlock_profiling.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/spinlock_profiling.h b/src/kudu/util/spinlock_profiling.h index 3bfb09e..702eb18 100644 --- a/src/kudu/util/spinlock_profiling.h +++ b/src/kudu/util/spinlock_profiling.h @@ -38,10 +38,6 @@ void InitSpinLockContentionProfiling(); // since the server started. uint64_t GetSpinLockContentionMicros(); -// Return the total number of microseconds spent in tcmalloc contention -// since the server started. -uint64_t GetTcmallocContentionMicros(); - // Register metrics in the given server entity which measure the amount of // spinlock contention. void RegisterSpinLockContentionMetrics(const scoped_refptr<MetricEntity>& entity); http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/util/trace.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/trace.h b/src/kudu/util/trace.h index a24d252..1c29fa9 100644 --- a/src/kudu/util/trace.h +++ b/src/kudu/util/trace.h @@ -256,30 +256,10 @@ class ScopedAdoptTrace { } ~ScopedAdoptTrace() { - auto t = Trace::threadlocal_trace_; - Trace::threadlocal_trace_ = old_trace_; - - // It's critical that we Release() the reference count on 't' only - // after we've unset the thread-local variable. Otherwise, we can hit - // a nasty interaction with tcmalloc contention profiling. Consider - // the following sequence: - // - // 1. threadlocal_trace_ has refcount = 1 - // 2. we call threadlocal_trace_->Release() which decrements refcount to 0 - // 3. this calls 'delete' on the Trace object - // 3a. this calls tcmalloc free() on the Trace and various sub-objects - // 3b. the free() calls may end up experiencing contention in tcmalloc - // 3c. we try to account the contention in threadlocal_trace_'s TraceMetrics, - // but it has already been freed. - // - // In the best case, we just scribble into some free tcmalloc memory. In the - // worst case, tcmalloc would have already re-used this memory for a new - // allocation on another thread, and we end up overwriting someone else's memory. - // - // Waiting to Release() only after 'unpublishing' the trace solves this. - if (t) { - t->Release(); + if (Trace::threadlocal_trace_) { + Trace::threadlocal_trace_->Release(); } + Trace::threadlocal_trace_ = old_trace_; DFAKE_SCOPED_LOCK_THREAD_LOCKED(ctor_dtor_); } http://git-wip-us.apache.org/repos/asf/kudu/blob/9e20ca67/src/kudu/util/trace_metrics.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/trace_metrics.h b/src/kudu/util/trace_metrics.h index f451495..58a26ef 100644 --- a/src/kudu/util/trace_metrics.h +++ b/src/kudu/util/trace_metrics.h @@ -38,7 +38,7 @@ namespace kudu { // are plausible. class TraceMetrics { public: - TraceMetrics() : tcmalloc_contention_cycles_(0) {} + TraceMetrics() {} ~TraceMetrics() {} // Internalize the given string by duplicating it into a process-wide @@ -59,14 +59,6 @@ class TraceMetrics { // Return a copy of the current counter map. std::map<const char*, int64_t> Get() const; - // Increment the number of cycles spent in tcmalloc lock contention. - // - // tcmalloc contention is stored separately from other metrics since - // it is incremented from a code path that prohibits allocation. - void IncrementTcmallocContentionCycles(int64_t cycles) { - tcmalloc_contention_cycles_.IncrementBy(cycles); - } - // Return metric's current value. // // NOTE: the 'name' MUST be the same const char* which is used for @@ -76,7 +68,6 @@ class TraceMetrics { private: mutable simple_spinlock lock_; std::map<const char*, int64_t> counters_; - AtomicInt<int64_t> tcmalloc_contention_cycles_; DISALLOW_COPY_AND_ASSIGN(TraceMetrics); }; @@ -88,14 +79,7 @@ inline void TraceMetrics::Increment(const char* name, int64_t amount) { inline std::map<const char*, int64_t> TraceMetrics::Get() const { std::unique_lock<simple_spinlock> l(lock_); - auto m = counters_; - l.unlock(); - - auto v = tcmalloc_contention_cycles_.Load(); - if (v > 0) { - m["tcmalloc_contention_cycles"] = v; - } - return m; + return counters_; } inline int64_t TraceMetrics::GetMetric(const char* name) const {