mem_tracker: fix race between FindTracker() and destructor In very rare cases, it's possible for an interleaving between these two functions to lead to a recursive lock acquisition of child_trackers_lock_ in the destructor.
For example: 1. The tracker hierarchy contains one parent (P) and one child (C1). 2. Thread 1 creates a second child (C2) parented to P. It has the sole ref to C2. 3. Thread 2 calls FindTracker() looking for C1. 4. Thread 2 runs as far as the loop in FindTrackerUnlocked(), getting descheduled just as it has locked a ref to C2. It also holds P's child_trackers_lock_. 5. Thread 1 is rescheduled and drops its ref to C2. 6. Thread 2 is rescheduled. It also drops its ref to C2, which was the last ref, so it runs C2's destructor. This acquires P's child_trackers_lock_, which it already owns. Boom. The fix is simple: when looking up a tracker, make a local copy of the list of children while holding child_trackers_lock_, then iterate without it. It gets a little complicated due to the unusual requirements of FindOrCreateTracker(); I ended up introducing a special static lock to handle that case. The rest of the changes are basically code motion. Unfortunately, the new test does not fail reliably without the fix. In my testing, it failed maybe once every few hundred runs. But it doesn't fail at all with the fix in place. Change-Id: I69610f782c7a00d161bfb48d2487c29c0309c985 Reviewed-on: http://gerrit.cloudera.org:8080/4614 Tested-by: Kudu Jenkins Reviewed-by: Todd Lipcon <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/64f9ab34 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/64f9ab34 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/64f9ab34 Branch: refs/heads/master Commit: 64f9ab34ffbc249cf72fc187308a1402888ba994 Parents: 0558aaa Author: Adar Dembo <[email protected]> Authored: Mon Oct 3 17:21:54 2016 -0700 Committer: Todd Lipcon <[email protected]> Committed: Wed Oct 5 21:29:23 2016 +0000 ---------------------------------------------------------------------- src/kudu/consensus/log_cache.cc | 4 +-- src/kudu/util/cache.cc | 2 +- src/kudu/util/mem_tracker-test.cc | 36 +++++++++++++++++++-- src/kudu/util/mem_tracker.cc | 58 +++++++++++++++++++--------------- src/kudu/util/mem_tracker.h | 34 ++++++-------------- 5 files changed, 78 insertions(+), 56 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/64f9ab34/src/kudu/consensus/log_cache.cc ---------------------------------------------------------------------- diff --git a/src/kudu/consensus/log_cache.cc b/src/kudu/consensus/log_cache.cc index dcb4ba2..8ac6578 100644 --- a/src/kudu/consensus/log_cache.cc +++ b/src/kudu/consensus/log_cache.cc @@ -85,8 +85,8 @@ LogCache::LogCache(const scoped_refptr<MetricEntity>& metric_entity, // Set up (or reuse) a tracker with the global limit. It is parented directly // to the root tracker so that it's always global. - parent_tracker_ = MemTracker::FindOrCreateTracker(global_max_ops_size_bytes, - kParentMemTrackerId); + parent_tracker_ = MemTracker::FindOrCreateGlobalTracker(global_max_ops_size_bytes, + kParentMemTrackerId); // And create a child tracker with the per-tablet limit. tracker_ = MemTracker::CreateTracker( http://git-wip-us.apache.org/repos/asf/kudu/blob/64f9ab34/src/kudu/util/cache.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/cache.cc b/src/kudu/util/cache.cc index 1ef16a0..9d46121 100644 --- a/src/kudu/util/cache.cc +++ b/src/kudu/util/cache.cc @@ -407,7 +407,7 @@ class ShardedLRUCache : public Cache { // A cache is often a singleton, so: // 1. We reuse its MemTracker if one already exists, and // 2. It is directly parented to the root MemTracker. - mem_tracker_ = MemTracker::FindOrCreateTracker( + mem_tracker_ = MemTracker::FindOrCreateGlobalTracker( -1, strings::Substitute("$0-sharded_lru_cache", id)); int num_shards = 1 << shard_bits_; http://git-wip-us.apache.org/repos/asf/kudu/blob/64f9ab34/src/kudu/util/mem_tracker-test.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/mem_tracker-test.cc b/src/kudu/util/mem_tracker-test.cc index 22a0bcd..11bf2a2 100644 --- a/src/kudu/util/mem_tracker-test.cc +++ b/src/kudu/util/mem_tracker-test.cc @@ -27,6 +27,7 @@ #include <boost/bind.hpp> #include <gperftools/malloc_extension.h> +#include "kudu/gutil/strings/substitute.h" #include "kudu/util/test_util.h" DECLARE_int32(memory_limit_soft_percentage); @@ -40,6 +41,7 @@ using std::shared_ptr; using std::string; using std::unordered_map; using std::vector; +using strings::Substitute; TEST(MemTrackerTest, SingleTrackerNoLimit) { shared_ptr<MemTracker> t = MemTracker::CreateTracker(-1, "t"); @@ -218,7 +220,7 @@ TEST(MemTrackerTest, FindFunctionsTakeOwnership) { { shared_ptr<MemTracker> m = MemTracker::CreateTracker(-1, "test"); - ref = MemTracker::FindOrCreateTracker(-1, m->id()); + ref = MemTracker::FindOrCreateGlobalTracker(-1, m->id()); } LOG(INFO) << ref->ToString(); ref.reset(); @@ -337,7 +339,7 @@ TEST(MemTrackerTest, CollisionDetection) { MemTracker::FindTracker("parent", &found); }, kDeathMsg); EXPECT_DEATH({ - MemTracker::FindOrCreateTracker(-1, "parent"); + MemTracker::FindOrCreateGlobalTracker(-1, "parent"); }, kDeathMsg); #endif } @@ -348,7 +350,8 @@ TEST(MemTrackerTest, TestMultiThreadedRegisterAndDestroy) { for (int i = 0; i < 10; i++) { threads.emplace_back([&done]{ while (!done.load()) { - shared_ptr<MemTracker> t = MemTracker::FindOrCreateTracker(1000, "foo"); + shared_ptr<MemTracker> t = MemTracker::FindOrCreateGlobalTracker( + 1000, "foo"); } }); } @@ -360,4 +363,31 @@ TEST(MemTrackerTest, TestMultiThreadedRegisterAndDestroy) { } } +TEST(MemTrackerTest, TestMultiThreadedCreateFind) { + shared_ptr<MemTracker> p = MemTracker::CreateTracker(-1, "p"); + shared_ptr<MemTracker> c1 = MemTracker::CreateTracker(-1, "c1", p); + std::atomic<bool> done(false); + vector<std::thread> threads; + threads.emplace_back([&]{ + while (!done.load()) { + shared_ptr<MemTracker> c1_copy; + CHECK(MemTracker::FindTracker(c1->id(), &c1_copy, p)); + } + }); + for (int i = 0; i < 5; i++) { + threads.emplace_back([&, i]{ + while (!done.load()) { + shared_ptr<MemTracker> c2 = + MemTracker::CreateTracker(-1, Substitute("ci-$0", i), p); + } + }); + } + + SleepFor(MonoDelta::FromMilliseconds(500)); + done.store(true); + for (auto& t : threads) { + t.join(); + } +} + } // namespace kudu http://git-wip-us.apache.org/repos/asf/kudu/blob/64f9ab34/src/kudu/util/mem_tracker.cc ---------------------------------------------------------------------- diff --git a/src/kudu/util/mem_tracker.cc b/src/kudu/util/mem_tracker.cc index 51ee211..b247cb4 100644 --- a/src/kudu/util/mem_tracker.cc +++ b/src/kudu/util/mem_tracker.cc @@ -148,16 +148,9 @@ shared_ptr<MemTracker> MemTracker::CreateTracker(int64_t byte_limit, const string& id, const shared_ptr<MemTracker>& parent) { shared_ptr<MemTracker> real_parent = parent ? parent : GetRootTracker(); - MutexLock l(real_parent->child_trackers_lock_); - return CreateTrackerUnlocked(byte_limit, id, real_parent); -} - -shared_ptr<MemTracker> MemTracker::CreateTrackerUnlocked(int64_t byte_limit, - const string& id, - const shared_ptr<MemTracker>& parent) { - DCHECK(parent); - shared_ptr<MemTracker> tracker(new MemTracker(ConsumptionFunction(), byte_limit, id, parent)); - parent->AddChildTrackerUnlocked(tracker); + shared_ptr<MemTracker> tracker( + new MemTracker(ConsumptionFunction(), byte_limit, id, real_parent)); + real_parent->AddChildTracker(tracker); tracker->Init(); return tracker; @@ -213,18 +206,27 @@ string MemTracker::ToString() const { bool MemTracker::FindTracker(const string& id, shared_ptr<MemTracker>* tracker, const shared_ptr<MemTracker>& parent) { - shared_ptr<MemTracker> real_parent = parent ? parent : GetRootTracker(); - MutexLock l(real_parent->child_trackers_lock_); - return FindTrackerUnlocked(id, tracker, real_parent); + return FindTrackerInternal(id, tracker, parent ? parent : GetRootTracker()); } -bool MemTracker::FindTrackerUnlocked(const string& id, +bool MemTracker::FindTrackerInternal(const string& id, shared_ptr<MemTracker>* tracker, const shared_ptr<MemTracker>& parent) { DCHECK(parent != NULL); - parent->child_trackers_lock_.AssertAcquired(); + + list<weak_ptr<MemTracker>> children; + { + MutexLock l(parent->child_trackers_lock_); + children = parent->child_trackers_; + } + + // Search for the matching child without holding the parent's lock. + // + // If the lock were held while searching, it'd be possible for 'child' to be + // the last live ref to a tracker, which would lead to a recursive + // acquisition of the parent lock during the 'child' destructor call. vector<shared_ptr<MemTracker>> found; - for (const auto& child_weak : parent->child_trackers_) { + for (const auto& child_weak : children) { shared_ptr<MemTracker> child = child_weak.lock(); if (child && child->id() == id) { found.emplace_back(std::move(child)); @@ -243,16 +245,22 @@ bool MemTracker::FindTrackerUnlocked(const string& id, return false; } -shared_ptr<MemTracker> MemTracker::FindOrCreateTracker(int64_t byte_limit, - const string& id, - const shared_ptr<MemTracker>& parent) { - shared_ptr<MemTracker> real_parent = parent ? parent : GetRootTracker(); - MutexLock l(real_parent->child_trackers_lock_); +shared_ptr<MemTracker> MemTracker::FindOrCreateGlobalTracker( + int64_t byte_limit, + const string& id) { + // The calls below comprise a critical section, but we can't use the root + // tracker's child_trackers_lock_ to synchronize it as the lock must be + // released during FindTrackerInternal(). Since this function creates + // globally-visible MemTrackers which are the exception rather than the rule, + // it's reasonable to synchronize their creation on a singleton lock. + static Mutex find_or_create_lock; + MutexLock l(find_or_create_lock); + shared_ptr<MemTracker> found; - if (FindTrackerUnlocked(id, &found, real_parent)) { + if (FindTrackerInternal(id, &found, GetRootTracker())) { return found; } - return CreateTrackerUnlocked(byte_limit, id, real_parent); + return CreateTracker(byte_limit, id, GetRootTracker()); } void MemTracker::ListTrackers(vector<shared_ptr<MemTracker>>* trackers) { @@ -555,8 +563,8 @@ void MemTracker::Init() { DCHECK_EQ(all_trackers_[0], this); } -void MemTracker::AddChildTrackerUnlocked(const shared_ptr<MemTracker>& tracker) { - child_trackers_lock_.AssertAcquired(); +void MemTracker::AddChildTracker(const shared_ptr<MemTracker>& tracker) { + MutexLock l(child_trackers_lock_); tracker->child_tracker_it_ = child_trackers_.insert(child_trackers_.end(), tracker); } http://git-wip-us.apache.org/repos/asf/kudu/blob/64f9ab34/src/kudu/util/mem_tracker.h ---------------------------------------------------------------------- diff --git a/src/kudu/util/mem_tracker.h b/src/kudu/util/mem_tracker.h index ae6de74..cd00e22 100644 --- a/src/kudu/util/mem_tracker.h +++ b/src/kudu/util/mem_tracker.h @@ -118,18 +118,14 @@ class MemTracker : public std::enable_shared_from_this<MemTracker> { std::shared_ptr<MemTracker>* tracker, const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>()); - // If a tracker with the specified 'id' and 'parent' exists in the tree, - // returns a shared_ptr to that instance. Otherwise, creates a new - // MemTracker with the specified byte_limit, id, and parent. - // - // Use the two argument form if there is no parent. + // If a global tracker with the specified 'id' exists in the tree, returns a + // shared_ptr to that instance. Otherwise, creates a new MemTracker with the + // specified byte_limit and id, parented to the root MemTracker. // // Note: this function will enforce that 'id' is unique amongst the children - // of 'parent'. - static std::shared_ptr<MemTracker> FindOrCreateTracker( - int64_t byte_limit, - const std::string& id, - const std::shared_ptr<MemTracker>& parent = std::shared_ptr<MemTracker>()); + // of the root MemTracker. + static std::shared_ptr<MemTracker> FindOrCreateGlobalTracker( + int64_t byte_limit, const std::string& id); // Returns a list of all the valid trackers. static void ListTrackers(std::vector<std::shared_ptr<MemTracker> >* trackers); @@ -258,9 +254,7 @@ class MemTracker : public std::enable_shared_from_this<MemTracker> { void Init(); // Adds tracker to child_trackers_. - // - // child_trackers_lock_ must be held. - void AddChildTrackerUnlocked(const std::shared_ptr<MemTracker>& tracker); + void AddChildTracker(const std::shared_ptr<MemTracker>& tracker); // Logs the stack of the current consume/release. Used for debugging only. void LogUpdate(bool is_consume, int64_t bytes) const; @@ -268,18 +262,8 @@ class MemTracker : public std::enable_shared_from_this<MemTracker> { static std::string LogUsage(const std::string& prefix, const std::list<std::weak_ptr<MemTracker>>& trackers); - // Variant of CreateTracker() that: - // 1. Must be called with a non-NULL parent, and - // 2. Must be called with parent->child_trackers_lock_ held. - static std::shared_ptr<MemTracker> CreateTrackerUnlocked( - int64_t byte_limit, - const std::string& id, - const std::shared_ptr<MemTracker>& parent); - - // Variant of FindTracker() that: - // 1. Must be called with a non-NULL parent, and - // 2. Must be called with parent->child_trackers_lock_ held. - static bool FindTrackerUnlocked( + // Variant of FindTracker() that must be called with a non-NULL parent. + static bool FindTrackerInternal( const std::string& id, std::shared_ptr<MemTracker>* tracker, const std::shared_ptr<MemTracker>& parent);
