This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit fe73f584140cc1376418479421ae35c702f69326
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Wed May 24 11:08:49 2023 -0700

    IMPALA-12154: Trim recency list appropriately for LIRS ToUninitialize()
    
    The LIRS implementation relies on the invariant that the
    last entry on the recency list is PROTECTED. This is not
    being enforced properly when the code removes an entry
    from the recency list in ToUninitialized(). This comes
    up when an entry is erased or when a protected element
    is overwritten.
    
    This modifies ToUninitialized() to call TrimRecencyList()
    when removing the last entry on the recency list. To
    avoid recursion, it avoids this logic when TrimRecencyList()
    itself is calling ToUninitialized().
    
    Tests:
     - Added a unit tests for erasing/overwriting the last entry
       on the recency list
    
    Change-Id: I83298e0a042174a09f0144aa336b2eb2b28bfee8
    Reviewed-on: http://gerrit.cloudera.org:8080/19929
    Reviewed-by: Joe McDonnell <joemcdonn...@cloudera.com>
    Reviewed-by: Michael Smith <michael.sm...@cloudera.com>
    Tested-by: Michael Smith <michael.sm...@cloudera.com>
---
 be/src/util/cache/lirs-cache-test.cc | 99 +++++++++++++++++++++++++++++++++++-
 be/src/util/cache/lirs-cache.cc      | 32 +++++++++---
 2 files changed, 123 insertions(+), 8 deletions(-)

diff --git a/be/src/util/cache/lirs-cache-test.cc 
b/be/src/util/cache/lirs-cache-test.cc
index 769512c01..9753352e9 100644
--- a/be/src/util/cache/lirs-cache-test.cc
+++ b/be/src/util/cache/lirs-cache-test.cc
@@ -285,7 +285,7 @@ TEST_F(LIRSCacheTest, InsertExistingProtected) {
 
   FillCache();
 
-  // Replace an unprotected key with a new value
+  // Replace a protected key with a new value
   Insert(25, 1025);
   ASSERT_EQ(1, evicted_keys_.size());
   ASSERT_EQ(evicted_keys_[0], 25);
@@ -314,6 +314,56 @@ TEST_F(LIRSCacheTest, InsertExistingProtected) {
   }
 }
 
+TEST_F(LIRSCacheTest, InsertExistingProtectedNeedsTrim) {
+  // This is the same as InsertExistingProtected, except that it is verifying 
that
+  // replacing an existing protected key that is the last entry on the recency
+  // list will trim the recency list.
+
+  FillCache();
+
+  // Lookup every protected value except #25
+  // This doesn't change which keys are protected vs unprotected, but
+  // it changes the order of the elements on the recency list.
+  // 25 will be the oldest element, then there will be all of the
+  // unprotected elements, then all the other protected elements.
+  for (int i = 0; i < 95; ++i) {
+    if (i != 25) {
+      ASSERT_EQ(Lookup(i), i);
+    }
+  }
+
+  // Replace this last protected key with a new value. When this entry is 
inserted,
+  // it will remove the old protected key. This makes the unprotected entries 
the
+  // last on the list, so they will be trimmed off of the recency list.
+  Insert(25, 1025);
+  ASSERT_EQ(1, evicted_keys_.size());
+  ASSERT_EQ(evicted_keys_[0], 25);
+  ASSERT_EQ(evicted_values_[0], 25);
+  evicted_keys_.clear();
+  evicted_values_.clear();
+
+  // If the recency list wasn't trimmed appropriately, then this would hit an 
assert.
+  FlushCache();
+
+  // There were 5 unprotected elements (95-99). They were not impacted by 
changes in
+  // the protected elements.
+  for (int i = 0; i < 5; ++i) {
+    ASSERT_EQ(evicted_keys_[i], 95+i);
+    ASSERT_EQ(evicted_values_[i], 95+i);
+  }
+  // The only thing we guarantee is that key 25 is still protected. None of 
the other
+  // protected elements moved around, but the exact ordering is not specified.
+  for (int i = 5; i < 100; ++i) {
+    ASSERT_LT(evicted_keys_[i], 95);
+    ASSERT_GE(evicted_keys_[i], 0);
+    if (evicted_keys_[i] == 25) {
+      ASSERT_EQ(evicted_values_[i], 1025);
+    } else {
+      ASSERT_EQ(evicted_values_[i], evicted_keys_[i]);
+    }
+  }
+}
+
 TEST_F(LIRSCacheTest, UnprotectedToProtected) {
   // This tests the behavior of lookup of an unprotected key that is more 
recent than
   // the oldest protected key (i.e. it should be promoted to be protected).
@@ -495,6 +545,53 @@ TEST_F(LIRSCacheTest, Erase) {
   }
 }
 
+TEST_F(LIRSCacheTest, EraseNeedsTrim) {
+  // This is the same as InsertExistingProtectedNeedsTrim, except that it is 
verifying
+  // that erasing the last protected key on the recency list will trim the 
recency
+  // list.
+
+  FillCache();
+
+  // Lookup every protected value except #25
+  // This doesn't change which keys are protected vs unprotected, but
+  // it changes the order of the elements on the recency list.
+  // 25 will be the oldest element, then there will be all of the
+  // unprotected elements, then all the other protected elements.
+  for (int i = 0; i < 95; ++i) {
+    if (i != 25) {
+      ASSERT_EQ(Lookup(i), i);
+    }
+  }
+
+  // Replace this last protected key with a new value. When this entry is 
inserted,
+  // it will remove the old protected key. This makes the unprotected entries 
the
+  // last on the list, so they will be trimmed off of the recency list.
+  Erase(25);
+  ASSERT_EQ(1, evicted_keys_.size());
+  ASSERT_EQ(evicted_keys_[0], 25);
+  ASSERT_EQ(evicted_values_[0], 25);
+  evicted_keys_.clear();
+  evicted_values_.clear();
+
+  // If the recency list wasn't trimmed appropriately, then this would hit an 
assert.
+  FlushCache();
+
+  // There were 5 unprotected elements (95-99). They were not impacted by 
changes in
+  // the protected elements.
+  for (int i = 0; i < 5; ++i) {
+    ASSERT_EQ(evicted_keys_[i], 95+i);
+    ASSERT_EQ(evicted_values_[i], 95+i);
+  }
+  // The only thing we guarantee is that key 25 is gone. None of the other
+  // protected elements moved around, but the exact ordering is not specified.
+  for (int i = 5; i < 99; ++i) {
+    ASSERT_NE(evicted_keys_[i], 25);
+    ASSERT_LT(evicted_keys_[i], 95);
+    ASSERT_GE(evicted_keys_[i], 0);
+    ASSERT_EQ(evicted_values_[i], evicted_keys_[i]);
+  }
+}
+
 TEST_F(LIRSCacheTest, TombstoneLimit1) {
   // This tests the enforcement of the tombstone limit. The 
lirs_tombstone_multiple is
   // 2.0, so we expect a cache with 100 elements to maintain at most 200 
tombstones.
diff --git a/be/src/util/cache/lirs-cache.cc b/be/src/util/cache/lirs-cache.cc
index 6b73e7414..9a83d7df5 100644
--- a/be/src/util/cache/lirs-cache.cc
+++ b/be/src/util/cache/lirs-cache.cc
@@ -406,7 +406,7 @@ class LIRSCacheShard : public CacheShard {
   void UnprotectedToTombstone(LIRSThreadState* tstate, LIRSHandle* e);
 
   // Exit the cache
-  void ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e);
+  void ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e, bool is_trim = 
false);
 
   // Functions move evictions and frees outside the critical section for 
mutex_ by
   // placing the affected entries on the LIRSThreadState. This function 
handles the
@@ -478,7 +478,7 @@ LIRSCacheShard::~LIRSCacheShard() {
   LIRSThreadState tstate;
   // Start with the recency list.
   while (!recency_list_.empty()) {
-    LIRSHandle* e = &*recency_list_.begin();
+    LIRSHandle* e = &recency_list_.front();
     DCHECK_NE(e->state(), UNINITIALIZED);
     DCHECK_EQ(e->num_references(), 0);
     // This removes it from the recency list and the unprotected list (if 
appropriate)
@@ -548,7 +548,7 @@ void 
LIRSCacheShard::EnforceProtectedCapacity(LIRSThreadState* tstate) {
   while (protected_usage_ > protected_capacity_) {
     DCHECK(!recency_list_.empty());
     // Get pointer to oldest entry and remove it
-    LIRSHandle* oldest = &*recency_list_.begin();
+    LIRSHandle* oldest = &recency_list_.front();
     // The oldest entry must be protected (i.e. the recency list must be 
trimmed)
     DCHECK_EQ(oldest->state(), PROTECTED);
     ProtectedToUnprotected(tstate, oldest);
@@ -562,9 +562,9 @@ void LIRSCacheShard::TrimRecencyList(LIRSThreadState* 
tstate) {
   // deleted. Unprotected entries still exist in the unprotected list, so 
UNPROTECTED
   // entries should only be removed from the recency list.
   while (!recency_list_.empty() && recency_list_.front().state() != PROTECTED) 
{
-    LIRSHandle* oldest = &*recency_list_.begin();
+    LIRSHandle* oldest = &recency_list_.front();
     if (oldest->state() == TOMBSTONE) {
-      ToUninitialized(tstate, oldest);
+      ToUninitialized(tstate, oldest, /* is_trim */ true);
     } else {
       DCHECK_EQ(oldest->state(), UNPROTECTED);
       recency_list_.pop_front();
@@ -622,7 +622,7 @@ void LIRSCacheShard::MoveToRecencyListBack(LIRSThreadState* 
tstate, LIRSHandle *
   bool need_trim = false;
   if (in_list) {
     // Is this the oldest entry in the list (the front)?
-    LIRSHandle* oldest = &*recency_list_.begin();
+    LIRSHandle* oldest = &recency_list_.front();
     // Invariant: the oldest entry in the list is always a protected entry
     CHECK_EQ(oldest->state(), PROTECTED);
     if (oldest == e) {
@@ -692,6 +692,8 @@ void 
LIRSCacheShard::UnprotectedToProtected(LIRSThreadState* tstate, LIRSHandle
 void LIRSCacheShard::ProtectedToUnprotected(LIRSThreadState* tstate, 
LIRSHandle* e) {
   DCHECK(!e->unprotected_tombstone_list_hook_.is_linked());
   DCHECK(e->recency_list_hook_.is_linked());
+  // Must be the oldest on the recency list
+  DCHECK(&recency_list_.front() == e);
   recency_list_.erase(recency_list_.iterator_to(*e));
   // Trim the recency list after the removal
   TrimRecencyList(tstate);
@@ -788,13 +790,29 @@ bool 
LIRSCacheShard::UninitializedToUnprotected(LIRSThreadState* tstate, LIRSHan
   return true;
 }
 
-void LIRSCacheShard::ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e) {
+void LIRSCacheShard::ToUninitialized(LIRSThreadState* tstate, LIRSHandle* e,
+    bool is_trim) {
   DCHECK_NE(e->state(), UNINITIALIZED);
   LIRSHandle* removed_elem = static_cast<LIRSHandle*>(table_.Remove(e->key(), 
e->hash()));
   DCHECK(e == removed_elem || removed_elem == nullptr);
   // Remove from the list (if it is in the list)
   if (e->recency_list_hook_.is_linked()) {
+    // If we are removing the last entry on the recency list, then we may need 
to call
+    // TrimRecencyList to maintain our invariant that the last entry on the 
recency list
+    // is a PROTECTED element. TrimRecencyList itself calls this function 
while enforcing
+    // the invariant, so this passes is_trim=true from TrimRecencyList to 
avoid the
+    // unnecessary recursion.
+    bool need_trim = false;
+    // Is this the oldest entry in the list (the front)?
+    LIRSHandle* oldest = &recency_list_.front();
+    if (!is_trim and oldest == e) {
+      DCHECK_EQ(e->state(), PROTECTED);
+      need_trim = true;
+    }
     recency_list_.erase(recency_list_.iterator_to(*e));
+    if (need_trim) {
+      TrimRecencyList(tstate);
+    }
   }
   if (e->state() == UNPROTECTED) {
     if (e->unprotected_tombstone_list_hook_.is_linked()) {

Reply via email to