This is an automated email from the ASF dual-hosted git repository.
domgarguilo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 7e843611d2 Improve locking mechanism in ClientTabletCacheImpl (#5538)
7e843611d2 is described below
commit 7e843611d21a7c39b6846e56c434f8b4172e5f0c
Author: Dom G. <[email protected]>
AuthorDate: Tue Jun 3 16:22:30 2025 -0400
Improve locking mechanism in ClientTabletCacheImpl (#5538)
* Improve locking mechanism in ClientTabletCacheImpl
* Make ClientTabletCacheImplTest more concurrently strenuous
---
.../core/clientImpl/ClientTabletCacheImpl.java | 31 +++++------
.../core/clientImpl/ClientTabletCacheImplTest.java | 62 +++++++++++++---------
2 files changed, 50 insertions(+), 43 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
index 803188b5a2..9d7345998d 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
@@ -690,25 +690,20 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
metadataRow.append(row.getBytes(), 0, row.getLength());
CachedTablet ptl = parent.findTablet(context, metadataRow, false,
LocationNeed.REQUIRED);
- if (ptl != null) {
- // Only allow a single lookup at time per parent tablet. For example if
a tables tablets are
- // all stored in three metadata tablets, then that table could have up
to three concurrent
- // metadata lookups.
- Timer timer = Timer.startNew();
- try (var unused = lookupLocks.lock(ptl.getExtent())) {
- // See if entry was added to cache by another thread while we were
waiting on the lock
- var cached = findTabletInCache(row);
- if (cached != null && cached.getCreationTimer().startedAfter(timer)) {
- // This cache entry was added after we started waiting on the lock
so lets use it and not
- // go to the metadata table. This means another thread was holding
the lock and doing
- // metadata lookups when we requested the lock.
- return;
- }
- // Lookup tablets in metadata table and update cache. Also updating
the cache while holding
- // the lock is important as it ensures other threads that are waiting
on the lock will see
- // what this thread found and may be able to avoid metadata lookups.
- lookupTablet(context, lcSession, ptl, metadataRow);
+ if (ptl == null) {
+ return;
+ }
+ // detect if another thread populated cache while waiting for lock
+ CachedTablet before = findTabletInCache(row);
+ try (var unused = lookupLocks.lock(ptl.getExtent())) {
+ CachedTablet after = findTabletInCache(row);
+ if (after != null && after != before && lcSession.checkLock(after) !=
null) {
+ return;
}
+ // Lookup tablets in metadata table and update cache. Also updating the
cache while holding
+ // the lock is important as it ensures other threads that are waiting on
the lock will see
+ // what this thread found and may be able to avoid metadata lookups.
+ lookupTablet(context, lcSession, ptl, metadataRow);
}
}
diff --git
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
index 87855d2e12..372af80083 100644
---
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
@@ -19,6 +19,7 @@
package org.apache.accumulo.core.clientImpl;
import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.util.LazySingletons.RANDOM;
import static org.easymock.EasyMock.replay;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -1986,45 +1987,56 @@ public class ClientTabletCacheImplTest {
setLocation(tservers, "tserver3", mte2, ke3, "tserver9");
var executor = Executors.newCachedThreadPool();
- List<Future<CachedTablet>> futures = new ArrayList<>();
+ final int lookupCount = 128;
+ final int roundCount = 8;
- // start 64 threads all trying to lookup data in the cache, should see
only two threads do a
- // concurrent lookup in the metadata table and no more or less.
- List<String> rowsToLookup = new ArrayList<>();
+ List<Future<CachedTablet>> futures = new ArrayList<>(roundCount *
lookupCount);
- for (int i = 0; i < 64; i++) {
- String lookup = (char) ('a' + (i % 26)) + "";
- rowsToLookup.add(lookup);
- }
+ // multiple rounds to increase the chance of contention
+ for (int round = 0; round < roundCount; round++) {
- Collections.shuffle(rowsToLookup);
-
- for (var lookup : rowsToLookup) {
- var future = executor.submit(() -> {
- var loc = metaCache.findTablet(context, new Text(lookup), false,
LocationNeed.REQUIRED);
- if (lookup.compareTo("m") <= 0) {
- assertEquals("tserver7", loc.getTserverLocation().orElseThrow());
- } else if (lookup.compareTo("q") <= 0) {
- assertEquals("tserver8", loc.getTserverLocation().orElseThrow());
- } else {
- assertEquals("tserver9", loc.getTserverLocation().orElseThrow());
- }
- return loc;
- });
- futures.add(future);
+ // start a bunch of threads all trying to lookup data in the cache
+ // should see exactly 2 threads doing metadata lookups at a time
+ List<String> rowsToLookup = new ArrayList<>(lookupCount);
+
+ for (int i = 0; i < lookupCount; i++) {
+ String lookup = (char) ('a' + (i % 26)) + "";
+ rowsToLookup.add(lookup);
+ }
+
+ Collections.shuffle(rowsToLookup, RANDOM.get());
+
+ for (var lookup : rowsToLookup) {
+ var future = executor.submit(() -> {
+ if (RANDOM.get().nextInt(10) < 3) {
+ Thread.yield();
+ }
+ var loc = metaCache.findTablet(context, new Text(lookup), false,
LocationNeed.REQUIRED);
+ if (lookup.compareTo("m") <= 0) {
+ assertEquals("tserver7", loc.getTserverLocation().orElseThrow());
+ } else if (lookup.compareTo("q") <= 0) {
+ assertEquals("tserver8", loc.getTserverLocation().orElseThrow());
+ } else {
+ assertEquals("tserver9", loc.getTserverLocation().orElseThrow());
+ }
+ return loc;
+ });
+ futures.add(future);
+ }
}
for (var future : futures) {
assertNotNull(future.get());
}
- assertTrue(sawTwoActive.get());
+ assertTrue(sawTwoActive.get(), "Expected to see exactly two lookups.");
// The second metadata tablet (mte2) contains two user tablets (ke2 and
ke3). Depending on which
// of these two user tablets is looked up in the metadata table first will
see a total of 2 or 3
// lookups. If the location of ke2 is looked up first then it will get the
locations of ke2 and
// ke3 from mte2 and put them in the cache. If the location of ke3 is
looked up first then it
// will only get the location of ke3 from mte2 and not ke2.
- assertTrue(lookups.size() == 2 || lookups.size() == 3, lookups::toString);
+ assertTrue(lookups.size() == 2 || lookups.size() == 3,
+ "Expected 2 or 3 lookups, got " + lookups.size() + " : " + lookups);
assertEquals(1, lookups.stream().filter(metadataExtent ->
metadataExtent.equals(mte1)).count(),
lookups::toString);
var mte2Lookups =