This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 53d0297655 fixes race condition in ClientTabletCacheImpl (#5756) 53d0297655 is described below commit 53d0297655b0d93f43344dd6ee698bc62b4616e9 Author: Keith Turner <ktur...@apache.org> AuthorDate: Thu Jul 24 17:36:51 2025 -0400 fixes race condition in ClientTabletCacheImpl (#5756) Fixes the following race condition. 1. Thread 1 reads the cache and finds it stale/missing 2. Thread 2 reads the cache and finds it stale/missing 3. Thread 2 goes through the entire process of reading metadata and updating the cache 4. Thread 1 does the pre lock read of the cache (this reread of the cache causes the race condition). 5. Thread 1 does the post lock read of the cache and finds its the same and unessecairly reads the metadata table This change avoids reading from the cache twice before locking and updating and only reads once. This way the cache entry used to make a decision after the lock is obtianed is the same one that was seen as stale. * format code --- .../core/clientImpl/ClientTabletCacheImpl.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java index 9d7345998d..8e4e89b435 100644 --- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java +++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java @@ -682,9 +682,9 @@ public class ClientTabletCacheImpl extends ClientTabletCache { } } - private void lookupTablet(ClientContext context, Text row, LockCheckerSession lcSession) - throws AccumuloException, AccumuloSecurityException, TableNotFoundException, - InvalidTabletHostingRequestException { + private void lookupTablet(ClientContext context, Text row, LockCheckerSession lcSession, + CachedTablet before) throws AccumuloException, AccumuloSecurityException, + TableNotFoundException, InvalidTabletHostingRequestException { Text metadataRow = new Text(tableId.canonical()); metadataRow.append(new byte[] {';'}, 0, 1); metadataRow.append(row.getBytes(), 0, row.getLength()); @@ -693,9 +693,10 @@ public class ClientTabletCacheImpl extends ClientTabletCache { if (ptl == null) { return; } - // detect if another thread populated cache while waiting for lock - CachedTablet before = findTabletInCache(row); + try (var unused = lookupLocks.lock(ptl.getExtent())) { + // Now that the lock is acquired, detect if another thread populated cache since the last time + // the cache was read. If so then do not need to read from metadata store. CachedTablet after = findTabletInCache(row); if (after != null && after != before && lcSession.checkLock(after) != null) { return; @@ -855,7 +856,7 @@ public class ClientTabletCacheImpl extends ClientTabletCache { // not in cache OR the cutoff timer was started after when the cached entry timer was started, // so obtain info from metadata table - tl = lookupTabletLocationAndCheckLock(context, row, lcSession); + tl = lookupTabletLocationAndCheckLock(context, row, lcSession, tl); } @@ -863,9 +864,9 @@ public class ClientTabletCacheImpl extends ClientTabletCache { } private CachedTablet lookupTabletLocationAndCheckLock(ClientContext context, Text row, - LockCheckerSession lcSession) throws AccumuloException, AccumuloSecurityException, - TableNotFoundException, InvalidTabletHostingRequestException { - lookupTablet(context, row, lcSession); + LockCheckerSession lcSession, CachedTablet before) throws AccumuloException, + AccumuloSecurityException, TableNotFoundException, InvalidTabletHostingRequestException { + lookupTablet(context, row, lcSession, before); return lcSession.checkLock(findTabletInCache(row)); }