This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 0480adf941 remove server invalidation from tablet cache (#5431)
0480adf941 is described below
commit 0480adf94116e2e98f5d64e9df3a1b8d8ef6e6a1
Author: Keith Turner <[email protected]>
AuthorDate: Thu Mar 27 11:52:44 2025 -0400
remove server invalidation from tablet cache (#5431)
The client side tablet cache supports invalidating all entries in the
cache related a to tablet server. This is usually called when client
code sees an unexpected exception on a server. There are two cases that
could cause this. In one case the tablet server is healthy and
something like a user iterator caused an exception. For this case
invalidating everything in the cache related to the server may cause
uneeded metadata reads. The other case is that the tablet server is
unhealthy and any interaction with it will fail. In this case
invalidating everything in the cache makes sense. However, until that
unhealthy server no longer has tablet locations set in the metadata
table, the optimization to clear all servers from the cache is not going
to help overall.
So clearing servers in the cache is sometimes harmful and probably not
really helpful. Therefore this change removes the ability to clear
everything for a server. The code that used to call this was modified
to clear the extents it was trying to access on the server, instead of
everything for the server.
Another reason to remove server invalidation is the cache supports
removing entries where the tserver no longer has a lock. This removal
is lazy in that it happens when something tries to access the entry.
One nice benefit of this is that clearing extents from the cache is
always fast because the cache is indexed by extent. However clearing a
server required a full scan of the cache, which could be slow on a table
with lots of tablets.
Co-authored-by: Christopher Tubbs <[email protected]>
---
.../core/clientImpl/ClientTabletCache.java | 5 ---
.../core/clientImpl/ClientTabletCacheImpl.java | 43 +---------------------
.../core/clientImpl/ConditionalWriterImpl.java | 9 +----
.../core/clientImpl/RootClientTabletCache.java | 5 ---
.../core/clientImpl/SyncingClientTabletCache.java | 5 ---
.../TabletServerBatchReaderIterator.java | 2 +-
.../core/clientImpl/TabletServerBatchWriter.java | 15 +++-----
.../accumulo/core/clientImpl/ThriftScanner.java | 3 +-
.../metadata/MetadataCachedTabletObtainer.java | 4 +-
.../core/clientImpl/ClientTabletCacheImplTest.java | 20 ++++------
.../core/clientImpl/RootClientTabletCacheTest.java | 1 -
11 files changed, 20 insertions(+), 92 deletions(-)
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
index 05c018fae9..3224eb5e32 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCache.java
@@ -180,11 +180,6 @@ public abstract class ClientTabletCache {
*/
public abstract void invalidateCache();
- /**
- * Invalidate all metadata entries that point to server
- */
- public abstract void invalidateCache(ClientContext context, String server);
-
public long getTabletHostingRequestCount() {
return 0L;
}
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
index 5962c23e98..b1f0f913d3 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImpl.java
@@ -101,7 +101,6 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
protected final Text lastTabletRow;
private final TreeSet<KeyExtent> badExtents = new TreeSet<>();
- private final HashSet<String> badServers = new HashSet<>();
private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
private final Lock rLock = rwLock.readLock();
private final Lock wLock = rwLock.writeLock();
@@ -503,23 +502,6 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
}
}
- @Override
- public void invalidateCache(ClientContext context, String server) {
-
- wLock.lock();
- try {
- badServers.add(server);
- } finally {
- wLock.unlock();
- }
-
- lockChecker.invalidateCache(server);
-
- if (log.isTraceEnabled()) {
- log.trace("queued invalidation for table={} server={}", tableId, server);
- }
- }
-
@Override
public void invalidateCache() {
int invalidatedCount;
@@ -919,7 +901,7 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
throws AccumuloSecurityException, AccumuloException,
TableNotFoundException,
InvalidTabletHostingRequestException {
- if (badExtents.isEmpty() && badServers.isEmpty()) {
+ if (badExtents.isEmpty()) {
return;
}
@@ -928,7 +910,7 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
if (!writeLockHeld) {
rLock.unlock();
wLock.lock();
- if (badExtents.isEmpty() && badServers.isEmpty()) {
+ if (badExtents.isEmpty()) {
return;
}
}
@@ -940,27 +922,6 @@ public class ClientTabletCacheImpl extends
ClientTabletCache {
removeOverlapping(metaCache, be);
}
- if (!badServers.isEmpty()) {
- int removedCount = 0;
- var locationIterator = metaCache.values().iterator();
- while (locationIterator.hasNext()) {
- var cacheEntry = locationIterator.next();
- if (cacheEntry.getTserverLocation().isPresent()
- &&
badServers.contains(cacheEntry.getTserverLocation().orElseThrow())) {
- locationIterator.remove();
- lookups.add(cacheEntry.getExtent().toMetaRange());
- removedCount++;
- }
- }
-
- if (log.isTraceEnabled()) {
- log.trace("Invalidated {} cache entries for table {} related to
servers {}", removedCount,
- tableId, badServers);
- }
-
- badServers.clear();
- }
-
lookups = Range.mergeOverlapping(lookups);
Map<String,Map<KeyExtent,List<Range>>> binnedRanges = new HashMap<>();
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
index b804e8ab49..e7b87d3f07 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ConditionalWriterImpl.java
@@ -624,7 +624,7 @@ public class ConditionalWriterImpl implements
ConditionalWriter {
} catch (TApplicationException tae) {
queueException(location, cmidToCm, new
AccumuloServerException(location.toString(), tae));
} catch (TException e) {
- locator.invalidateCache(context, location.toString());
+ locator.invalidateCache(mutations.getMutations().keySet());
invalidateSession(location, cmidToCm, sessionId);
} catch (Exception e) {
queueException(location, cmidToCm, e);
@@ -686,10 +686,6 @@ public class ConditionalWriterImpl implements
ConditionalWriter {
while (true) {
if (!ServiceLock.isLockHeld(context.getZooCache(), lid)) {
- // ACCUMULO-1152 added a tserver lock check to the tablet location
cache, so this
- // invalidation prevents future attempts to contact the
- // tserver even its gone zombie and is still running w/o a lock
- locator.invalidateCache(context, location.toString());
log.trace("tablet server {} {} is dead, so no need to invalidate {}",
location,
sessionId.lockId, sessionId.sessionID);
return;
@@ -705,7 +701,6 @@ public class ConditionalWriterImpl implements
ConditionalWriter {
} catch (TApplicationException tae) {
throw new AccumuloServerException(location.toString(), tae);
} catch (TException e) {
- locator.invalidateCache(context, location.toString());
log.trace("Failed to invalidate {} at {} {}", sessionId.sessionID,
location,
e.getMessage());
}
@@ -716,9 +711,7 @@ public class ConditionalWriterImpl implements
ConditionalWriter {
sleepUninterruptibly(sleepTime, MILLISECONDS);
sleepTime = Math.min(2 * sleepTime, MAX_SLEEP);
-
}
-
}
private void invalidateSession(long sessionId, HostAndPort location) throws
TException {
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
index 16eca25bc8..4eac34a7bb 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/RootClientTabletCache.java
@@ -107,11 +107,6 @@ public class RootClientTabletCache extends
ClientTabletCache {
// no-op see class level javadoc
}
- @Override
- public void invalidateCache(ClientContext context, String server) {
- // no-op see class level javadoc
- }
-
@Override
public void invalidateCache() {
// no-op see class level javadoc
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
index 571d9704df..ffc2c0c548 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/SyncingClientTabletCache.java
@@ -111,9 +111,4 @@ public class SyncingClientTabletCache extends
ClientTabletCache {
public void invalidateCache() {
syncLocator().invalidateCache();
}
-
- @Override
- public void invalidateCache(ClientContext context, String server) {
- syncLocator().invalidateCache(context, server);
- }
}
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
index 2a1e786408..a7de7f5e76 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchReaderIterator.java
@@ -442,7 +442,7 @@ public class TabletServerBatchReaderIterator implements
Iterator<Entry<Key,Value
failures.putAll(unscanned);
}
- locator.invalidateCache(context, tsLocation);
+ locator.invalidateCache(tabletsRanges.keySet());
}
log.debug("IOException thrown", e);
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
index 505cf3569c..c1c4ef1a5d 100644
---
a/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
+++
b/core/src/main/java/org/apache/accumulo/core/clientImpl/TabletServerBatchWriter.java
@@ -49,6 +49,7 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.stream.Collectors;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
@@ -919,14 +920,8 @@ public class TabletServerBatchWriter implements
AutoCloseable {
} catch (IOException e) {
log.debug("failed to send mutations to {}", location, e);
- HashSet<TableId> tables = new HashSet<>();
- for (KeyExtent ke : mutationBatch.keySet()) {
- tables.add(ke.tableId());
- }
-
- for (TableId table : tables) {
- getLocator(table).invalidateCache(context, location);
- }
+
mutationBatch.keySet().stream().collect(Collectors.groupingBy(KeyExtent::tableId))
+ .forEach((k, v) -> getLocator(k).invalidateCache(v));
failedMutations.add(tsm);
} finally {
@@ -990,7 +985,7 @@ public class TabletServerBatchWriter implements
AutoCloseable {
// the write completed successfully so no need to close the session
sessionCloser.clearSession();
- // @formatter:off
+ // @formatter:off
Map<KeyExtent,Long> failures =
updateErrors.failedExtents.entrySet().stream().collect(toMap(
entry -> KeyExtent.fromThrift(entry.getKey()),
Entry::getValue
@@ -998,7 +993,7 @@ public class TabletServerBatchWriter implements
AutoCloseable {
// @formatter:on
updatedConstraintViolations(updateErrors.violationSummaries.stream()
.map(ConstraintViolationSummary::new).collect(toList()));
- // @formatter:off
+ // @formatter:off
updateAuthorizationFailures(updateErrors.authorizationFailures.entrySet().stream().collect(toMap(
entry -> KeyExtent.fromThrift(entry.getKey()),
Entry::getValue
diff --git
a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
index f67d56077e..4d6c792c0d 100644
--- a/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
+++ b/core/src/main/java/org/apache/accumulo/core/clientImpl/ThriftScanner.java
@@ -774,8 +774,7 @@ public class ThriftScanner {
e.getCause() != null &&
e.getCause().getClass().equals(InterruptedIOException.class)
&& scanState.closeInitiated;
if (!wasInterruptedAfterClose) {
-
context.getTabletLocationCache(scanState.tableId).invalidateCache(context,
- addr.serverAddress);
+
context.getTabletLocationCache(scanState.tableId).invalidateCache(addr.getExtent());
}
}
error = "Scan failed, thrift error " + e.getClass().getName() + " "
+ e.getMessage()
diff --git
a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
index a14c8209f0..00a7739716 100644
---
a/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
+++
b/core/src/main/java/org/apache/accumulo/core/metadata/MetadataCachedTabletObtainer.java
@@ -147,7 +147,7 @@ public class MetadataCachedTabletObtainer implements
CachedTabletObtainer {
if (log.isTraceEnabled()) {
log.trace("{} lookup failed", src.getExtent().tableId(), e);
}
- parent.invalidateCache(context, src.getTserverLocation().orElseThrow());
+ parent.invalidateCache(src.getExtent());
}
return null;
@@ -209,7 +209,7 @@ public class MetadataCachedTabletObtainer implements
CachedTabletObtainer {
}
} catch (IOException e) {
log.trace("lookupTablets failed server={}", tserver, e);
- parent.invalidateCache(context, tserver);
+ parent.invalidateCache(tabletsRanges.keySet());
} catch (AccumuloServerException e) {
log.trace("lookupTablets failed server={}", tserver, e);
throw e;
diff --git
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
index 26abce5159..601dff11bb 100644
---
a/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/clientImpl/ClientTabletCacheImplTest.java
@@ -521,7 +521,7 @@ public class ClientTabletCacheImplTest {
tservers.get(src.getTserverLocation().orElseThrow());
if (tablets == null) {
- parent.invalidateCache(context,
src.getTserverLocation().orElseThrow());
+ parent.invalidateCache(src.getExtent());
return null;
}
@@ -552,7 +552,7 @@ public class ClientTabletCacheImplTest {
Map<KeyExtent,SortedMap<Key,Value>> tablets = tservers.get(tserver);
if (tablets == null) {
- parent.invalidateCache(context, tserver);
+ parent.invalidateCache(map.keySet());
return list;
}
@@ -619,10 +619,6 @@ public class ClientTabletCacheImplTest {
return new CachedTablet(RootTable.EXTENT, rootTabletLoc, "1",
TabletAvailability.HOSTED,
false);
}
-
- @Override
- public void invalidateCache(ClientContext context, String server) {}
-
}
static void createEmptyTablet(TServers tservers, String server, KeyExtent
tablet) {
@@ -784,7 +780,7 @@ public class ClientTabletCacheImplTest {
// simulate a server failure
setLocation(tservers, "tserver2", METADATA_TABLE_EXTENT, tab1e21,
"tserver9");
- tab1TabletCache.invalidateCache(context, "tserver8");
+ tab1TabletCache.invalidateCache(tab1e21);
locateTabletTest(tab1TabletCache, "r1", tab1e22, "tserver6");
locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver9");
locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver4");
@@ -792,9 +788,9 @@ public class ClientTabletCacheImplTest {
// simulate all servers failing
deleteServer(tservers, "tserver1");
deleteServer(tservers, "tserver2");
- tab1TabletCache.invalidateCache(context, "tserver4");
- tab1TabletCache.invalidateCache(context, "tserver6");
- tab1TabletCache.invalidateCache(context, "tserver9");
+ tab1TabletCache.invalidateCache(tab1e22);
+ tab1TabletCache.invalidateCache(tab1e21);
+ tab1TabletCache.invalidateCache(tab1e1);
locateTabletTest(tab1TabletCache, "r1", null, null);
locateTabletTest(tab1TabletCache, "h", null, null);
@@ -849,7 +845,7 @@ public class ClientTabletCacheImplTest {
// simulate metadata and regular server down and the reassigned
deleteServer(tservers, "tserver5");
- tab1TabletCache.invalidateCache(context, "tserver7");
+ tab1TabletCache.invalidateCache(tab1e1);
locateTabletTest(tab1TabletCache, "a", null, null);
locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9");
@@ -861,7 +857,7 @@ public class ClientTabletCacheImplTest {
locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver7");
locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
locateTabletTest(tab1TabletCache, "r", tab1e22, "tserver9");
- tab1TabletCache.invalidateCache(context, "tserver7");
+ tab1TabletCache.invalidateCache(tab1e1);
setLocation(tservers, "tserver10", mte1, tab1e1, "tserver2");
locateTabletTest(tab1TabletCache, "a", tab1e1, "tserver2");
locateTabletTest(tab1TabletCache, "h", tab1e21, "tserver8");
diff --git
a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
index 37412f0e21..eed53e5212 100644
---
a/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
+++
b/core/src/test/java/org/apache/accumulo/core/clientImpl/RootClientTabletCacheTest.java
@@ -51,7 +51,6 @@ public class RootClientTabletCacheTest {
public void testInvalidateCache_Noop() {
var rtl = new RootClientTabletCache(lockChecker);
// it's not expected that any of the validate functions will do anything
with the mock objects
- rtl.invalidateCache(context, "server");
rtl.invalidateCache(RootTable.EXTENT);
rtl.invalidateCache();
rtl.invalidateCache(List.of(RootTable.EXTENT));