This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new 61ca7d8070 Handles RPC errors when requesting tablet unload (#4497)
61ca7d8070 is described below
commit 61ca7d8070f2eedabf987f5fb471b475babe7da5
Author: Keith Turner <[email protected]>
AuthorDate: Wed May 1 18:55:51 2024 -0400
Handles RPC errors when requesting tablet unload (#4497)
The tablet server group watcher loop will not make progress when it
encounters an RPC error on a single tablet server. It should continue
communicating with the tablet servers it can reach in order to make
progress in its loop that assigns and unassigns tablets.
---
.../accumulo/manager/TabletGroupWatcher.java | 35 ++++++++++++++--------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git
a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 81744441aa..216526d328 100644
---
a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++
b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -344,12 +344,17 @@ abstract class TabletGroupWatcher extends
AccumuloDaemonThread {
TServerConnection client =
manager.tserverSet.getConnection(location.getServerInstance());
if (client != null) {
- Manager.log.trace("[{}] Requesting TabletServer {} unload {}
{}", store.name(),
- location.getServerInstance(), tls.extent,
goal.howUnload());
- client.unloadTablet(manager.managerLock, tls.extent,
goal.howUnload(),
- manager.getSteadyTime());
- unloaded++;
- totalUnloaded++;
+ try {
+ Manager.log.trace("[{}] Requesting TabletServer {} unload
{} {}", store.name(),
+ location.getServerInstance(), tls.extent,
goal.howUnload());
+ client.unloadTablet(manager.managerLock, tls.extent,
goal.howUnload(),
+ manager.getSteadyTime());
+ unloaded++;
+ totalUnloaded++;
+ } catch (TException tException) {
+ Manager.log.warn("[{}] Failed to request tablet unload {}
{} {}", store.name(),
+ location.getServerInstance(), tls.extent,
goal.howUnload(), tException);
+ }
} else {
Manager.log.warn("Could not connect to server {}", location);
}
@@ -1036,13 +1041,19 @@ abstract class TabletGroupWatcher extends
AccumuloDaemonThread {
}
tLists.assignments.addAll(tLists.assigned);
for (Assignment a : tLists.assignments) {
- TServerConnection client = manager.tserverSet.getConnection(a.server);
- if (client != null) {
- client.assignTablet(manager.managerLock, a.tablet);
- } else {
- Manager.log.warn("Could not connect to server {}", a.server);
+ try {
+ TServerConnection client = manager.tserverSet.getConnection(a.server);
+ if (client != null) {
+ client.assignTablet(manager.managerLock, a.tablet);
+ manager.assignedTablet(a.tablet);
+ } else {
+ Manager.log.warn("Could not connect to server {} for assignment of
{}", a.server,
+ a.tablet);
+ }
+ } catch (TException tException) {
+ Manager.log.warn("Could not connect to server {} for assignment of
{}", a.server, a.tablet,
+ tException);
}
- manager.assignedTablet(a.tablet);
}
}