keith-turner commented on code in PR #4709:
URL: https://github.com/apache/accumulo/pull/4709#discussion_r1664662025
##########
server/manager/src/main/java/org/apache/accumulo/manager/Manager.java:
##########
@@ -959,27 +957,54 @@ private void
checkForHeldServer(SortedMap<TServerInstance,TabletServerStatus> ts
}
private long balanceTablets() {
- BalanceParamsImpl params =
BalanceParamsImpl.fromThrift(tserverStatusForBalancer,
- tserverStatus, migrationsSnapshot());
- long wait = tabletBalancer.balance(params);
-
- for (TabletMigration m :
checkMigrationSanity(tserverStatusForBalancer.keySet(),
- params.migrationsOut())) {
- KeyExtent ke = KeyExtent.fromTabletId(m.getTablet());
- if (migrations.containsKey(ke)) {
- log.warn("balancer requested migration more than once, skipping {}",
m);
+
+ Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
+ new HashMap<>(DataLevel.values().length);
+ migrationsSnapshot().forEach(ke -> {
+ partitionedMigrations.computeIfAbsent(DataLevel.of(ke.tableId()), f ->
new HashSet<>())
+ .add(ke);
+ });
+
+ final int tabletsNotHosted = notHosted();
+ BalanceParamsImpl params = null;
+ long wait = 0;
+ long totalMigrationsOut = 0;
+ for (DataLevel dl : DataLevel.values()) {
+ Set<KeyExtent> migrationsForLevel = partitionedMigrations.get(dl);
+ if (migrationsForLevel == null) {
+ continue;
+ }
+ if (dl == DataLevel.USER && tabletsNotHosted > 0) {
+ log.debug("not balancing user tablets because there are {} unhosted
tablets",
+ tabletsNotHosted);
continue;
}
- TServerInstance tserverInstance =
TabletServerIdImpl.toThrift(m.getNewTabletServer());
- migrations.put(ke, tserverInstance);
- log.debug("migration {}", m);
+ params = BalanceParamsImpl.fromThrift(tserverStatusForBalancer,
tserverStatus,
+ migrationsSnapshot());
+ totalMigrationsOut += params.migrationsOut().size();
+ do {
+ log.debug("Balancing for tables at level: {}", dl);
+ wait = Math.max(tabletBalancer.balance(params), wait);
+ for (TabletMigration m :
checkMigrationSanity(tserverStatusForBalancer.keySet(),
+ params.migrationsOut())) {
+ KeyExtent ke = KeyExtent.fromTabletId(m.getTablet());
+ if (migrations.containsKey(ke)) {
+ log.warn("balancer requested migration more than once, skipping
{}", m);
+ continue;
+ }
+ TServerInstance tserverInstance =
TabletServerIdImpl.toThrift(m.getNewTabletServer());
+ migrations.put(ke, tserverInstance);
+ log.debug("migration {}", m);
+ }
+ } while (totalMigrationsOut > 0 && (dl == DataLevel.ROOT || dl ==
DataLevel.METADATA));
Review Comment:
There still seems to be a problem here, although it is a bit different with
the changes in 1010954. If the following happens, it seems we will be stuck in
an infinite loop.
* data level is root
* on one iteration of the loop, `totalMigrationsOut` is set to 5
* on all subsequent iterations of the loop, `totalMigrationsOut` continues
to be 5; it's no longer increasing.
This happens because the loop only adds to `totalMigrationsOut`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]