DomGarguilo commented on code in PR #4733:
URL: https://github.com/apache/accumulo/pull/4733#discussion_r1679748604


##########
server/manager/src/main/java/org/apache/accumulo/manager/Manager.java:
##########
@@ -960,39 +957,92 @@ private void 
checkForHeldServer(SortedMap<TServerInstance,TabletServerStatus> ts
       }
     }
 
-    private long balanceTablets() {
-
-      Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
+    /**
+     * balanceTablets() balances tables by DataLevel. Return the current set 
of migrations
+     * partitioned by DataLevel
+     */
+    private Map<DataLevel,Set<KeyExtent>> partitionMigrations(final 
Set<KeyExtent> migrations) {
+      final Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
           new HashMap<>(DataLevel.values().length);
-      migrationsSnapshot().forEach(ke -> {
-        partitionedMigrations.computeIfAbsent(DataLevel.of(ke.tableId()), f -> 
new HashSet<>())
-            .add(ke);
+      // populate to prevent NPE
+      for (DataLevel dl : DataLevel.values()) {
+        partitionedMigrations.put(dl, new HashSet<>());
+      }
+      migrations.forEach(ke -> {
+        partitionedMigrations.get(DataLevel.of(ke.tableId())).add(ke);
       });
+      return partitionedMigrations;
+    }
+
+    /**
+     * Given the current tserverStatus map and a DataLevel, return a view of 
the tserverStatus map
+     * that only contains entries for tables in the DataLevel
+     */
+    private SortedMap<TServerInstance,TabletServerStatus> 
createTServerStatusView(
+        final DataLevel dl, final 
SortedMap<TServerInstance,TabletServerStatus> status) {
+      final SortedMap<TServerInstance,TabletServerStatus> 
tserverStatusForLevel = new TreeMap<>();
+      status.forEach((tsi, tss) -> {
+        final TabletServerStatus copy = tss.deepCopy();
+        final Map<String,TableInfo> oldTableMap = copy.getTableMap();
+        final Map<String,TableInfo> newTableMap =
+            new HashMap<>(dl == DataLevel.USER ? oldTableMap.size() : 1);
+        if (dl == DataLevel.ROOT) {
+          if (oldTableMap.containsKey(RootTable.NAME)) {
+            newTableMap.put(RootTable.NAME, oldTableMap.get(RootTable.NAME));
+          }
+        } else if (dl == DataLevel.METADATA) {
+          if (oldTableMap.containsKey(MetadataTable.NAME)) {
+            newTableMap.put(MetadataTable.NAME, 
oldTableMap.get(MetadataTable.NAME));
+          }
+        } else if (dl == DataLevel.USER) {
+          oldTableMap.forEach((table, info) -> {
+            if (!table.equals(RootTable.NAME) && 
!table.equals(MetadataTable.NAME)) {
+              newTableMap.put(table, info);
+            }
+          });
+        } else {
+          throw new IllegalArgumentException("Unhandled DataLevel value: " + 
dl);
+        }
+        copy.setTableMap(newTableMap);
+        tserverStatusForLevel.put(tsi, copy);
+      });
+      return tserverStatusForLevel;
+    }
+
+    private long balanceTablets() {
 
       final int tabletsNotHosted = notHosted();
       BalanceParamsImpl params = null;
       long wait = 0;
       long totalMigrationsOut = 0;
+      final Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
+          partitionMigrations(migrationsSnapshot());
+
       for (DataLevel dl : DataLevel.values()) {
-        final Set<KeyExtent> migrationsForLevel = 
partitionedMigrations.get(dl);
-        if (migrationsForLevel == null) {
-          continue;
-        }
         if (dl == DataLevel.USER && tabletsNotHosted > 0) {
           log.debug("not balancing user tablets because there are {} unhosted 
tablets",
               tabletsNotHosted);
           continue;
         }
+        // Create a view of the tserver status such that it only contains the 
tables
+        // for this level in the tableMap.
+        final SortedMap<TServerInstance,TabletServerStatus> 
tserverStatusForLevel =
+            createTServerStatusView(dl, tserverStatus);

Review Comment:
   Instead of calling this once per data level, I wonder if it would be better 
to create all of the views in a single pass over `tserverStatus`, for example by 
building a map of maps keyed by `DataLevel`, or just three separate maps.



##########
server/manager/src/main/java/org/apache/accumulo/manager/Manager.java:
##########
@@ -960,39 +957,92 @@ private void 
checkForHeldServer(SortedMap<TServerInstance,TabletServerStatus> ts
       }
     }
 
-    private long balanceTablets() {
-
-      Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
+    /**
+     * balanceTablets() balances tables by DataLevel. Return the current set 
of migrations
+     * partitioned by DataLevel
+     */
+    private Map<DataLevel,Set<KeyExtent>> partitionMigrations(final 
Set<KeyExtent> migrations) {
+      final Map<DataLevel,Set<KeyExtent>> partitionedMigrations =
           new HashMap<>(DataLevel.values().length);
-      migrationsSnapshot().forEach(ke -> {
-        partitionedMigrations.computeIfAbsent(DataLevel.of(ke.tableId()), f -> 
new HashSet<>())
-            .add(ke);
+      // populate to prevent NPE
+      for (DataLevel dl : DataLevel.values()) {
+        partitionedMigrations.put(dl, new HashSet<>());
+      }
+      migrations.forEach(ke -> {
+        partitionedMigrations.get(DataLevel.of(ke.tableId())).add(ke);
       });
+      return partitionedMigrations;
+    }

Review Comment:
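   You could do something like this instead (note that a level with no migrations would then have no entry, so callers would need to handle a null from `get`):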
   ```java
         Map<DataLevel,Set<KeyExtent>> partitionedMigrations = new 
EnumMap<>(DataLevel.class);
         migrations.forEach(ke -> partitionedMigrations
             .computeIfAbsent(DataLevel.of(ke.tableId()), k -> new 
HashSet<>()).add(ke));
         return partitionedMigrations;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to