dlmarion commented on code in PR #5530:
URL: https://github.com/apache/accumulo/pull/5530#discussion_r2081561194


##########
core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java:
##########
@@ -74,35 +85,16 @@ protected TabletBalancer getBalancerForTable(TableId 
tableId) {
     if (clazzName == null) {
       clazzName = SimpleLoadBalancer.class.getName();
     }
-    if (balancer != null) {
-      if (!clazzName.equals(balancer.getClass().getName())) {
-        // the balancer class for this table does not match the class 
specified in the configuration
-        try {
-          balancer = constructNewBalancerForTable(clazzName, tableId);
-          perTableBalancers.put(tableId, balancer);
-          balancer.init(environment);
-
-          log.info("Loaded new class {} for table {}", clazzName, tableId);
-        } catch (Exception e) {
-          log.warn("Failed to load table balancer class {} for table {}", 
clazzName, tableId, e);
-        }
-      }
-    }
-    if (balancer == null) {
-      try {
-        balancer = constructNewBalancerForTable(clazzName, tableId);
-        log.info("Loaded class {} for table {}", clazzName, tableId);
-      } catch (Exception e) {
-        log.warn("Failed to load table balancer class {} for table {}", 
clazzName, tableId, e);
-      }
 
+    if (balancer == null || !clazzName.equals(balancer.getClass().getName())) {
+      balancer = constructAndInitializeBalancer(clazzName, tableId);
       if (balancer == null) {
-        log.info("Creating balancer {} limited to balancing table {}",
-            SimpleLoadBalancer.class.getName(), tableId);
-        balancer = new SimpleLoadBalancer(tableId);
+        balancer = 
constructAndInitializeBalancer(DoNothingBalancer.class.getName(), tableId);
+        log.warn("Fell back to balancer {} for table {}", 
DoNothingBalancer.class.getName(),

Review Comment:
   The other logging in this change is set to `warn`. I'm thinking that this 
log statement should be at `error`.



##########
test/src/main/java/org/apache/accumulo/test/BrokenBalancerIT.java:
##########
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Executors;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.spi.balancer.BalancerEnvironment;
+import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer;
+import org.apache.accumulo.core.spi.balancer.TableLoadBalancer;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.ServerType;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacBase;
+import org.apache.accumulo.test.util.Wait;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BrokenBalancerIT extends ConfigurableMacBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(BrokenBalancerIT.class);
+
+  public static class BrokenBalancer extends SimpleLoadBalancer {
+    public BrokenBalancer() {
+      super();
+    }
+
+    public BrokenBalancer(TableId tableId) {
+      super(tableId);
+    }
+
+    @Override
+    public void init(BalancerEnvironment balancerEnvironment) {
+      throw new IllegalStateException();
+    }
+  }
+
+  @Override
+  public void configure(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    Map<String,String> siteConfig = cfg.getSiteConfig();
+    siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
+    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+    siteConfig.put(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL.getKey(), 
"3s");
+    cfg.setSiteConfig(siteConfig);
+    // ensure we have two tservers
+    if (cfg.getNumTservers() != 2) {
+      cfg.setNumTservers(2);
+    }
+  }
+
+  @Test
+  public void testBalancerException() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer(BrokenBalancer.class.getName(), tableName);
+  }
+
+  @Test
+  public void testBalancerNotFound() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer("org.apache.accumulo.abc.NonExistentBalancer", tableName);
+  }
+
+  private void testBadBalancer(String balancerClass, String tableName) throws 
Exception {
+    var executor = Executors.newCachedThreadPool();
+    try (AccumuloClient c = 
Accumulo.newClient().from(getClientProperties()).build()) {
+      SortedSet<Text> splits = new TreeSet<>();
+      for (int i = 0; i < 10; i++) {
+        splits.add(new Text("" + i));
+      }
+      var props = Map.of(Property.TABLE_LOAD_BALANCER.getKey(), balancerClass);
+      NewTableConfiguration ntc =
+          new NewTableConfiguration().withSplits(splits).setProperties(props);
+      c.tableOperations().create(tableName, ntc);
+
+      var scanFuture = executor.submit(() -> {
+        try (var scanner = c.createScanner(tableName)) {
+          scanner.forEach((k, v) -> System.out.println(k + " " + v));
+        }
+        return 0;
+      });
+
+      UtilWaitThread.sleep(5000);
+      // scan should not be able to complete because the tablet should not be 
assigned
+      Assertions.assertFalse(scanFuture.isDone());
+
+      // fix the balancer config
+      log.info("fixing per tablet balancer");
+      c.tableOperations().setProperty(tableName, 
Property.TABLE_LOAD_BALANCER.getKey(),
+          SimpleLoadBalancer.class.getName());
+
+      scanFuture.get();
+
+      // break the balancer at the system level
+      log.info("breaking manager balancer");
+      
c.instanceOperations().setProperty(Property.MANAGER_TABLET_BALANCER.getKey(), 
balancerClass);
+
+      // add some tablet servers
+      assertEquals(2, getCluster().getConfig().getNumTservers());
+      getCluster().getConfig().setNumTservers(5);
+      getCluster().getClusterControl().start(ServerType.TABLET_SERVER);
+      getCluster().getClusterControl().start(ServerType.TABLET_SERVER);

Review Comment:
   Is there a reason `start(ServerType.TABLET_SERVER)` is called twice? Looking
   at the code, a single call to `start(ServerType)` might end up starting all
   of them, as it passes `Integer.MAX_VALUE` for the number of processes to
   start (seems like a bug).



##########
test/src/main/java/org/apache/accumulo/test/BrokenBalancerIT.java:
##########
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import java.util.concurrent.Executors;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.spi.balancer.BalancerEnvironment;
+import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer;
+import org.apache.accumulo.core.spi.balancer.TableLoadBalancer;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.ServerType;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacBase;
+import org.apache.accumulo.test.util.Wait;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BrokenBalancerIT extends ConfigurableMacBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(BrokenBalancerIT.class);
+
+  public static class BrokenBalancer extends SimpleLoadBalancer {
+    public BrokenBalancer() {
+      super();
+    }
+
+    public BrokenBalancer(TableId tableId) {
+      super(tableId);
+    }
+
+    @Override
+    public void init(BalancerEnvironment balancerEnvironment) {
+      throw new IllegalStateException();
+    }
+  }
+
+  @Override
+  public void configure(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    Map<String,String> siteConfig = cfg.getSiteConfig();
+    siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
+    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+    siteConfig.put(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL.getKey(), 
"3s");
+    cfg.setSiteConfig(siteConfig);
+    // ensure we have two tservers
+    if (cfg.getNumTservers() != 2) {
+      cfg.setNumTservers(2);
+    }
+  }
+
+  @Test
+  public void testBalancerException() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer(BrokenBalancer.class.getName(), tableName);
+  }
+
+  @Test
+  public void testBalancerNotFound() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer("org.apache.accumulo.abc.NonExistentBalancer", tableName);
+  }
+
+  private void testBadBalancer(String balancerClass, String tableName) throws 
Exception {
+    var executor = Executors.newCachedThreadPool();
+    try (AccumuloClient c = 
Accumulo.newClient().from(getClientProperties()).build()) {
+      SortedSet<Text> splits = new TreeSet<>();
+      for (int i = 0; i < 10; i++) {
+        splits.add(new Text("" + i));
+      }
+      var props = Map.of(Property.TABLE_LOAD_BALANCER.getKey(), balancerClass);
+      NewTableConfiguration ntc =
+          new NewTableConfiguration().withSplits(splits).setProperties(props);
+      c.tableOperations().create(tableName, ntc);
+
+      var scanFuture = executor.submit(() -> {
+        try (var scanner = c.createScanner(tableName)) {
+          scanner.forEach((k, v) -> System.out.println(k + " " + v));
+        }
+        return 0;
+      });
+
+      UtilWaitThread.sleep(5000);
+      // scan should not be able to complete because the tablet should not be 
assigned
+      Assertions.assertFalse(scanFuture.isDone());

Review Comment:
   Curious whether you could replace this with:
   ```
         assertEquals(0, BalanceIT.countLocations(c, tableName).size());
   ```



##########
server/manager/src/main/java/org/apache/accumulo/manager/Manager.java:
##########
@@ -1918,10 +1918,20 @@ public boolean isUpgrading() {
   }
 
   void initializeBalancer() {
-    var localTabletBalancer = 
Property.createInstanceFromPropertyName(getConfiguration(),
-        Property.MANAGER_TABLET_BALANCER, TabletBalancer.class, new 
TableLoadBalancer());
-    localTabletBalancer.init(balancerEnvironment);
-    tabletBalancer = localTabletBalancer;
+    try {
+      getContext().getPropStore().getCache().removeAll();

Review Comment:
   Are there methods to make the invalidation more narrowly focused, rather
   than nuking everything in the cache?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@accumulo.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to