This is an automated email from the ASF dual-hosted git repository.
ddanielr pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new 99af30f283 Add Balancer Metric for Migrations Needed (#4699)
99af30f283 is described below
commit 99af30f283132fbba451a3808f579ce6961082c3
Author: Daniel Roberts <[email protected]>
AuthorDate: Sun Jul 28 15:31:51 2024 -0400
Add Balancer Metric for Migrations Needed (#4699)
Refactored the metric names for readability and added javadoc entries
for each of the new balancer metrics
Produces a single metric for balancing that emits the current number of
migrations needed for the system to be balanced.
Switched metric to use a LongSupplier and handled the initial null condition.
---------
Co-authored-by: Ed Coleman <[email protected]>
---
.../accumulo/core/metrics/MetricsProducer.java | 10 +++++
.../java/org/apache/accumulo/manager/Manager.java | 8 ++++
.../accumulo/manager/metrics/BalancerMetrics.java | 51 ++++++++++++++++++++++
.../java/org/apache/accumulo/test/BalanceIT.java | 18 ++++++++
.../BalanceInPresenceOfOfflineTableIT.java | 2 +
.../apache/accumulo/test/metrics/MetricsIT.java | 5 ++-
6 files changed, 92 insertions(+), 2 deletions(-)
diff --git a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index 8cf2ffc956..84bf20b16c 100644
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@ -596,6 +596,14 @@ import io.micrometer.core.instrument.MeterRegistry;
* <td>Distribution Summary</td>
* <td></td>
* </tr>
+ * <!-- Balancing -->
+ * <tr>
+ * <td>N/A</td>
+ * <td>N/A</td>
+ * <td>{@value METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED}</td>
+ * <td>Gauge</td>
+ * <td>The number of migrations that need to complete before the system is balanced</td>
+ * </tr>
* </table>
*
* @since 2.1.0
@@ -708,6 +716,8 @@ public interface MetricsProducer {
String METRICS_BLOCKCACHE_SUMMARY_REQUESTCOUNT =
METRICS_BLOCKCACHE_PREFIX + "summary.requestcount";
+ String METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED = "accumulo.manager.balancer.migrations.needed";
+
/**
* Build Micrometer Meter objects and register them with the registry
*/
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index 16548ea6d0..a72485af01 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@ -109,6 +109,7 @@ import org.apache.accumulo.core.util.Halt;
import org.apache.accumulo.core.util.Retry;
import org.apache.accumulo.core.util.threads.ThreadPools;
import org.apache.accumulo.core.util.threads.Threads;
+import org.apache.accumulo.manager.metrics.BalancerMetrics;
import org.apache.accumulo.manager.metrics.ManagerMetrics;
import org.apache.accumulo.manager.recovery.RecoveryManager;
import org.apache.accumulo.manager.state.TableCounts;
@@ -205,6 +206,7 @@ public class Manager extends AbstractServer
private TServer clientService = null;
private volatile TabletBalancer tabletBalancer;
private final BalancerEnvironment balancerEnvironment;
+ private final BalancerMetrics balancerMetrics = new BalancerMetrics();
private ManagerState state = ManagerState.INITIAL;
@@ -561,6 +563,10 @@ public class Manager extends AbstractServer
}
}
+ public MetricsProducer getBalancerMetrics() {
+ return balancerMetrics;
+ }
+
enum TabletGoalState {
HOSTED(TUnloadTabletGoal.UNKNOWN),
UNASSIGNED(TUnloadTabletGoal.UNASSIGNED),
@@ -1068,6 +1074,7 @@ public class Manager extends AbstractServer
} while (migrationsOutForLevel > 0 && (dl == DataLevel.ROOT || dl == DataLevel.METADATA));
totalMigrationsOut += migrationsOutForLevel;
}
+ balancerMetrics.assignMigratingCount(migrations::size);
if (totalMigrationsOut == 0) {
synchronized (balancedNotifier) {
@@ -1244,6 +1251,7 @@ public class Manager extends AbstractServer
metricsInfo.addServiceTags(getApplicationName(), sa.getAddress());
var producers = ManagerMetrics.getProducers(getConfiguration(), this);
+ producers.add(balancerMetrics);
metricsInfo.addMetricsProducers(producers.toArray(new MetricsProducer[0]));
metricsInfo.init();
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java
new file mode 100644
index 0000000000..01aa9e3052
--- /dev/null
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/metrics/BalancerMetrics.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.metrics;
+
+import java.util.function.LongSupplier;
+
+import org.apache.accumulo.core.metrics.MetricsProducer;
+
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.MeterRegistry;
+
+public class BalancerMetrics implements MetricsProducer {
+
+ LongSupplier migratingCount;
+
+ public void assignMigratingCount(LongSupplier f) {
+ migratingCount = f;
+ }
+
+ public long getMigratingCount() {
+ // Handle initial NaN value state when balance has never been called
+ if (migratingCount == null) {
+ return 0;
+ }
+ return migratingCount.getAsLong();
+ }
+
+ @Override
+ public void registerMetrics(MeterRegistry registry) {
+ Gauge
+ .builder(METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED, this,
+ BalancerMetrics::getMigratingCount)
+ .description("Overall total migrations that need to complete").register(registry);
+ }
+}
diff --git a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
index 27ae709699..0164463903 100644
--- a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
@@ -19,12 +19,16 @@
package org.apache.accumulo.test;
import java.time.Duration;
+import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.accumulo.core.client.Accumulo;
import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.harness.AccumuloClusterHarness;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
@@ -33,6 +37,20 @@ import org.slf4j.LoggerFactory;
public class BalanceIT extends AccumuloClusterHarness {
private static final Logger log = LoggerFactory.getLogger(BalanceIT.class);
+ @Override
+ public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+ Map<String,String> siteConfig = cfg.getSiteConfig();
+ siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
+ siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+ siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+ siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
+ cfg.setSiteConfig(siteConfig);
+ // ensure we have two tservers
+ if (cfg.getNumTservers() < 2) {
+ cfg.setNumTservers(2);
+ }
+ }
+
@Override
protected Duration defaultTimeout() {
return Duration.ofMinutes(1);
diff --git a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
index 72889e0a2d..fe32572022 100644
--- a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
@@ -69,6 +69,8 @@ public class BalanceInPresenceOfOfflineTableIT extends AccumuloClusterHarness {
Map<String,String> siteConfig = cfg.getSiteConfig();
siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+ siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+ siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
cfg.setSiteConfig(siteConfig);
// ensure we have two tservers
if (cfg.getNumTservers() < 2) {
diff --git a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
index fc0ecdb881..77c76f41ad 100644
--- a/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/metrics/MetricsIT.java
@@ -108,12 +108,13 @@ public class MetricsIT extends ConfigurableMacBase implements MetricsProducer {
// add sserver as flaky until scan server included in mini tests.
Set<String> flakyMetrics = Set.of(METRICS_FATE_TYPE_IN_PROGRESS,
- METRICS_SERVER_IDLE,
+ METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED,
METRICS_SCAN_BUSY_TIMEOUT_COUNTER,
METRICS_SCAN_RESERVATION_CONFLICT_COUNTER,
METRICS_SCAN_RESERVATION_TOTAL_TIMER,
METRICS_SCAN_RESERVATION_WRITEOUT_TIMER,
- METRICS_SCAN_TABLET_METADATA_CACHE);
+ METRICS_SCAN_TABLET_METADATA_CACHE,
+ METRICS_SERVER_IDLE);
// @formatter:on
Map<String,String> expectedMetricNames = this.getMetricFields();