ibessonov commented on code in PR #7722: URL: https://github.com/apache/ignite-3/pull/7722#discussion_r2940314746
########## modules/raft/src/main/java/org/apache/ignite/internal/metrics/sources/LogManagerMetricSource.java: ########## @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.metrics.sources; + +import java.util.List; +import org.apache.ignite.internal.metrics.AbstractMetricSource; +import org.apache.ignite.internal.metrics.AtomicLongMetric; +import org.apache.ignite.internal.metrics.Metric; + +/** Metrics of log manager. */ +public class LogManagerMetricSource extends AbstractMetricSource<LogManagerMetricSource.Holder> { + public static final String SOURCE_NAME = "raft.logmanager"; + + /** + * Constructor. + */ + public LogManagerMetricSource(String groupId) { + super(sourceName(groupId)); + } + + private static String sourceName(String groupId) { + return SOURCE_NAME + '.' + groupId; + } + + @Override + protected Holder createHolder() { + return new Holder(); + } + + /** + * Called when log suffix is truncated. + * + * @param duration Duration of the truncate operation. + */ + public void onTruncateLogSuffix(long duration) { + Holder holder = holder(); + + if (holder != null) { + holder.lastTruncateLogSuffixTime.value(duration); + } + } + + /** + * Called when log prefix is truncated. + * + * @param duration Duration of the truncate operation. + */ + public void onTruncateLogPrefix(long duration) { + Holder holder = holder(); + + if (holder != null) { + holder.lastTruncateLogPrefixTime.value(duration); + } + } + + /** + * Called when logs are appended. + * + * @param entriesCount Number of entries appended. + * @param writtenSize Written size in bytes. + * @param duration Duration of the append operation. + */ + public void onAppendLogs(int entriesCount, int writtenSize, long duration) { + Holder holder = holder(); + + if (holder != null) { + holder.appendLogsCount.add(entriesCount); + holder.appendLogsSize.add(writtenSize); + holder.lastAppendLogsDuration.value(duration); + } + } + + /** Metric holder for log manager metrics. */ + static class Holder implements AbstractMetricSource.Holder<Holder> { + private final AtomicLongMetric lastTruncateLogSuffixTime = new AtomicLongMetric( + "TruncateLogSuffixDuration", + "The last duration of truncating log suffix" Review Comment: Applies to everything. What does this duration mean? Is it seconds, or minutes? The `Last` part is also not reflected in metric name, but it should, I think. Current name would make more sense for a distribution metric. Also, is `Last*` better than having a duration distribution? What would you prefer to see if you're debugging a strange cluster behavior? ########## modules/raft/src/main/java/org/apache/ignite/internal/metrics/sources/FsmCallerMetricSource.java: ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.metrics.sources; + +import java.util.ArrayList; +import java.util.List; +import org.apache.ignite.internal.metrics.AbstractMetricSource; +import org.apache.ignite.internal.metrics.AtomicLongMetric; +import org.apache.ignite.internal.metrics.DistributionMetric; +import org.apache.ignite.internal.metrics.LongGauge; +import org.apache.ignite.internal.metrics.Metric; +import org.apache.ignite.raft.jraft.core.FSMCallerImpl; +import org.apache.ignite.raft.jraft.core.FSMCallerImpl.TaskType; + +/** + * Metrics of FSM caller. + */ +public class FsmCallerMetricSource extends AbstractMetricSource<FsmCallerMetricSource.Holder> { + public static final String SOURCE_NAME = "raft.fsmcaller"; + + /** + * Constructor. + */ + public FsmCallerMetricSource(String groupId) { + super(sourceName(groupId)); + } + + private static String sourceName(String groupId) { + return SOURCE_NAME + '.' + groupId; + } + + @Override + protected Holder createHolder() { + return new Holder(); + } + + /** + * Called on FSM commit. + * + * @param time Duration of the commit operation. + */ + public void onFsmCommit(long time) { + Holder holder = holder(); + + if (holder != null) { + holder.lastCommitTime.value(time); + } + } + + /** + * Called on applying tasks. + * + * @param time Duration of the apply operation. + * @param size Number of tasks applied. + */ + public void onApplyTasks(long time, long size) { + Holder holder = holder(); + + if (holder != null) { + holder.lastApplyTasksTime.value(time); + holder.applyTasksSize.add(size); + } + } + + /** Metric holder for FSM caller metrics. */ + static class Holder implements AbstractMetricSource.Holder<Holder> { + private final DistributionMetric applyTasksSize = new DistributionMetric( + "ApplyTasksSize", + "Sizes of applied tasks batches", + new long[] {10, 20, 30, 40, 50} + ); + + private final AtomicLongMetric lastApplyTasksTime = new AtomicLongMetric("ApplyTasksTime", "Time to apply tasks"); + + private final AtomicLongMetric lastCommitTime = new AtomicLongMetric("CommitTime", "Time to apply tasks"); + + private final List<Metric> metrics; + + Holder() { + metrics = new ArrayList<>(); + + metrics.add(applyTasksSize); + metrics.add(lastApplyTasksTime); + metrics.add(lastCommitTime); + + for (TaskType type : FSMCallerImpl.TaskType.values()) { + metrics.add(new LongGauge(type.metricName(), "Time to execute " + type.name() + " task", type::getApplyDuration)); + } Review Comment: Names like `IdleDuration` fit us better than `idle.Duration`. In other words, current naming system is inconsistent with what we already have, and looks weird in general. Please also get rid of underscores and use camel-case. ########## modules/cli/src/integrationTest/java/org/apache/ignite/internal/cli/CliIntegrationTest.java: ########## @@ -372,6 +342,64 @@ protected static void createAndPopulateTable() { ); } + /** Returns all metric sources that are expected to be present on all nodes combined. */ + public static MetricSource[] getExpectedClusterMetrics() { + Set<MetricSource> result = new HashSet<>(); + + for (var node : CLUSTER.nodes()) { + result.addAll(List.of(getExpectedNodeMetrics(node))); + } + + return result.toArray(MetricSource[]::new); + } + + /** Returns all metric sources that are expected to be present on a specific node. */ + public static MetricSource[] getExpectedNodeMetrics(Ignite ignite) { + MetricSource[] commonMetrics = {new MetricSource().name("jvm").enabled(true), + new MetricSource().name("os").enabled(true), + new MetricSource().name("raft").enabled(true), + new MetricSource().name("metastorage").enabled(true), + new MetricSource().name("client.handler").enabled(true), + new MetricSource().name("sql.client").enabled(true), + new MetricSource().name("sql.plan.cache").enabled(true), + new MetricSource().name("sql.queries").enabled(true), + new MetricSource().name("storage.aipersist").enabled(true), + new MetricSource().name("storage.aipersist.default").enabled(true), + new MetricSource().name("storage.aipersist.default_aipersist").enabled(true), + new MetricSource().name("storage.aipersist.checkpoint").enabled(true), + new MetricSource().name("storage.aipersist.io").enabled(true), + new MetricSource().name("topology.cluster").enabled(true), + new MetricSource().name("topology.local").enabled(true), + new MetricSource().name("thread.pools.partitions-executor").enabled(true), + new MetricSource().name("thread.pools.sql-executor").enabled(true), + new MetricSource().name("thread.pools.sql-planning-executor").enabled(true), + new MetricSource().name("transactions").enabled(true), + new MetricSource().name("placement-driver").enabled(true), + new MetricSource().name("resource.vacuum").enabled(true), + new MetricSource().name("zones.Default").enabled(true), + new MetricSource().name("clock.service").enabled(true), + new MetricSource().name("index.builder").enabled(true), + new MetricSource().name("raft.snapshots").enabled(true), + new MetricSource().name("messaging").enabled(true), + new MetricSource().name("log.storage").enabled(true), + new MetricSource().name(THREAD_POOLS_METRICS_SOURCE_NAME + "striped.messaging.inbound.default").enabled(true), + new MetricSource().name(THREAD_POOLS_METRICS_SOURCE_NAME + "striped.messaging.inbound.deploymentunits").enabled(true), + new MetricSource().name(THREAD_POOLS_METRICS_SOURCE_NAME + "striped.messaging.inbound.scalecube").enabled(true), + new MetricSource().name(THREAD_POOLS_METRICS_SOURCE_NAME + "messaging.outbound").enabled(true) Review Comment: Maybe we should inline these prefixes, every other metric source is an explicit string -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
