rpuch commented on code in PR #7845: URL: https://github.com/apache/ignite-3/pull/7845#discussion_r2981157778
########## modules/table/src/test/java/org/apache/ignite/internal/table/distributed/schema/SchemaSyncMetricSourceTest.java: ########## @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.table.distributed.schema; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import org.apache.ignite.internal.metrics.DistributionMetric; +import org.apache.ignite.internal.metrics.MetricRegistry; +import org.apache.ignite.internal.metrics.MetricSet; +import org.apache.ignite.internal.testframework.BaseIgniteAbstractTest; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** Tests for {@link SchemaSyncMetricSource}. 
*/ +class SchemaSyncMetricSourceTest extends BaseIgniteAbstractTest { + private SchemaSyncMetricSource source; + private DistributionMetric waits; + + @BeforeEach + void setUp() { + source = new SchemaSyncMetricSource(); + MetricRegistry registry = new MetricRegistry(); Review Comment: Let's move this to field declarations (and make them final) ########## modules/table/src/main/java/org/apache/ignite/internal/table/distributed/schema/SchemaSyncServiceImpl.java: ########## @@ -31,17 +32,38 @@ public class SchemaSyncServiceImpl implements SchemaSyncService { private final LongSupplier delayDurationMs; + private final LongConsumer waitDurationMsRecorder; + /** * Constructor. */ public SchemaSyncServiceImpl(SchemaSafeTimeTracker schemaSafeTimeTracker, LongSupplier delayDurationMs) { + this(schemaSafeTimeTracker, delayDurationMs, durationMs -> {}); + } + + /** + * Constructor with metrics recording. + * + * @param schemaSafeTimeTracker Schema safe time tracker. + * @param delayDurationMs Supplier of the delay duration in milliseconds. + * @param waitDurationMsRecorder Consumer that receives the duration (in ms) of each completed wait. + */ + public SchemaSyncServiceImpl( + SchemaSafeTimeTracker schemaSafeTimeTracker, + LongSupplier delayDurationMs, + LongConsumer waitDurationMsRecorder + ) { this.schemaSafeTimeTracker = schemaSafeTimeTracker; this.delayDurationMs = delayDurationMs; + this.waitDurationMsRecorder = waitDurationMsRecorder; } @Override public CompletableFuture<Void> waitForMetadataCompleteness(HybridTimestamp ts) { - return schemaSafeTimeTracker.waitFor(metastoreSafeTimeToWait(ts)); + long startMs = System.currentTimeMillis(); Review Comment: I think we should optimize it: 1. Get the future (as it was in line 44) 2. If it's completed, just return it immediately 3. 
Only then, measure the wait. Also, for measuring, let's use `System.nanoTime()` ########## modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/metrics/MetaStorageMetricSource.java: ########## @@ -63,7 +75,7 @@ public void onIdempotentCacheSizeChange(int newSize) { protected class Holder implements AbstractMetricSource.Holder<Holder> { private final LongMetric safeTimeLag = new LongGauge( "SafeTimeLag", - "Number of milliseconds the local MetaStorage SafeTime lags behind the local logical clock.", + "Number of milliseconds the local Meta Storage SafeTime lags behind the local logical clock.", Review Comment: Do we ever write it like this (with a space)? ########## modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/impl/MetaStorageManagerImpl.java: ########## @@ -214,6 +219,24 @@ public class MetaStorageManagerImpl implements MetaStorageManager, MetastorageGr /** Tracks only reads from the leader, local reads are tracked by the storage itself. */ private final ReadOperationForCompactionTracker readOperationFromLeaderForCompactionTracker; + /** Current Meta Storage voting peers (consistent IDs), updated on each committed Raft configuration. */ + private volatile Set<String> currentVotingPeers = Set.of(); + + /** + * MetaStorage availability flag: 1 if MS majority can execute commands, 0 otherwise. + * Updated by the periodic availability check. + */ + private volatile int mgAvailable = 0; Review Comment: Should it be a boolean here? 
########## modules/table/src/test/java/org/apache/ignite/internal/table/distributed/schema/SchemaSyncServiceImplTest.java: ########## @@ -70,4 +75,42 @@ void waitsOnSchemaSafeTimeTillSchemaCompletenessSubtractingDelayDuration() { safeTimeFuture.complete(null); assertThat(waitFuture, willCompleteSuccessfully()); } + + @Test + void waitRecorderIsCalledWithDurationOnCompletion() { + List<Long> recorded = new ArrayList<>(); + schemaSyncService = new SchemaSyncServiceImpl(schemaSafeTimeTracker, delayDurationMs, recorded::add); + + HybridTimestamp ts = clock.now(); + var safeTimeFuture = new CompletableFuture<Void>(); + + when(schemaSafeTimeTracker.waitFor(ts.subtractPhysicalTime(delayDurationMs.getAsLong()))).thenReturn(safeTimeFuture); + + schemaSyncService.waitForMetadataCompleteness(ts); + + assertThat(recorded, empty()); + + safeTimeFuture.complete(null); + + assertThat(recorded, hasSize(1)); + assertThat(recorded.get(0), greaterThanOrEqualTo(0L)); + } + + @Test + void waitRecorderIsCalledEvenWhenFutureCompletesExceptionally() { + List<Long> recorded = new ArrayList<>(); + schemaSyncService = new SchemaSyncServiceImpl(schemaSafeTimeTracker, delayDurationMs, recorded::add); + + HybridTimestamp ts = clock.now(); + var safeTimeFuture = new CompletableFuture<Void>(); + + when(schemaSafeTimeTracker.waitFor(ts.subtractPhysicalTime(delayDurationMs.getAsLong()))).thenReturn(safeTimeFuture); + + schemaSyncService.waitForMetadataCompleteness(ts); + + safeTimeFuture.completeExceptionally(new RuntimeException("test error")); + + assertThat(recorded, hasSize(1)); + assertThat(recorded.get(0), greaterThanOrEqualTo(0L)); + } Review Comment: I would also add a test making sure that, if there is no wait at all (i.e. 
the wait future is returned completed), the metric is not updated ########## modules/table/src/main/java/org/apache/ignite/internal/table/distributed/schema/SchemaSyncMetricSource.java: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.table.distributed.schema; + +import java.util.List; +import org.apache.ignite.internal.metrics.AbstractMetricSource; +import org.apache.ignite.internal.metrics.DistributionMetric; +import org.apache.ignite.internal.metrics.Metric; + +/** + * Metric source for schema synchronization metrics. + */ +public class SchemaSyncMetricSource extends AbstractMetricSource<SchemaSyncMetricSource.Holder> { + private static final String SOURCE_NAME = "schemaSync"; Review Comment: ```suggestion private static final String SOURCE_NAME = "schema.sync"; ``` ########## modules/table/src/main/java/org/apache/ignite/internal/table/distributed/schema/SchemaSyncMetricSource.java: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.ignite.internal.table.distributed.schema; + +import java.util.List; +import org.apache.ignite.internal.metrics.AbstractMetricSource; +import org.apache.ignite.internal.metrics.DistributionMetric; +import org.apache.ignite.internal.metrics.Metric; + +/** + * Metric source for schema synchronization metrics. + */ +public class SchemaSyncMetricSource extends AbstractMetricSource<SchemaSyncMetricSource.Holder> { + private static final String SOURCE_NAME = "schemaSync"; + + /** + * Constructor. + */ + public SchemaSyncMetricSource() { + super(SOURCE_NAME); + } + + /** + * Histogram bounds (in milliseconds) for schema sync wait time distribution. + * Buckets: [0..1], [1..5], [5..10], [10..50], [50..100], [100..500], [500..1000], [1000..5000], [5000..inf]. + */ + private static final long[] WAIT_BOUNDS_MS = {1, 5, 10, 50, 100, 500, 1000, 5000}; + + /** + * Records a completed schema sync wait with the given duration. + * + * @param durationMs Duration of the wait in milliseconds. + */ + public void recordWait(long durationMs) { + Holder holder = holder(); + + if (holder != null) { + holder.waits.add(durationMs); + } + } + + @Override + protected Holder createHolder() { + return new Holder(); + } + + /** Holder. 
*/ + protected static class Holder implements AbstractMetricSource.Holder<Holder> { + private final DistributionMetric waits = new DistributionMetric( + "SchemaSyncWaits", Review Comment: So the full name is `schema.sync.SchemaSyncWaits`; there is some duplication. Should it be called `WaitDurations` or simply `Waits`? ########## modules/metastorage/src/main/java/org/apache/ignite/internal/metastorage/metrics/MetaStorageMetricSource.java: ########## @@ -72,9 +84,23 @@ protected class Holder implements AbstractMetricSource.Holder<Holder> { "The current size of the cache of idempotent commands' results." ); + private final IntGauge availablePeers = new IntGauge( + "AvailablePeers", + "Number of available members of the Meta Storage voting set based on the current logical topology.", + availablePeersSupplier + ); + + private final IntGauge availableMajority = new IntGauge( + "AvailableMajority", Review Comment: Should it be `MajorityAvailable`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
