This is an automated email from the ASF dual-hosted git repository.
williamsong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ratis.git
The following commit(s) were added to refs/heads/master by this push:
new c2181e5fa RATIS-1766. Add descriptions to metrics entries (#804)
c2181e5fa is described below
commit c2181e5fab51254452db094693d5bddbda78d5ba
Author: William Song <[email protected]>
AuthorDate: Fri Jan 13 19:43:35 2023 +0800
RATIS-1766. Add descriptions to metrics entries (#804)
---
ratis-docs/src/site/markdown/metrics.md | 145 +++++++++++++++++++++
.../server/metrics/SegmentedRaftLogMetrics.java | 4 +-
2 files changed, 147 insertions(+), 2 deletions(-)
diff --git a/ratis-docs/src/site/markdown/metrics.md
b/ratis-docs/src/site/markdown/metrics.md
new file mode 100644
index 000000000..10c78ccbb
--- /dev/null
+++ b/ratis-docs/src/site/markdown/metrics.md
@@ -0,0 +1,145 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+
+# Metrics
+
+## Ratis Server
+
+### StateMachine Metrics
+
+| Application | Component | Name | Type | Description
|
+|-------------|---------------|---------------------|-------|--------------------------------------------------------------|
+| ratis | state_machine | appliedIndex | Gauge | Applied index of
state machine |
+| ratis | state_machine | applyCompletedIndex | Gauge | Last log index which has been completely applied to the state machine |
+| ratis | state_machine | takeSnapshot | Timer | Time taken for
state machine to take a snapshot |
+
+
+### Leader Election Metrics
+
+| Application | Component | Name | Type |
Description |
+|-------------|-----------------|-------------------------------|---------|-------------------------------------------------------|
+| ratis | leader_election | electionCount | Counter |
Number of leader elections of this group |
+| ratis | leader_election | timeoutCount | Counter |
Number of election timeouts of this peer |
+| ratis | leader_election | electionTime | Timer |
Time spent on leader election |
+| ratis | leader_election | lastLeaderElapsedTime | Gauge |
Time elapsed since last hearing from an active leader |
+| ratis | leader_election | transferLeadershipCount | Counter |
Number of transferLeader requests |
+| ratis | leader_election | lastLeaderElectionElapsedTime | Gauge |
Time elapsed since last leader election |
+
+### Log Appender Metrics
+
+| Application | Component | Name | Type |
Description |
+|-------------|--------------|-----------------------------------|-------|---------------------------------------------|
+| ratis | log_appender | follower_{peer}_next_index | Gauge |
Next index of peer |
+| ratis | log_appender | follower_{peer}_match_index | Gauge |
Match index of peer |
+| ratis | log_appender | follower_{peer}_rpc_response_time | Gauge |
Time elapsed since peer's last rpc response |
+
+### Raft Log Metrics
+
+| Application | Component | Name | Type |
Description
|
+|-------------|------------|---------------------------------|---------|---------------------------------------------------------------------------------------------------------------|
+| ratis | log_worker | metadataLogEntryCount | Counter |
Number of metadata(term-index) log entries
|
+| ratis | log_worker | configLogEntryCount | Counter |
Number of configuration log entries
|
+| ratis | log_worker | stateMachineLogEntryCount | Counter |
Number of statemachine log entries
|
+| ratis | log_worker | flushTime | Timer | Time
taken to flush log
|
+| ratis | log_worker | flushCount | Counter | Number of times log flush is invoked |
+| ratis | log_worker | syncTime | Timer | Time
taken to log sync (fsync)
|
+| ratis | log_worker | dataQueueSize | Gauge | Raft
log data queue size which at any time gives the number of log related
operations in the queue |
+| ratis | log_worker | workerQueueSize | Gauge | Raft
log worker queue size which at any time gives number of committed entries that
are to be synced |
+| ratis | log_worker | syncBatchSize | Gauge |
Number of raft log entries synced in each flush call
|
+| ratis | log_worker | cacheMissCount | Counter | Count
of RaftLogCache Misses
|
+| ratis | log_worker | cacheHitCount | Counter | Count
of RaftLogCache Hits
|
+| ratis | log_worker | closedSegmentsNum | Gauge |
Number of closed raft log segments
|
+| ratis | log_worker | closedSegmentsSizeInBytes | Gauge | Size
of closed raft log segments in bytes
|
+| ratis | log_worker | openSegmentSizeInBytes | Gauge | Size
of open raft log segment in bytes
|
+| ratis | log_worker | appendEntryLatency | Timer | Total
time taken to append a raft log entry
|
+| ratis | log_worker | enqueuedTime | Timer | Time
spent by a Raft log operation in the queue
|
+| ratis | log_worker | queueingDelay | Timer | Time taken for a Raft log operation to get into the queue after being requested, while waiting for the queue to become non-full |
+| ratis | log_worker | {operation}ExecutionTime | Timer | Time taken for a Raft log operation (open/close/flush/write/purge) to complete execution |
+| ratis | log_worker | appendEntryCount | Counter |
Number of entries appended to the raft log
|
+| ratis | log_worker | purgeLog | Timer | Time
taken for Raft log purge operation to complete execution
|
+| ratis | log_worker | numStateMachineDataWriteTimeout | Counter |
Number of statemachine dataApi write timeouts
|
+| ratis | log_worker | numStateMachineDataReadTimeout | Counter |
Number of statemachine dataApi read timeouts
|
+| ratis | log_worker | readEntryLatency | Timer | Time required to read a raft log entry from the actual raft log file and create a raft log entry |
+| ratis | log_worker | segmentLoadLatency | Timer | Time
required to load and process raft log segments during restart
|
+
+
+### Raft Server Metrics
+
+| Application | Component | Name | Type |
Description |
+|-------------|-----------|----------------------------------|---------|---------------------------------------------------------------------|
+| ratis | server | {peer}_lastHeartbeatElapsedTime | Gauge | Time
elapsed since last heartbeat rpc response |
+| ratis | server | follower_append_entry_latency | Timer | Time
taken for followers to append log entries |
+| ratis | server | {peer}_peerCommitIndex | Gauge |
Commit index of peer |
+| ratis | server | clientReadRequest | Timer | Time
taken to process read requests from client |
+| ratis | server | clientStaleReadRequest | Timer | Time
taken to process stale-read requests from client |
+| ratis | server | clientWriteRequest | Timer | Time
taken to process write requests from client |
+| ratis | server | clientWatch{level}Request | Timer | Time
taken to process watch(replication_level) requests from client |
+| ratis | server | numRequestQueueLimitHits | Counter |
Number of (total client requests in queue) limit hits |
+| ratis | server | numRequestsByteSizeLimitHits | Counter |
Number of (total size of client requests in queue) limit hits |
+| ratis | server | numResourceLimitHits | Counter | Sum
of numRequestQueueLimitHits and numRequestsByteSizeLimitHits |
+| ratis | server | numPendingRequestInQueue | Gauge |
Number of pending client requests in queue |
+| ratis | server | numPendingRequestMegaByteSize | Gauge | Total size (in megabytes) of pending client requests in queue |
+| ratis | server | retryCacheEntryCount | Gauge |
Number of entries in retry cache |
+| ratis | server | retryCacheHitCount | Gauge |
Number of retry cache hits |
+| ratis | server | retryCacheHitRate | Gauge | Retry
cache hit rate |
+| ratis | server | retryCacheMissCount | Gauge |
Number of retry cache misses |
+| ratis | server | retryCacheMissRate | Gauge | Retry
cache miss rate |
+| ratis | server | numFailedClientStaleReadOnServer | Counter |
Number of failed stale-read requests |
+| ratis | server | numFailedClientReadOnServer | Counter |
Number of failed read requests |
+| ratis | server | numFailedClientWriteOnServer | Counter |
Number of failed write requests |
+| ratis | server | numFailedClientWatchOnServer | Counter |
Number of failed watch requests |
+| ratis | server | numFailedClientStreamOnServer | Counter |
Number of failed stream requests |
+| ratis | server | numInstallSnapshot | Counter |
Number of install-snapshot requests |
+
+
+## Ratis Netty Metrics
+
+| Application | Component | Name | Type |
Description |
+|-------------|---------------|-------------------------------|---------|-------------------------------------------|
+| ratis_netty | stream_server | {request}_latency | Timer | Time taken to process data stream request |
+| ratis_netty | stream_server | {request}_success_reply_count | Counter |
Number of success replies of request |
+| ratis_netty | stream_server | {request}_fail_reply_count | Counter |
Number of fail replies of request |
+| ratis_netty | stream_server | num_requests_{request} | Counter |
Number of total data stream requests |
+
+## Ratis gRPC Metrics
+
+### Message Metrics
+
+| Application | Component | Name | Type
| Description |
+|-------------|------------------------|----------------------------|---------|--------------------------------------------------|
+| ratis | client_message_metrics | {method}_started_total | Counter | Total number of {method} messages started |
+| ratis | client_message_metrics | {method}_completed_total | Counter | Total number of {method} messages completed |
+| ratis | client_message_metrics | {method}_received_executed | Counter | Total number of {method} messages received and executed |
+| ratis | server_message_metrics | {method}_started_total | Counter | Total number of {method} messages started |
+| ratis | server_message_metrics | {method}_completed_total | Counter | Total number of {method} messages completed |
+| ratis | server_message_metrics | {method}_received_executed | Counter | Total number of {method} messages received and executed |
+
+### gRPC Log Appender Metrics
+
+
+| Application | Component | Name | Type
| Description |
+|-------------|--------------|---------------------------------------|---------|---------------------------------------------|
+| ratis_grpc | log_appender | {appendEntries}_latency | Timer
| Latency of method (appendEntries/heartbeat) |
+| ratis_grpc | log_appender | {follower}_success_reply_count | Counter
| Number of success replies |
+| ratis_grpc | log_appender | {follower}_not_leader_reply_count | Counter
| Number of NotLeader replies |
+| ratis_grpc | log_appender | {follower}_inconsistency_reply_count | Counter
| Number of Inconsistency replies |
+| ratis_grpc | log_appender | {follower}_append_entry_timeout_count | Counter
| Number of appendEntries timeouts |
+| ratis_grpc | log_appender | {follower}_pending_log_requests_count | Counter
| Number of pending requests |
+| ratis_grpc | log_appender | num_retries | Counter
| Number of request retries |
+| ratis_grpc | log_appender | num_requests | Counter
| Number of requests in total |
+| ratis_grpc | log_appender | num_install_snapshot | Counter
| Number of install snapshot requests |
diff --git
a/ratis-server/src/main/java/org/apache/ratis/server/metrics/SegmentedRaftLogMetrics.java
b/ratis-server/src/main/java/org/apache/ratis/server/metrics/SegmentedRaftLogMetrics.java
index a6a1af0ac..865bdcde9 100644
---
a/ratis-server/src/main/java/org/apache/ratis/server/metrics/SegmentedRaftLogMetrics.java
+++
b/ratis-server/src/main/java/org/apache/ratis/server/metrics/SegmentedRaftLogMetrics.java
@@ -68,9 +68,9 @@ public class SegmentedRaftLogMetrics extends
RaftLogMetricsBase {
/** Number of entries appended to the raft log */
public static final String RAFT_LOG_APPEND_ENTRY_COUNT = "appendEntryCount";
public static final String RAFT_LOG_PURGE_METRIC = "purgeLog";
- /** Time taken for a Raft log operation to complete write state machine
data. */
+ /** Number of statemachine dataApi write timeouts */
public static final String RAFT_LOG_STATEMACHINE_DATA_WRITE_TIMEOUT_COUNT =
"numStateMachineDataWriteTimeout";
- /** Time taken for a Raft log operation to complete read state machine data.
*/
+ /** Number of statemachine dataApi read timeouts */
public static final String RAFT_LOG_STATEMACHINE_DATA_READ_TIMEOUT_COUNT =
"numStateMachineDataReadTimeout";
//////////////////////////////