This is an automated email from the ASF dual-hosted git repository.

tanxinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 344962356b [IOTDB-5480] IoTConsensus sync lag may be negative under single copy (#9031)
344962356b is described below

commit 344962356ba7a72d4291ad7df80f363dd5700b00
Author: Xiangpeng Hu <[email protected]>
AuthorDate: Mon Feb 13 14:47:47 2023 +0800

    [IOTDB-5480] IoTConsensus sync lag may be negative under single copy (#9031)
---
 .../consensus/iot/IoTConsensusServerImpl.java      |  5 +++++
 .../consensus/iot/IoTConsensusServerMetrics.java   | 23 ++++++++++++++++++++++
 docs/UserGuide/Monitor-Alert/Metric-Tool.md        |  1 +
 docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md     |  1 +
 4 files changed, 30 insertions(+)

diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerImpl.java b/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerImpl.java
index e903289177..75a2fc2f98 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerImpl.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerImpl.java
@@ -722,6 +722,11 @@ public class IoTConsensusServerImpl {
     return searchIndex.get();
   }
 
+  public long getSyncLag() {
+    long safeIndex = getCurrentSafelyDeletedSearchIndex();
+    return getSearchIndex() - safeIndex;
+  }
+
   public IoTConsensusConfig getConfig() {
     return config;
   }
diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerMetrics.java b/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerMetrics.java
index 82814d071f..ff241bc7e1 100644
--- a/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerMetrics.java
+++ b/consensus/src/main/java/org/apache/iotdb/consensus/iot/IoTConsensusServerMetrics.java
@@ -60,6 +60,19 @@ public class IoTConsensusServerMetrics implements IMetricSet {
             impl.getThisNode().getGroupId().toString(),
             Tag.TYPE.toString(),
             "safeIndex");
+    // TODO: Consider adding topological order to the traversal of metricEntry.
+    MetricService.getInstance()
+        .createAutoGauge(
+            Metric.IOT_CONSENSUS.toString(),
+            MetricLevel.IMPORTANT,
+            impl,
+            IoTConsensusServerImpl::getSyncLag,
+            Tag.NAME.toString(),
+            "ioTConsensusServerImpl",
+            Tag.REGION.toString(),
+            impl.getThisNode().getGroupId().toString(),
+            Tag.TYPE.toString(),
+            "syncLag");
     MetricService.getInstance()
         .createAutoGauge(
             Metric.IOT_CONSENSUS.toString(),
@@ -108,6 +121,16 @@ public class IoTConsensusServerMetrics implements IMetricSet {
             impl.getThisNode().getGroupId().toString(),
             Tag.TYPE.toString(),
             "safeIndex");
+    MetricService.getInstance()
+        .remove(
+            MetricType.AUTO_GAUGE,
+            Metric.IOT_CONSENSUS.toString(),
+            Tag.NAME.toString(),
+            "ioTConsensusServerImpl",
+            Tag.REGION.toString(),
+            impl.getThisNode().getGroupId().toString(),
+            Tag.TYPE.toString(),
+            "syncLag");
     MetricService.getInstance()
         .remove(
             MetricType.AUTO_GAUGE,
diff --git a/docs/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
index dfc68deb52..89e4cf9aaa 100644
--- a/docs/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -171,6 +171,7 @@ carefully evaluated. The current Core-level metrics are as follows:
 | mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | AutoGauge | The size of cache requests of synchronization thread in replica group |
 | mutli_leader | name="IoTConsensusServerImpl", region="{{region}}", type="searchIndex"                       | AutoGauge | The write process of main process in replica group                    |
 | mutli_leader | name="IoTConsensusServerImpl", region="{{region}}", type="safeIndex"                         | AutoGauge | The sync index of replica group                                       |
+| mutli_leader | name="IoTConsensusServerImpl", region="{{region}}", type="syncLag"                           | AutoGauge | The sync lag of replica group                                         |
 | mutli_leader | name="IoTConsensusServerImpl", region="{{region}}", type="LogEntriesFromWAL"                 | AutoGauge | The number of logEntries from wal in Batch                            |
 | mutli_leader | name="IoTConsensusServerImpl", region="{{region}}", type="LogEntriesFromQueue"               | AutoGauge | The number of logEntries from queue in Batch                          |
 | stage        | name="iot_consensus", region="{{region}}", type="getStateMachineLock"                        | Histogram | The time consumed to get statemachine lock in main process            |
diff --git a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
index 0c131a9af7..f7147fe096 100644
--- a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
+++ b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md
@@ -151,6 +151,7 @@ Core 级别的监控指标在系统运行中默认开启,每一个 Core 级别
 | iot_consensus | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | AutoGauge | 副本组同步线程缓存队列请求总大小     |
 | iot_consensus | name="IoTConsensusServerImpl", region="{{region}}", type="searchIndex"                       | AutoGauge | 副本组主流程写入进度           |
 | iot_consensus | name="IoTConsensusServerImpl", region="{{region}}", type="safeIndex"                         | AutoGauge | 副本组同步进度              |
+| iot_consensus | name="IoTConsensusServerImpl", region="{{region}}", type="syncLag"                           | AutoGauge | 副本组写入进度与同步进度差        |
 | iot_consensus | name="IoTConsensusServerImpl", region="{{region}}", type="LogEntriesFromWAL"                 | AutoGauge | 副本组Batch中来自WAL的日志项数量 |
 | iot_consensus | name="IoTConsensusServerImpl", region="{{region}}", type="LogEntriesFromQueue"               | AutoGauge | 副本组Batch中来自队列的日志项数量  |
 | stage         | name="iot_consensus", region="{{region}}", type="getStateMachineLock"                        | Histogram | 主流程获取状态机锁耗时          |

Reply via email to