This is an automated email from the ASF dual-hosted git repository. haonan pushed a commit to branch revert-7973-feature/metric_module_enable in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 1d5ee2db43e4c2b529e7ad62774b1a6bf5f12db3 Author: Haonan <[email protected]> AuthorDate: Mon Nov 21 08:49:15 2022 +0800 Revert "[IOTDB-4923] Enable metric module in default config (#7973)" This reverts commit b84845de2d46eb6fb30d0661bb414594bae456af. --- .../thrift/ConfigNodeRPCServiceHandlerMetrics.java | 2 +- .../thrift/ConfigNodeRPCServiceMetrics.java | 2 +- .../multileader/MultiLeaderServerImpl.java | 10 ++--- .../multileader/MultiLeaderServerMetrics.java | 4 +- .../multileader/client/DispatchLogHandler.java | 2 +- .../multileader/logdispatcher/LogDispatcher.java | 2 +- .../MultiLeaderMemoryManagerMetrics.java | 2 +- docs/UserGuide/Monitor-Alert/Metric-Tool.md | 42 +++++++++---------- docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md | 48 +++++++++++----------- .../resources/conf/iotdb-confignode-metric.yml | 6 ++- .../resources/conf/iotdb-datanode-metric.yml | 6 ++- .../apache/iotdb/metrics/config/MetricConfig.java | 9 ++-- .../micrometer/MicrometerMetricManager.java | 2 - .../exchange/MPPDataExchangeServiceMetrics.java | 2 +- ...MppDataExchangeServiceThriftHandlerMetrics.java | 2 +- .../service/DataNodeInternalRPCServiceMetrics.java | 2 +- .../apache/iotdb/db/service/RPCServiceMetrics.java | 2 +- .../iotdb/db/service/metrics/ProcessMetrics.java | 10 ++--- .../InternalServiceThriftHandlerMetrics.java | 2 +- .../handler/RPCServiceThriftHandlerMetrics.java | 2 +- 20 files changed, 80 insertions(+), 79 deletions(-) diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java index 90b9bd2df0..327e854687 100644 --- a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java +++ b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceHandlerMetrics.java @@ -38,7 +38,7 @@ public class ConfigNodeRPCServiceHandlerMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_CONNECTIONS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftConnectionNumber, AtomicLong::get, Tag.NAME.toString(), diff --git a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java index ce57c811cf..f266b49844 100644 --- a/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java +++ b/confignode/src/main/java/org/apache/iotdb/confignode/service/thrift/ConfigNodeRPCServiceMetrics.java @@ -39,7 +39,7 @@ public class ConfigNodeRPCServiceMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_ACTIVE_THREADS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftServiceThread, AbstractThriftServiceThread::getActiveThreadCount, Tag.NAME.toString(), diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java index 7eaa61db2c..e965973459 100644 --- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java +++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerImpl.java @@ -166,7 +166,7 @@ public class MultiLeaderServerImpl { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), @@ -200,7 +200,7 @@ public class MultiLeaderServerImpl { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), @@ -222,7 +222,7 @@ public class MultiLeaderServerImpl { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), @@ -245,7 +245,7 @@ public class MultiLeaderServerImpl { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), @@ -264,7 +264,7 @@ public class MultiLeaderServerImpl { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java index 2d92d0aa29..0f0a3a34a4 100644 --- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java +++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/MultiLeaderServerMetrics.java @@ -39,7 +39,7 @@ public class MultiLeaderServerMetrics implements IMetricSet { MetricService.getInstance() .getOrCreateAutoGauge( Metric.MULTI_LEADER.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, impl, MultiLeaderServerImpl::getIndex, Tag.NAME.toString(), @@ -51,7 +51,7 @@ public class MultiLeaderServerMetrics implements IMetricSet { MetricService.getInstance() .getOrCreateAutoGauge( Metric.MULTI_LEADER.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, impl, MultiLeaderServerImpl::getCurrentSafelyDeletedSearchIndex, Tag.NAME.toString(), diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java index 468bbc1e27..e552acafe2 100644 --- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java +++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/client/DispatchLogHandler.java @@ -67,7 +67,7 @@ public class DispatchLogHandler implements AsyncMethodCallback<TSyncLogRes> { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java index f38c0db801..606731e6a7 100644 --- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java +++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/LogDispatcher.java @@ -308,7 +308,7 @@ public class LogDispatcher { MetricService.getInstance() .getOrCreateHistogram( Metric.STAGE.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, Tag.NAME.toString(), Metric.MULTI_LEADER.toString(), Tag.TYPE.toString(), diff --git a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java index c2b43f448d..326d3dc1ad 100644 --- a/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java +++ b/consensus/src/main/java/org/apache/iotdb/consensus/multileader/logdispatcher/MultiLeaderMemoryManagerMetrics.java @@ -37,7 +37,7 @@ public class MultiLeaderMemoryManagerMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.MEM.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, multiLeaderMemoryManager, MultiLeaderMemoryManager::getMemorySizeInByte, Tag.NAME.toString(), diff --git a/docs/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/UserGuide/Monitor-Alert/Metric-Tool.md index 0f9c1dddd4..12345ebfc9 100644 --- a/docs/UserGuide/Monitor-Alert/Metric-Tool.md +++ b/docs/UserGuide/Monitor-Alert/Metric-Tool.md @@ -82,13 +82,13 @@ Next, we will choose Prometheus format data as samples to describe each kind of #### 1.3.3.1. API | Metric | Tag | level | Description | Sample | -| --------------------- | ------------------------ |-----------| ---------------------------------------- | -------------------------------------------- | +| --------------------- | ------------------------ | --------- | ---------------------------------------- | -------------------------------------------- | | entry_seconds_count | name="{{interface}}" | important | The total request count of the interface | entry_seconds_count{name="openSession",} 1.0 | | entry_seconds_sum | name="{{interface}}" | important | The total cost seconds of the interface | entry_seconds_sum{name="openSession",} 0.024 | | entry_seconds_max | name="{{interface}}" | important | The max latency of the interface | entry_seconds_max{name="openSession",} 0.024 | | quantity_total | name="pointsIn" | important | The total points inserted into IoTDB | quantity_total{name="pointsIn",} 1.0 | -| thrift_connections | name="{{thriftService}}" | important | current number of thrift connections | thrift_connections{name="RPC",} 1.0 | -| thrift_active_threads | name="{{thriftThread}}" | important | current number if thrift worker threads | thrift_active_threads{name="RPC",} 1.0 | +| thrift_connections | name="{{thriftService}}" | core | current number of thrift connections | thrift_connections{name="RPC",} 1.0 | +| thrift_active_threads | name="{{thriftThread}}" | core | current number if thrift worker threads | thrift_active_threads{name="RPC",} 1.0 | #### 1.3.3.2. Task | Metric | Tag | level | Description | Sample | @@ -143,13 +143,13 @@ Next, we will choose Prometheus format data as samples to describe each kind of | mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | The searchIndex and safeIndex of region in multiLeader | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 | | mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex" | important | The currentSyncIndex of LogDispatcherThread of related region | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0 | | mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | The total size of queues that buffers requests in LogDispatcher | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | important | The time consumed to get lock of statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | important | The time consumed to check before write in multiLeader | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | important | The time consumed to write consensus request into statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 | -| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | important | The time consumed to try to offer request into queue in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 | -| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | important | The total time consumed to write a consensus request in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 | -| stage | name="multi_leader", region="{{region}}", type="constructBatch" | important | The time consumed to construct batch in LogDispatcher per request | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | important | The time consumed to sync one request in multiLeader | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | core | The time consumed to get lock of statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | core | The time consumed to check before write in multiLeader | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | core | The time consumed to write consensus request into statemachine in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 | +| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | core | The time consumed to try to offer request into queue in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 | +| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | core | The total time consumed to write a consensus request in multiLeader | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 | +| stage | name="multi_leader", region="{{region}}", type="constructBatch" | core | The time consumed to construct batch in LogDispatcher per request | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | core | The time consumed to sync one request in multiLeader | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 | ### 1.3.4. IoTDB PreDefined Metrics Set @@ -209,17 +209,17 @@ Next, we will choose Prometheus format data as samples to describe each kind of | logback_events_total | {level="trace/debug/info/warn/error",} | Important | The count of trace/debug/info/warn/error log events till now | logback_events_total{level="warn",} 0.0 | #### 1.3.4.4. Process -| Metric | Tag | level | Description | 示例 | -| --------------------- | -------------- |-----------| ----------------------------------------------------------------------------- | ----------------------------------------------- | -| process_cpu_load | name="cpu" | core | current process CPU Usage (%) | process_cpu_load{name="process",} 5.0 | -| process_cpu_time | name="cpu" | core | total Process CPU Time Occupied (ns) | process_cpu_time{name="process",} 3.265625E9 | -| process_max_mem | name="memory" | core | The maximum available memory for the JVM | process_max_mem{name="process",} 3.545759744E9 | -| process_used_mem | name="memory" | important | The current available memory for the JVM | process_used_mem{name="process",} 4.6065456E7 | -| process_total_mem | name="memory" | core | The current requested memory for the JVM | process_total_mem{name="process",} 2.39599616E8 | -| process_free_mem | name="memory" | core | The free available memory for the JVM | process_free_mem{name="process",} 1.94035584E8 | -| process_mem_ratio | name="memory" | important | Memory footprint ratio of process | process_mem_ratio{name="process",} 0.0 | -| process_threads_count | name="process" | important | The current number of threads | process_threads_count{name="process",} 11.0 | -| process_status | name="process" | important | The process survivor status, 1.0 means survivorship, and 0.0 means terminated | process_status{name="process",} 1.0 | +| Metric | Tag | level | Description | 示例 | +| --------------------- | -------------- | ----- | ----------------------------------------------------------------------------- | ----------------------------------------------- | +| process_cpu_load | name="cpu" | core | current process CPU Usage (%) | process_cpu_load{name="process",} 5.0 | +| process_cpu_time | name="cpu" | core | total Process CPU Time Occupied (ns) | process_cpu_time{name="process",} 3.265625E9 | +| process_max_mem | name="memory" | core | The maximum available memory for the JVM | process_max_mem{name="process",} 3.545759744E9 | +| process_used_mem | name="memory" | core | The current available memory for the JVM | process_used_mem{name="process",} 4.6065456E7 | +| process_total_mem | name="memory" | core | The current requested memory for the JVM | process_total_mem{name="process",} 2.39599616E8 | +| process_free_mem | name="memory" | core | The free available memory for the JVM | process_free_mem{name="process",} 1.94035584E8 | +| process_mem_ratio | name="memory" | core | Memory footprint ratio of process | process_mem_ratio{name="process",} 0.0 | +| process_threads_count | name="process" | core | The current number of threads | process_threads_count{name="process",} 11.0 | +| process_status | name="process" | core | The process survivor status, 1.0 means survivorship, and 0.0 means terminated | process_status{name="process",} 1.0 | #### 1.3.4.5. System | Metric | Tag | level | Description | 示例 | diff --git a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md index 5902062276..459912c228 100644 --- a/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md +++ b/docs/zh/UserGuide/Monitor-Alert/Metric-Tool.md @@ -79,13 +79,13 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通 #### 1.3.3.1. Interface | Metric | Tag | level | 说明 | 示例 | -| --------------------- | ------------------------ |-----------| ------------------- | -------------------------------------------- | +| --------------------- | ------------------------ | --------- | ------------------- | -------------------------------------------- | | entry_seconds_count | name="{{interface}}" | important | 接口累计访问次数 | entry_seconds_count{name="openSession",} 1.0 | | entry_seconds_sum | name="{{interface}}" | important | 接口累计耗时(s) | entry_seconds_sum{name="openSession",} 0.024 | | entry_seconds_max | name="{{interface}}" | important | 接口最大耗时(s) | entry_seconds_max{name="openSession",} 0.024 | | quantity_total | name="pointsIn" | important | 系统累计写入点数 | quantity_total{name="pointsIn",} 1.0 | -| thrift_connections | name="{{thriftService}}" | important | thrift当前连接数 | thrift_connections{name="RPC",} 1.0 | -| thrift_active_threads | name="{{thriftThread}}" | important | thrift worker线程数 | thrift_active_threads{name="RPC",} 1.0 | +| thrift_connections | name="{{thriftService}}" | core | thrift当前连接数 | thrift_connections{name="RPC",} 1.0 | +| thrift_active_threads | name="{{thriftThread}}" | core | thrift worker线程数 | thrift_active_threads{name="RPC",} 1.0 | #### 1.3.3.2. Task @@ -136,18 +136,18 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通 | slot | name="{{storageGroupName}}",type="schemaSlotNumber/dataSlotNumber" | normal | database 的 schemaSlot/dataSlot个数 | slot{name="root.schema.sg1",type="schemaSlotNumber",} 2.0 | ##### 1.3.3.6.2. 弱一致性 -| Metric | Tag | level | 说明 | 示例 | -| ------------ | -------------------------------------------------------------------------------------------- | -------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | -| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | 弱一致性对应region的写入index和同步index | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 | +| Metric | Tag | level | 说明 | 示例 | +| ------------ | -------------------------------------------------------------------------------------------- | --------- | ---------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------- | +| mutli_leader | name="multiLeaderServerImpl", region="{{region}}", type="searchIndex/safeIndex" | core | 弱一致性对应region的写入index和同步index | multi_leader{name="multiLeaderServerImpl",region="DataRegion[7]",type="searchIndex",} 1945.0 | | mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="currentSyncIndex" | important | 弱一致性对应region的同步线程当前的同步index | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[7]",type="currentSyncIndex",} 1945.0 | | mutli_leader | name="logDispatcher-{{IP}}:{{Port}}", region="{{region}}", type="cachedRequestInMemoryQueue" | important | 弱一致性对应region的同步线程缓存的队列总大小 | multi_leader{name="logDispatcher-127.0.0.1:40014",region="DataRegion[9]",type="cachedRequestInMemoryQueue",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | important | 弱一致性对应region获取状态机锁的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | important | 弱一致性对应region状态机完成写前检查的耗时 | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | important | 弱一致性对应region状态机写入请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 | -| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | important | 弱一致性对应region状态机尝试将请求放入同步队列的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 | -| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | important | 弱一致性对应region状态机处理共识层请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 | -| stage | name="multi_leader", region="{{region}}", type="constructBatch" | important | 弱一致性对应同步线程完成一个请求构造的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 | -| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | important | 弱一致性对应同步线程完成一个请求同步的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="getStateMachineLock" | core | 弱一致性对应region获取状态机锁的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="getStateMachineLock",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="checkingBeforeWrite" | core | 弱一致性对应region状态机完成写前检查的耗时 | stage{name="multi_leader",region="DataRegion[5]",type="checkingBeforeWrite",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="writeStateMachine" | core | 弱一致性对应region状态机写入请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="writeStateMachine",quantile="0.5",} 1.0 | +| stage | name="multi_leader", region="{{region}}", type="offerRequestToQueue" | core | 弱一致性对应region状态机尝试将请求放入同步队列的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="offerRequestToQueue",quantile="0.5",} 1.0 | +| stage | name="multi_leader", region="{{region}}", type="consensusWrite" | core | 弱一致性对应region状态机处理共识层请求的耗时 | stage{name="multi_leader",region="DataRegion[6]",type="consensusWrite",quantile="0.5",} 2.0625 | +| stage | name="multi_leader", region="{{region}}", type="constructBatch" | core | 弱一致性对应同步线程完成一个请求构造的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="constructBatch",quantile="0.5",} 0.0 | +| stage | name="multi_leader", region="{{region}}", type="syncLogTimePerRequest" | core | 弱一致性对应同步线程完成一个请求同步的耗时 | stage{name="multi_leader",region="DataRegion[7]",type="syncLogTimePerRequest",quantile="0.5",} 0.0 | ### 1.3.4. IoTDB 预定义指标集 @@ -208,17 +208,17 @@ IoTDB对外提供JMX和Prometheus格式的监控指标,对于JMX,可以通 | logback_events_total | {level="trace/debug/info/warn/error",} | important | trace/debug/info/warn/error日志累计数量 | logback_events_total{level="warn",} 0.0 | #### 1.3.4.4. 进程(Process) -| Metric | Tag | level | 说明 | 示例 | -| --------------------- | -------------- |-----------| ---------------------------------- | ----------------------------------------------- | -| process_cpu_load | name="cpu" | core | process当前CPU占用率(%) | process_cpu_load{name="process",} 5.0 | -| process_cpu_time | name="cpu" | core | process累计占用CPU时间(ns) | process_cpu_time{name="process",} 3.265625E9 | -| process_max_mem | name="memory" | core | JVM最大可用内存 | process_max_mem{name="process",} 3.545759744E9 | -| process_used_mem | name="memory" | important | JVM当前使用内存 | process_used_mem{name="process",} 4.6065456E7 | -| process_total_mem | name="memory" | core | JVM当前已申请内存 | process_total_mem{name="process",} 2.39599616E8 | -| process_free_mem | name="memory" | core | JVM当前剩余可用内存 | process_free_mem{name="process",} 1.94035584E8 | -| process_mem_ratio | name="memory" | important | 进程的内存占用比例 | process_mem_ratio{name="process",} 0.0 | -| process_threads_count | name="process" | important | 当前线程数 | process_threads_count{name="process",} 11.0 | -| process_status | name="process" | important | 进程存活状态,1.0为存活,0.0为终止 | process_status{name="process",} 1.0 | +| Metric | Tag | level | 说明 | 示例 | +| --------------------- | -------------- | ----- | ---------------------------------- | ----------------------------------------------- | +| process_cpu_load | name="cpu" | core | process当前CPU占用率(%) | process_cpu_load{name="process",} 5.0 | +| process_cpu_time | name="cpu" | core | process累计占用CPU时间(ns) | process_cpu_time{name="process",} 3.265625E9 | +| process_max_mem | name="memory" | core | JVM最大可用内存 | process_max_mem{name="process",} 3.545759744E9 | +| process_used_mem | name="memory" | core | JVM当前使用内存 | process_used_mem{name="process",} 4.6065456E7 | +| process_total_mem | name="memory" | core | JVM当前已申请内存 | process_total_mem{name="process",} 2.39599616E8 | +| process_free_mem | name="memory" | core | JVM当前剩余可用内存 | process_free_mem{name="process",} 1.94035584E8 | +| process_mem_ratio | name="memory" | core | 进程的内存占用比例 | process_mem_ratio{name="process",} 0.0 | +| process_threads_count | name="process" | core | 当前线程数 | process_threads_count{name="process",} 11.0 | +| process_status | name="process" | core | 进程存活状态,1.0为存活,0.0为终止 | process_status{name="process",} 1.0 | #### 1.3.4.5. 系统(System) | Metric | Tag | level | 说明 | 示例 | diff --git a/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml b/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml index 7262a478da..f85dfece66 100644 --- a/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml +++ b/metrics/interface/src/main/assembly/resources/conf/iotdb-confignode-metric.yml @@ -18,19 +18,21 @@ # # whether enable the module -enableMetric: true +enableMetric: false # Is stat performance of operation latency enablePerformanceStat: false # Multiple reporter, options: [JMX, PROMETHEUS, IOTDB], IOTDB is off by default metricReporterList: + - JMX + - PROMETHEUS # Type of monitor frame, options: [MICROMETER, DROPWIZARD] monitorType: MICROMETER # Level of metric level, options: [CORE, IMPORTANT, NORMAL, ALL] -metricLevel: CORE +metricLevel: IMPORTANT # The period of the collection of some metrics in asynchronous way, such as tsfile size. asyncCollectPeriodInSecond: 5 diff --git a/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml b/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml index 7262a478da..f85dfece66 100644 --- a/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml +++ b/metrics/interface/src/main/assembly/resources/conf/iotdb-datanode-metric.yml @@ -18,19 +18,21 @@ # # whether enable the module -enableMetric: true +enableMetric: false # Is stat performance of operation latency enablePerformanceStat: false # Multiple reporter, options: [JMX, PROMETHEUS, IOTDB], IOTDB is off by default metricReporterList: + - JMX + - PROMETHEUS # Type of monitor frame, options: [MICROMETER, DROPWIZARD] monitorType: MICROMETER # Level of metric level, options: [CORE, IMPORTANT, NORMAL, ALL] -metricLevel: CORE +metricLevel: IMPORTANT # The period of the collection of some metrics in asynchronous way, such as tsfile size. asyncCollectPeriodInSecond: 5 diff --git a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java index 311b3238ed..74505cabf4 100644 --- a/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java +++ b/metrics/interface/src/main/java/org/apache/iotdb/metrics/config/MetricConfig.java @@ -23,13 +23,13 @@ import org.apache.iotdb.metrics.utils.MetricLevel; import org.apache.iotdb.metrics.utils.MonitorType; import org.apache.iotdb.metrics.utils.ReporterType; -import java.util.Collections; +import java.util.Arrays; import java.util.List; import java.util.Objects; public class MetricConfig { /** Is metric service enabled */ - private Boolean enableMetric = true; + private Boolean enableMetric = false; /** Is stat performance of operations enabled */ private Boolean enablePerformanceStat = false; @@ -38,10 +38,11 @@ public class MetricConfig { private MonitorType monitorType = MonitorType.MICROMETER; /** The list of reporters provide data for external system */ - private List<ReporterType> metricReporterList = Collections.emptyList(); + private List<ReporterType> metricReporterList = + Arrays.asList(ReporterType.JMX, ReporterType.PROMETHEUS); /** The level of metric service */ - private MetricLevel metricLevel = MetricLevel.CORE; + private MetricLevel metricLevel = MetricLevel.IMPORTANT; private Integer asyncCollectPeriodInSecond = 5; diff --git a/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java b/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java index b56e2e5596..a1c6eddcb0 100644 --- a/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java +++ b/metrics/micrometer-metrics/src/main/java/org/apache/iotdb/metrics/micrometer/MicrometerMetricManager.java @@ -37,7 +37,6 @@ import org.apache.iotdb.metrics.utils.MetricType; import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; -import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import java.util.concurrent.atomic.AtomicLong; import java.util.function.ToLongFunction; @@ -50,7 +49,6 @@ public class MicrometerMetricManager extends AbstractMetricManager { public MicrometerMetricManager() { meterRegistry = Metrics.globalRegistry; - Metrics.globalRegistry.add(new SimpleMeterRegistry()); } @Override diff --git a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java index 6e9f5141ec..b97fe91149 100644 --- a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MPPDataExchangeServiceMetrics.java @@ -39,7 +39,7 @@ public class MPPDataExchangeServiceMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_ACTIVE_THREADS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftServiceThread, AbstractThriftServiceThread::getActiveThreadCount, Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java index fe63d041ea..85483dd8fd 100644 --- a/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/mpp/execution/exchange/MppDataExchangeServiceThriftHandlerMetrics.java @@ -42,7 +42,7 @@ public class MppDataExchangeServiceThriftHandlerMetrics implements IMetricSet { MetricService.getInstance() .getOrCreateAutoGauge( Metric.THRIFT_CONNECTIONS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftConnectionNumber, AtomicLong::get, Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java index 89715708b7..9cbef97bd1 100644 --- a/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/service/DataNodeInternalRPCServiceMetrics.java @@ -41,7 +41,7 @@ public class DataNodeInternalRPCServiceMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_ACTIVE_THREADS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftServiceThread, AbstractThriftServiceThread::getActiveThreadCount, Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java index 59dbb96fcb..b6f76636f7 100644 --- a/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/service/RPCServiceMetrics.java @@ -40,7 +40,7 @@ public class RPCServiceMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_ACTIVE_THREADS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftServiceThread, AbstractThriftServiceThread::getActiveThreadCount, Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java index 7c1cea0a46..2c95c59eae 100644 --- a/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/service/metrics/ProcessMetrics.java @@ -105,17 +105,16 @@ public class ProcessMetrics implements IMetricSet { a -> runtime.freeMemory(), Tag.NAME.toString(), "process"); - // TODO maybe following metrics can be removed metricService.getOrCreateAutoGauge( Metric.PROCESS_USED_MEM.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, this, a -> getProcessUsedMemory(), Tag.NAME.toString(), "process"); metricService.getOrCreateAutoGauge( Metric.PROCESS_MEM_RATIO.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, this, a -> Math.round(getProcessMemoryRatio()), Tag.NAME.toString(), @@ -136,10 +135,9 @@ public class ProcessMetrics implements IMetricSet { } private void collectThreadInfo(AbstractMetricService metricService) { - // TODO maybe duplicated with thread info in jvm related metrics metricService.getOrCreateAutoGauge( Metric.PROCESS_THREADS_COUNT.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, this, a -> getThreadsCount(), Tag.NAME.toString(), @@ -154,7 +152,7 @@ public class ProcessMetrics implements IMetricSet { private void collectProcessStatusInfo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.PROCESS_STATUS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, this, a -> (getProcessStatus()), Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java index dcd00603fc..94dcb0f78a 100644 --- a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/InternalServiceThriftHandlerMetrics.java @@ -40,7 +40,7 @@ public class InternalServiceThriftHandlerMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_CONNECTIONS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftConnectionNumber, AtomicLong::get, Tag.NAME.toString(), diff --git a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java index 588337c88d..74fc4f4312 100644 --- a/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java +++ b/server/src/main/java/org/apache/iotdb/db/service/thrift/handler/RPCServiceThriftHandlerMetrics.java @@ -37,7 +37,7 @@ public class RPCServiceThriftHandlerMetrics implements IMetricSet { public void bindTo(AbstractMetricService metricService) { metricService.getOrCreateAutoGauge( Metric.THRIFT_CONNECTIONS.toString(), - MetricLevel.IMPORTANT, + MetricLevel.CORE, thriftConnectionNumber, AtomicLong::get, Tag.NAME.toString(),
