[
https://issues.apache.org/jira/browse/HDDS-8382?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17711243#comment-17711243
]
Attila Doroszlai edited comment on HDDS-8382 at 4/12/23 9:37 AM:
-----------------------------------------------------------------
It looks like the log appender threads in the OM/SCM leaders have high CPU usage.
{code:title=leader SCM thread dump excerpt}
"0b0e0b8b-92dc-4240-b36d-9811584877ad@group-46A6A1DE24D7->d9ea5819-fc0f-4ac1-8ffd-b37ed8d69a94-GrpcLogAppender-LogAppenderDaemon"
#386 daemon prio=5 os_prio=0 cpu=88831.26ms elapsed=807.35s
tid=0x000056534121b000 nid=0x2dc runnable [0x00007efe3e63a000]
java.lang.Thread.State: RUNNABLE
at
org.apache.ratis.server.leader.LogAppender.shouldSendAppendEntries(LogAppender.java:161)
at
org.apache.ratis.server.leader.LogAppenderBase.shouldSendAppendEntries(LogAppenderBase.java:92)
at
org.apache.ratis.grpc.server.GrpcLogAppender.shouldSendAppendEntries(GrpcLogAppender.java:209)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:149)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$739/0x00000008406d5040.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
...
"0b0e0b8b-92dc-4240-b36d-9811584877ad@group-46A6A1DE24D7->f469f1a6-a129-46b5-9458-85ed3b60342b-GrpcLogAppender-LogAppenderDaemon"
#400 daemon prio=5 os_prio=0 cpu=83859.67ms elapsed=789.27s
tid=0x00007efe89bc9800 nid=0x338 runnable [0x00007efe3d02a000]
java.lang.Thread.State: RUNNABLE
at
org.apache.ratis.server.leader.LogAppender.shouldSendAppendEntries(LogAppender.java:161)
at
org.apache.ratis.server.leader.LogAppenderBase.shouldSendAppendEntries(LogAppenderBase.java:92)
at
org.apache.ratis.grpc.server.GrpcLogAppender.shouldSendAppendEntries(GrpcLogAppender.java:209)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:149)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$739/0x00000008406d5040.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
{code}
{code:title=leader OM thread dump excerpt}
"om3@group-D66704EFC61C->om1-GrpcLogAppender-LogAppenderDaemon" #172 daemon
prio=5 os_prio=0 cpu=145841.30ms elapsed=697.93s tid=0x00007f10f62b7800
nid=0x233 runnable [0x00007f10bae8a000]
java.lang.Thread.State: RUNNABLE
at
java.util.concurrent.ConcurrentHashMap.putVal([email protected]/ConcurrentHashMap.java:1012)
at
java.util.concurrent.ConcurrentHashMap.put([email protected]/ConcurrentHashMap.java:1006)
at
org.apache.ratis.server.metrics.RaftServerMetricsImpl.recordFollowerHeartbeatElapsedTime(RaftServerMetricsImpl.java:182)
at
org.apache.ratis.server.impl.LeaderStateImpl.checkHealth(LeaderStateImpl.java:1230)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:168)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$585/0x00000008407c9840.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
"om3@group-D66704EFC61C->om2-GrpcLogAppender-LogAppenderDaemon" #173 daemon
prio=5 os_prio=0 cpu=144648.95ms elapsed=697.93s tid=0x00007f10f58a3000
nid=0x234 runnable [0x00007f10b9f84000]
java.lang.Thread.State: RUNNABLE
at java.lang.Long.valueOf([email protected]/Long.java:1180)
at
org.apache.ratis.server.metrics.RaftServerMetricsImpl.recordFollowerHeartbeatElapsedTime(RaftServerMetricsImpl.java:182)
at
org.apache.ratis.server.impl.LeaderStateImpl.checkHealth(LeaderStateImpl.java:1230)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:168)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$585/0x00000008407c9840.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
{code}
CC [~szetszwo]
was (Author: adoroszlai):
It looks like the log appender threads in the OM/SCM leaders have high CPU usage.
{code:title=leader SCM thread dump excerpt}
"0b0e0b8b-92dc-4240-b36d-9811584877ad@group-46A6A1DE24D7->d9ea5819-fc0f-4ac1-8ffd-b37ed8d69a94-GrpcLogAppender-LogAppenderDaemon"
#386 daemon prio=5 os_prio=0 cpu=88831.26ms elapsed=807.35s
tid=0x000056534121b000 nid=0x2dc runnable [0x00007efe3e63a000]
java.lang.Thread.State: RUNNABLE
at
org.apache.ratis.server.leader.LogAppender.shouldSendAppendEntries(LogAppender.java:161)
at
org.apache.ratis.server.leader.LogAppenderBase.shouldSendAppendEntries(LogAppenderBase.java:92)
at
org.apache.ratis.grpc.server.GrpcLogAppender.shouldSendAppendEntries(GrpcLogAppender.java:209)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:149)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$739/0x00000008406d5040.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
...
"0b0e0b8b-92dc-4240-b36d-9811584877ad@group-46A6A1DE24D7->f469f1a6-a129-46b5-9458-85ed3b60342b-GrpcLogAppender-LogAppenderDaemon"
#400 daemon prio=5 os_prio=0 cpu=83859.67ms elapsed=789.27s
tid=0x00007efe89bc9800 nid=0x338 runnable [0x00007efe3d02a000]
java.lang.Thread.State: RUNNABLE
at
org.apache.ratis.server.leader.LogAppender.shouldSendAppendEntries(LogAppender.java:161)
at
org.apache.ratis.server.leader.LogAppenderBase.shouldSendAppendEntries(LogAppenderBase.java:92)
at
org.apache.ratis.grpc.server.GrpcLogAppender.shouldSendAppendEntries(GrpcLogAppender.java:209)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:149)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$739/0x00000008406d5040.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
{code}
{code:title=leader OM thread dump excerpt}
"om3@group-D66704EFC61C->om1-GrpcLogAppender-LogAppenderDaemon" #172 daemon
prio=5 os_prio=0 cpu=145841.30ms elapsed=697.93s tid=0x00007f10f62b7800
nid=0x233 runnable [0x00007f10bae8a000]
java.lang.Thread.State: RUNNABLE
at
java.util.concurrent.ConcurrentHashMap.putVal([email protected]/ConcurrentHashMap.java:1012)
at
java.util.concurrent.ConcurrentHashMap.put([email protected]/ConcurrentHashMap.java:1006)
at
org.apache.ratis.server.metrics.RaftServerMetricsImpl.recordFollowerHeartbeatElapsedTime(RaftServerMetricsImpl.java:182)
at
org.apache.ratis.server.impl.LeaderStateImpl.checkHealth(LeaderStateImpl.java:1230)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:168)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$585/0x00000008407c9840.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
"om3@group-D66704EFC61C->om2-GrpcLogAppender-LogAppenderDaemon" #173 daemon
prio=5 os_prio=0 cpu=144648.95ms elapsed=697.93s tid=0x00007f10f58a3000
nid=0x234 runnable [0x00007f10b9f84000]
java.lang.Thread.State: RUNNABLE
at java.lang.Long.valueOf([email protected]/Long.java:1180)
at
org.apache.ratis.server.metrics.RaftServerMetricsImpl.recordFollowerHeartbeatElapsedTime(RaftServerMetricsImpl.java:182)
at
org.apache.ratis.server.impl.LeaderStateImpl.checkHealth(LeaderStateImpl.java:1230)
at
org.apache.ratis.grpc.server.GrpcLogAppender.run(GrpcLogAppender.java:168)
at
org.apache.ratis.server.leader.LogAppenderDaemon.run(LogAppenderDaemon.java:78)
at
org.apache.ratis.server.leader.LogAppenderDaemon$$Lambda$585/0x00000008407c9840.run(Unknown
Source)
at java.lang.Thread.run([email protected]/Thread.java:829)
{code}
> HA acceptance tests timeout with Ratis 2.5.0
> --------------------------------------------
>
> Key: HDDS-8382
> URL: https://issues.apache.org/jira/browse/HDDS-8382
> Project: Apache Ozone
> Issue Type: Task
> Reporter: Attila Doroszlai
> Assignee: Attila Doroszlai
> Priority: Critical
>
> HA acceptance tests timed out when CI was run with Ratis 2.5.0.
> https://github.com/adoroszlai/hadoop-ozone/actions/runs/4610785402
> https://github.com/adoroszlai/hadoop-ozone/actions/runs/4666495444
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@ozone.apache.org
For additional commands, e-mail: issues-help@ozone.apache.org