[ 
https://issues.apache.org/jira/browse/HDDS-12193?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Hemant Kumar updated HDDS-12193:
--------------------------------
        Parent: HDDS-8544
    Issue Type: Sub-task  (was: Bug)

> Metric timer task is blocking installSnapshotFromLeader on follower node
> ------------------------------------------------------------------------
>
>                 Key: HDDS-12193
>                 URL: https://issues.apache.org/jira/browse/HDDS-12193
>             Project: Apache Ozone
>          Issue Type: Sub-task
>            Reporter: Hemant Kumar
>            Priority: Major
>         Attachments: ombootstrapjstack1, ombootstrapjstack2, 
> ombootstrapjstack3, ombootstrapjstack4, ombootstrapjstack5
>
>
> The Prometheus thread blocking the installSnapshotFromLeader thread can cause a delay in 
> bootstrapping.
> InstallSnapshotFromLeader thread:
> {code:java}
> "pool-33-thread-1" #7446 prio=5 os_prio=0 tid=0x00007fd874208800 nid=0x256ffa 
> waiting for monitor entry [0x00007fd89a1ba000]
>    java.lang.Thread.State: BLOCKED (on object monitor)
>         at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.unregisterSource(MetricsSystemImpl.java:247)
>         - waiting to lock <0x00007fda18946070> (a 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl)
>         at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.unregister(RocksDBStoreMetrics.java:143)
>         at org.apache.hadoop.hdds.utils.db.RDBStore.close(RDBStore.java:223)
>         at org.apache.hadoop.ozone.om.OmSnapshot.close(OmSnapshot.java:262)
>         at 
> org.apache.hadoop.ozone.om.snapshot.SnapshotCache.invalidateAll(SnapshotCache.java:109)
>         at 
> org.apache.hadoop.ozone.om.OzoneManager.installCheckpoint(OzoneManager.java:3686)
>         at 
> org.apache.hadoop.ozone.om.OzoneManager.installCheckpoint(OzoneManager.java:3674)
>         at 
> org.apache.hadoop.ozone.om.OzoneManager.installSnapshotFromLeader(OzoneManager.java:3651)
>         - locked <0x00007fda1796bb50> (a 
> org.apache.hadoop.ozone.om.OzoneManager)
>         at 
> org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine.lambda$5(OzoneManagerStateMachine.java:505)
>         at 
> org.apache.hadoop.ozone.om.ratis.OzoneManagerStateMachine$$Lambda$570/0x00007fd89a219428.get(Unknown
>  Source)
>         at 
> java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1604)
>         at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:750)
> {code}
> Prometheus thread:
> {code:java}
> "qtp1931543168-27044" #27044 daemon prio=5 os_prio=0 tid=0x00007fd83d261800 
> nid=0x2383fb runnable [0x00007fd864bf2000]
>    java.lang.Thread.State: RUNNABLE
>         at org.rocksdb.RocksDB.getLiveFilesMetaData(Native Method)
>         at org.rocksdb.RocksDB.getLiveFilesMetaData(RocksDB.java:3988)
>         at 
> org.apache.hadoop.hdds.utils.db.RocksDatabase.getLiveFilesMetaData(RocksDatabase.java:611)
>         at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.computeSstFileStat(RocksDBStoreMetrics.java:251)
>         at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.getDBPropertyData(RocksDBStoreMetrics.java:235)
>         at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.getMetrics(RocksDBStoreMetrics.java:151)
>         at 
> org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.getMetrics(MetricsSourceAdapter.java:200)
>         at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.snapshotMetrics(MetricsSystemImpl.java:419)
>         at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.sampleMetrics(MetricsSystemImpl.java:406)
>         - locked <0x00007fda18946070> (a 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl)
>         at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.publishMetricsNow(MetricsSystemImpl.java:391)
>         - locked <0x00007fda18946070> (a 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl)
>         at 
> org.apache.hadoop.hdds.server.http.PrometheusServlet.doGet(PrometheusServlet.java:61)
>         at javax.servlet.http.HttpServlet.service(HttpServlet.java:687)
>         at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
>         at 
> org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
>         at 
> org.eclipse.jetty.servlet.ServletHandler$ChainEnd.doFilter(ServletHandler.java:1656)
>         at 
> org.apache.hadoop.http.lib.StaticUserWebFilter$StaticUserFilter.doFilter(StaticUserWebFilter.java:110)
>         at 
> org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
>         at 
> org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
>         at 
> org.apache.hadoop.hdds.server.http.HttpServer2$QuotingInputFilter.doFilter(HttpServer2.java:1681)
>         at 
> org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
>         at 
> org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
>         at 
> org.apache.hadoop.hdds.server.http.NoCacheFilter.doFilter(NoCacheFilter.java:48)
>         at 
> org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193)
>         at 
> org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626)
>         at 
> org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
>         at 
> org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600)
>         at 
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235)
>         at 
> org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1624)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233)
>         at 
> org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188)
>         at 
> org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505)
>         at 
> org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1594)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186)
>         at 
> org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355)
>         at 
> org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
>         at 
> org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146)
>         at 
> org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
>         at org.eclipse.jetty.server.Server.handle(Server.java:516)
>         at 
> org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487)
>         at 
> org.eclipse.jetty.server.HttpChannel$$Lambda$654/0x00007fd8705d0c28.dispatch(Unknown
>  Source)
>         at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732)
>         at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479)
>         at 
> org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
>         at 
> org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
>         at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
>         at 
> org.eclipse.jetty.io.ssl.SslConnection$DecryptedEndPoint.onFillable(SslConnection.java:555)
>         at 
> org.eclipse.jetty.io.ssl.SslConnection.onFillable(SslConnection.java:410)
>         at 
> org.eclipse.jetty.io.ssl.SslConnection$2.succeeded(SslConnection.java:164)
>         at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
>         at 
> org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
>         at 
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
>         at 
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
>         at 
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
>         at 
> org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
>         at 
> org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409)
>         at 
> org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
>         at 
> org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
>         at java.lang.Thread.run(Thread.java:750)
> {code}
> Or the metrics collector timer thread:
> {code}
> "Timer for 'OzoneManager' metrics system" #9607 daemon prio=5 os_prio=0 
> tid=0x00007f6994816800 nid=0x31b8f5 runnable [0x00007f69c5ffd000]
>    java.lang.Thread.State: RUNNABLE
>       at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.computeSstFileStat(RocksDBStoreMetrics.java:279)
>       at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.getDBPropertyData(RocksDBStoreMetrics.java:235)
>       at 
> org.apache.hadoop.hdds.utils.RocksDBStoreMetrics.getMetrics(RocksDBStoreMetrics.java:151)
>       at 
> org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.getMetrics(MetricsSourceAdapter.java:200)
>       at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.snapshotMetrics(MetricsSystemImpl.java:419)
>       at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.sampleMetrics(MetricsSystemImpl.java:406)
>       - locked <0x00007f6b43a76ca0> (a 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl)
>       at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl.onTimerEvent(MetricsSystemImpl.java:381)
>       - locked <0x00007f6b43a76ca0> (a 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl)
>       at 
> org.apache.hadoop.metrics2.impl.MetricsSystemImpl$4.run(MetricsSystemImpl.java:368)
>       at java.util.TimerThread.mainLoop(Timer.java:555)
>       at java.util.TimerThread.run(Timer.java:505)
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to