[
https://issues.apache.org/jira/browse/HDFS-12701?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16216795#comment-16216795
]
Weiwei Yang edited comment on HDFS-12701 at 10/24/17 12:20 PM:
---------------------------------------------------------------
In a jstack dump, there are more than 50 threads waiting on
{{ShortCircuitCache#unref()}}
{code}
RW.default.readRpcServer.handler=318,queue=21,port=16020" #369 daemon prio=5
os_prio=0 tid=0x00007fcf0814f000 nid=0x1bbf5 waiting on condition
[0x00007fce93cf9000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00007fd18d7c1300> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:189)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.unref(ShortCircuitCache.java:415)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica.unref(ShortCircuitReplica.java:143)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderLocal.close(BlockReaderLocal.java:627)
- locked <0x00007fcf89ac37b8> (a
org.apache.hadoop.hdfs.client.impl.BlockReaderLocal)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1328)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1245)
at
org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1206)
at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1579)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1543)
at
org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:92)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1455)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1616)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1495)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1453)
at
org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$CellBasedKeyBlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:336)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:816)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.reseekTo(HFileReaderImpl.java:797)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseekAtOrAfter(StoreFileScanner.java:263)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseek(StoreFileScanner.java:180)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.enforceSeek(StoreFileScanner.java:381)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.pollRealKV(KeyValueHeap.java:377)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:140)
at
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:613)
{code}
and more than 60 threads blocked waiting on
{{ShortCircuitCache#fetchOrCreate()}}
{code}
"RW.default.readRpcServer.handler=316,queue=19,port=16020" #367 daemon prio=5
os_prio=0 tid=0x00007fcf0814d000 nid=0x1bbf3 waiting on condition
[0x00007fce93d7b000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00007fd18d7c1300> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:189)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:677)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:486)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:367)
at
org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:720)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1282)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1245)
at
org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1206)
at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1579)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1543)
at
org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:92)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1455)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1616)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1495)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1453)
at
org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$CellBasedKeyBlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:336)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:816)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.reseekTo(HFileReaderImpl.java:797)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseekAtOrAfter(StoreFileScanner.java:263)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseek(StoreFileScanner.java:180)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.enforceSeek(StoreFileScanner.java:381)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.pollRealKV(KeyValueHeap.java:377)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:140)
at
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:613)
{code}
was (Author: cheersyang):
In a jstack dump, there are more than 50 threads waiting on
{{ShortCircuitCache#unref()}}
{code}
RW.default.readRpcServer.handler=318,queue=21,port=16020" #369 daemon prio=5
os_prio=0 tid=0x00007fcf0814f000 nid=0x1bbf5 waiting on condition
[0x00007fce93cf9000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00007fd18d7c1300> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:189)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.unref(ShortCircuitCache.java:415)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica.unref(ShortCircuitReplica.java:143)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderLocal.close(BlockReaderLocal.java:627)
- locked <0x00007fcf89ac37b8> (a
org.apache.hadoop.hdfs.client.impl.BlockReaderLocal)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1328)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1245)
at
org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1206)
at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1579)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1543)
at
org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:92)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1455)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1616)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1495)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1453)
at
org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$CellBasedKeyBlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:336)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:816)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.reseekTo(HFileReaderImpl.java:797)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseekAtOrAfter(StoreFileScanner.java:263)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseek(StoreFileScanner.java:180)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.enforceSeek(StoreFileScanner.java:381)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.pollRealKV(KeyValueHeap.java:377)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:140)
at
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:613)
{code}
and more than 60 threads blocked waiting on
{{ShortCircuitCache#fetchOrCreate()}}
{code}
"RW.default.readRpcServer.handler=316,queue=19,port=16020" #367 daemon prio=5
os_prio=0 tid=0x00007fcf0814d000 nid=0x1bbf3 waiting on condition
[0x00007fce93d7b000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00007fd18d7c1300> (a
java.util.concurrent.locks.ReentrantLock$NonfairSync)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:189)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:836)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:870)
at
java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1199)
at
java.util.concurrent.locks.ReentrantLock$NonfairSync.lock(ReentrantLock.java:209)
at java.util.concurrent.locks.ReentrantLock.lock(ReentrantLock.java:285)
at
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:677)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:486)
at
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:367)
at
org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:720)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1282)
at
org.apache.hadoop.hdfs.DFSInputStream.actualGetFromOneDataNode(DFSInputStream.java:1245)
at
org.apache.hadoop.hdfs.DFSInputStream.fetchBlockByteRange(DFSInputStream.java:1206)
at org.apache.hadoop.hdfs.DFSInputStream.pread(DFSInputStream.java:1579)
at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:1543)
at
org.apache.hadoop.fs.FSDataInputStream.read(FSDataInputStream.java:92)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readAtOffset(HFileBlock.java:1455)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockDataInternal(HFileBlock.java:1616)
at
org.apache.hadoop.hbase.io.hfile.HFileBlock$FSReaderImpl.readBlockData(HFileBlock.java:1495)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.readBlock(HFileReaderImpl.java:1453)
at
org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$CellBasedKeyBlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:336)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.seekTo(HFileReaderImpl.java:816)
at
org.apache.hadoop.hbase.io.hfile.HFileReaderImpl$HFileScannerImpl.reseekTo(HFileReaderImpl.java:797)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseekAtOrAfter(StoreFileScanner.java:263)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.reseek(StoreFileScanner.java:180)
at
org.apache.hadoop.hbase.regionserver.StoreFileScanner.enforceSeek(StoreFileScanner.java:381)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.pollRealKV(KeyValueHeap.java:377)
at
org.apache.hadoop.hbase.regionserver.KeyValueHeap.next(KeyValueHeap.java:140)
at
org.apache.hadoop.hbase.regionserver.StoreScanner.next(StoreScanner.java:613)
{code}
full jstack dump attached.
> More fine-grained locks in ShortCircuitCache
> --------------------------------------------
>
> Key: HDFS-12701
> URL: https://issues.apache.org/jira/browse/HDFS-12701
> Project: Hadoop HDFS
> Issue Type: Improvement
> Affects Versions: 2.8.1
> Reporter: Weiwei Yang
>
> When cluster is heavily loaded, we found HBase regionserver handlers are
> often blocked by {{ShortCircuitCache}}. Dumped jstack and found more lots of
> thread waiting on obtain the cache lock. It should be able to be improved by
> using more fine-grained locks to improve the performance.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]