[
https://issues.apache.org/jira/browse/HBASE-9187?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Jonathan Hsieh reassigned HBASE-9187:
-------------------------------------
Assignee: stack
> HBaseAdmin#testTableExists can go zombie
> ----------------------------------------
>
> Key: HBASE-9187
> URL: https://issues.apache.org/jira/browse/HBASE-9187
> Project: HBase
> Issue Type: Sub-task
> Components: test
> Reporter: stack
> Assignee: stack
> Priority: Critical
> Fix For: 0.98.0, 0.95.2
>
> Attachments: 9187_shorten_timeout.txt
>
>
> See it here as a zombie in hadoopqa:
> https://builds.apache.org/job/PreCommit-HBASE-Build/6687/consoleText
> Looking at it, we seem stuck in here:
> {code}
> "RpcServer.handler=1,port=51776" daemon prio=10 tid=0x72001400 nid=0x17ea
> waiting on condition [0x71cd4000]
> java.lang.Thread.State: TIMED_WAITING (sleeping)
> at java.lang.Thread.sleep(Native Method)
> at
> org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:150)
> - locked <0x81042070> (a
> org.apache.hadoop.hbase.client.RpcRetryingCaller)
> at org.apache.hadoop.hbase.client.HTable.get(HTable.java:732)
> at
> org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:111)
> - locked <0x7f71ba70> (a
> org.apache.hadoop.hbase.master.TableNamespaceManager)
> at
> org.apache.hadoop.hbase.master.HMaster.getNamespaceDescriptor(HMaster.java:3076)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1779)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1820)
> at
> org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:26698)
> at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2068)
> at
> org.apache.hadoop.hbase.ipc.RpcServer$CallRunner.run(RpcServer.java:1807)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.consumerLoop(SimpleRpcScheduler.java:165)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.access$000(SimpleRpcScheduler.java:41)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler$1.run(SimpleRpcScheduler.java:113)
> at java.lang.Thread.run(Thread.java:662)
> {code}
> This lock is held: 0x7f71ba70. We are doing retries against the new ns table.
> A bunch of other threads are trying to get in here while we are retrying:
> {code}
> "RpcServer.handler=0,port=51776" daemon prio=10 tid=0x72000400 nid=0x17e9
> waiting for monitor entry [0x71d25000]
> java.lang.Thread.State: BLOCKED (on object monitor)
> at
> org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:111)
> - waiting to lock <0x7f71ba70> (a
> org.apache.hadoop.hbase.master.TableNamespaceManager)
> at
> org.apache.hadoop.hbase.master.HMaster.getNamespaceDescriptor(HMaster.java:3076)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1779)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1820)
> at
> org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:26698)
> at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2068)
> at
> org.apache.hadoop.hbase.ipc.RpcServer$CallRunner.run(RpcServer.java:1807)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.consumerLoop(SimpleRpcScheduler.java:165)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.access$000(SimpleRpcScheduler.java:41)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler$1.run(SimpleRpcScheduler.java:113)
> at java.lang.Thread.run(Thread.java:662)
> ...
> "RpcServer.handler=4,port=51776" daemon prio=10 tid=0x72cc9000 nid=0x17ed
> waiting for monitor entry [0x71be1000]
> java.lang.Thread.State: BLOCKED (on object monitor)
> at
> org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:111)
> - waiting to lock <0x7f71ba70> (a
> org.apache.hadoop.hbase.master.TableNamespaceManager)
> at
> org.apache.hadoop.hbase.master.HMaster.getNamespaceDescriptor(HMaster.java:3076)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1779)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1820)
> at
> org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:26698)
> at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2068)
> at
> org.apache.hadoop.hbase.ipc.RpcServer$CallRunner.run(RpcServer.java:1807)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.consumerLoop(SimpleRpcScheduler.java:165)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.access$000(SimpleRpcScheduler.java:41)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler$1.run(SimpleRpcScheduler.java:113)
> at java.lang.Thread.run(Thread.java:662)
> "RpcServer.handler=3,port=51776" daemon prio=10 tid=0x72cc7800 nid=0x17ec
> waiting for monitor entry [0x71c32000]
> java.lang.Thread.State: BLOCKED (on object monitor)
> at
> org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:111)
> - waiting to lock <0x7f71ba70> (a
> org.apache.hadoop.hbase.master.TableNamespaceManager)
> at
> org.apache.hadoop.hbase.master.HMaster.getNamespaceDescriptor(HMaster.java:3076)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1779)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1820)
> at
> org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:26698)
> at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2068)
> at
> org.apache.hadoop.hbase.ipc.RpcServer$CallRunner.run(RpcServer.java:1807)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.consumerLoop(SimpleRpcScheduler.java:165)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.access$000(SimpleRpcScheduler.java:41)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler$1.run(SimpleRpcScheduler.java:113)
> at java.lang.Thread.run(Thread.java:662)
> "RpcServer.handler=2,port=51776" daemon prio=10 tid=0x72002c00 nid=0x17eb
> waiting for monitor entry [0x71c83000]
> java.lang.Thread.State: BLOCKED (on object monitor)
> at
> org.apache.hadoop.hbase.master.TableNamespaceManager.get(TableNamespaceManager.java:111)
> - waiting to lock <0x7f71ba70> (a
> org.apache.hadoop.hbase.master.TableNamespaceManager)
> at
> org.apache.hadoop.hbase.master.HMaster.getNamespaceDescriptor(HMaster.java:3076)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1779)
> at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1820)
> at
> org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:26698)
> at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2068)
> at
> org.apache.hadoop.hbase.ipc.RpcServer$CallRunner.run(RpcServer.java:1807)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.consumerLoop(SimpleRpcScheduler.java:165)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler.access$000(SimpleRpcScheduler.java:41)
> at
> org.apache.hadoop.hbase.ipc.SimpleRpcScheduler$1.run(SimpleRpcScheduler.java:113)
> at java.lang.Thread.run(Thread.java:662)
> {code}
> I'd guess no one is getting in here till we finish our 35 retries (almost
> ten minutes, which makes us look like a zombie).
> Seems like we need to be able to interrupt in here when done, or at least add
> logging on why we are in here having trouble getting from the ns table?
--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira