[ 
https://issues.apache.org/jira/browse/HBASE-8531?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

stack reopened HBASE-8531:
--------------------------


Noticed on ec2 build that TestZooKeeper failed again.  While we fixed the 
failed scan of .META. as part of create table, the same exception coming up 
later in the create table sequence can kill us again.  See below exception 
where the RegionServerStoppedException comes up through createTableAsync.  Will add an 
addendum.

{code}
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: 
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: 
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: Server 
ip-10-174-58-159.us-west-1.compute.internal,33970,1368593564901 not running, 
aborting
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
        at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
        at 
org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:95)
        at 
org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:79)
        at 
org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:227)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.executeCallable(HBaseAdmin.java:2638)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.execute(HBaseAdmin.java:2607)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.createTableAsync(HBaseAdmin.java:523)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.createTable(HBaseAdmin.java:416)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.createTable(HBaseAdmin.java:348)
        at 
org.apache.hadoop.hbase.TestZooKeeper.testSanity(TestZooKeeper.java:242)
        at 
org.apache.hadoop.hbase.TestZooKeeper.testRegionServerSessionExpired(TestZooKeeper.java:202)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at 
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
        at java.lang.reflect.Method.invoke(Method.java:597)
        at 
org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
        at 
org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
        at 
org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
        at 
org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
        at 
org.junit.internal.runners.statements.FailOnTimeout$StatementThread.run(FailOnTimeout.java:74)
Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException: 
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: 
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: Server 
ip-10-174-58-159.us-west-1.compute.internal,33970,1368593564901 not running, 
aborting
        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
        at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
        at 
org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:95)
        at 
org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:79)
        at 
org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:227)
        at 
org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:272)
        at 
org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:137)
        at 
org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:52)
        at 
org.apache.hadoop.hbase.client.ServerCallable.withRetries(ServerCallable.java:170)
        at 
org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:216)
        at 
org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:131)
        at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:550)
        at 
org.apache.hadoop.hbase.catalog.MetaReader.fullScan(MetaReader.java:536)
        at 
org.apache.hadoop.hbase.catalog.MetaReader.tableExists(MetaReader.java:309)
        at 
org.apache.hadoop.hbase.master.handler.CreateTableHandler.prepare(CreateTableHandler.java:104)
        at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1586)
        at org.apache.hadoop.hbase.master.HMaster.createTable(HMaster.java:1616)
        at 
org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$2.callBlockingMethod(MasterAdminProtos.java:20930)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2103)
        at 
org.apache.hadoop.hbase.ipc.RpcServer$Handler.run(RpcServer.java:1810)
Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException: 
org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: Server 
ip-10-174-58-159.us-west-1.compute.internal,33970,1368593564901 not running, 
aborting
        at 
org.apache.hadoop.hbase.regionserver.HRegionServer.checkOpen(HRegionServer.java:2356)
        at 
org.apache.hadoop.hbase.regionserver.HRegionServer.scan(HRegionServer.java:2927)
        at 
org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:20577)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2103)
        at 
org.apache.hadoop.hbase.ipc.RpcServer$Handler.run(RpcServer.java:1810)

        at org.apache.hadoop.hbase.ipc.RpcClient.call(RpcClient.java:1336)
        at 
org.apache.hadoop.hbase.ipc.RpcClient.callBlockingMethod(RpcClient.java:1532)
        at 
org.apache.hadoop.hbase.ipc.RpcClient$BlockingRpcChannelImplementation.callBlockingMethod(RpcClient.java:1587)
        at 
org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:21012)
        at 
org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:263)
        ... 14 more

        at org.apache.hadoop.hbase.ipc.RpcClient.call(RpcClient.java:1336)
        at 
org.apache.hadoop.hbase.ipc.RpcClient.callBlockingMethod(RpcClient.java:1532)
        at 
org.apache.hadoop.hbase.ipc.RpcClient$BlockingRpcChannelImplementation.callBlockingMethod(RpcClient.java:1587)
        at 
org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos$MasterAdminService$BlockingStub.createTable(MasterAdminProtos.java:22295)
        at 
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation$5.createTable(HConnectionManager.java:1761)
        at org.apache.hadoop.hbase.client.HBaseAdmin$2.call(HBaseAdmin.java:527)
        at org.apache.hadoop.hbase.client.HBaseAdmin$2.call(HBaseAdmin.java:523)
        at 
org.apache.hadoop.hbase.client.HBaseAdmin.executeCallable(HBaseAdmin.java:2632)
        ... 15 more
{code}
                
> TestZooKeeper fails in trunk/0.95 builds
> ----------------------------------------
>
>                 Key: HBASE-8531
>                 URL: https://issues.apache.org/jira/browse/HBASE-8531
>             Project: HBase
>          Issue Type: Bug
>          Components: Zookeeper
>            Reporter: stack
>            Assignee: stack
>             Fix For: 0.95.1
>
>         Attachments: 8531.txt, 8531v4.txt, 8531v5.txt, 8531v6.txt, 
> 8531v7.txt, 8531v8.txt, 8531v8.txt, 8531v9.txt
>
>
> TestZooKeeper fails on occasion.  I caught a good example recently.  See 
> below failure stack trace.
> It took me a while.  I thought the issue had to do w/ our recent ipc 
> refactorings but it looks like a problem we have always had.  In short, 
> MetaScanner is not handling DoNotRetryIOEs -- it is letting them out.  
> DNRIOEs when scanning are supposed to force a reset of the scan.  HTable#next 
> catches these and does the necessary scanner reset.  MetaScanner is 
> running some subset of what HTable does when it is scanning except the part 
> where it catches a DNRIOE and redoes the scan.  Odd.
> TestZooKeeper failed in this instance because the test kills a regionserver 
> at same time as we are trying to create a table.  In create table we do a 
> meta scan using MetaScanner passing a Visitor.  The scan starts and gets a 
> RegionServerStoppedException (This is NOT a DNRIOE -- it should be -- but 
> later we convert it into one up in ScannerCallable).
> DNRIOEs are thrown to the upper layers to handle....
> Let me look into having MetaScanner just use HTable scanning.  It makes an 
> instance just to find where to start the scan... let me try using this 
> instance for actually scanning.
> TODO: Do this conversion everywhere a DNRIOE could come out.
> Here is the stack trace
> {code}
> org.apache.hadoop.hbase.exceptions.DoNotRetryIOException: Reset scanner
>       at 
> org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:209)
>       at 
> org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:52)
>       at 
> org.apache.hadoop.hbase.client.ServerCallable.withRetries(ServerCallable.java:170)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:212)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner.access$000(MetaScanner.java:52)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner$1.connect(MetaScanner.java:131)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner$1.connect(MetaScanner.java:128)
>       at 
> org.apache.hadoop.hbase.client.HConnectionManager.execute(HConnectionManager.java:398)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:128)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:103)
>       at 
> org.apache.hadoop.hbase.client.MetaScanner.metaScan(MetaScanner.java:81)
>       at 
> org.apache.hadoop.hbase.client.HBaseAdmin.createTable(HBaseAdmin.java:448)
>       at 
> org.apache.hadoop.hbase.client.HBaseAdmin.createTable(HBaseAdmin.java:348)
>       at 
> org.apache.hadoop.hbase.TestZooKeeper.testSanity(TestZooKeeper.java:242)
>       at 
> org.apache.hadoop.hbase.TestZooKeeper.testRegionServerSessionExpired(TestZooKeeper.java:203)
>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>       at 
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
>       at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
>       at java.lang.reflect.Method.invoke(Method.java:597)
>       at 
> org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47)
>       at 
> org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
>       at 
> org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44)
>       at 
> org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
>       at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
>       at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271)
>       at 
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70)
>       at 
> org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50)
>       at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
>       at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63)
>       at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236)
>       at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53)
>       at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229)
>       at 
> org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:26)
>       at 
> org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
>       at org.junit.runners.ParentRunner.run(ParentRunner.java:309)
>       at org.junit.runners.Suite.runChild(Suite.java:127)
>       at org.junit.runners.Suite.runChild(Suite.java:26)
>       at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238)
>       at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
>       at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
>       at java.util.concurrent.FutureTask.run(FutureTask.java:138)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>       at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>       at java.lang.Thread.run(Thread.java:662)
> Caused by: org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: 
> org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: Server 
> p0116.mtv.cloudera.com,60679,1368057284663 not running, aborting
>       at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
>       at 
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:39)
>       at 
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:27)
>       at java.lang.reflect.Constructor.newInstance(Constructor.java:513)
>       at 
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:95)
>       at 
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:79)
>       at 
> org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:227)
>       at 
> org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:175)
>       ... 43 more
> Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException: 
> org.apache.hadoop.hbase.exceptions.RegionServerStoppedException: Server 
> p0116.mtv.cloudera.com,60679,1368057284663 not running, aborting
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.checkOpen(HRegionServer.java:2310)
>       at 
> org.apache.hadoop.hbase.regionserver.HRegionServer.scan(HRegionServer.java:2874)
>       at 
> org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:20577)
>       at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2103)
>       at 
> org.apache.hadoop.hbase.ipc.RpcServer$Handler.run(RpcServer.java:1810)
>       at org.apache.hadoop.hbase.ipc.RpcClient.call(RpcClient.java:1336)
>       at 
> org.apache.hadoop.hbase.ipc.RpcClient.callBlockingMethod(RpcClient.java:1532)
>       at 
> org.apache.hadoop.hbase.ipc.RpcClient$BlockingRpcChannelImplementation.callBlockingMethod(RpcClient.java:1587)
>       at 
> org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:21012)
>       at 
> org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:147)
>       ... 43 more
> {code}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to