Likely this patch broke something: commit 4c4a4353898aef35016c342f997eba6e439cc039
Author: Enis Soztutar <[email protected]> Date: Wed Oct 28 18:16:32 2015 -0700 HBASE-14689 Addendum and unit test for HBASE-13471 It was supposed to bring the logic around that to be on par with branch-1.1+. Looking into it. Enis On Tue, Nov 17, 2015 at 6:34 PM, Enis Söztutar <[email protected]> wrote: > I have run PE on the 1.0.3 RC, and most of the handlers were blocked at > the getRowLockInternal() call. Also noticed these in the logs: > > 2015-11-17 18:29:39,717 WARN > [B.defaultRpcServer.handler=18,queue=0,port=58215] regionserver.HRegion: > Failed getting lock in batch put, row=00000000000000000000132949 > java.io.IOException: Timed out waiting for lock for row: > 00000000000000000000132949 > at > org.apache.hadoop.hbase.regionserver.HRegion.getRowLockInternal(HRegion.java:3995) > at > org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:2661) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2519) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2473) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2477) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:654) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:618) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:1864) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:31451) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2049) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:111) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > 2015-11-17 18:29:39,717 WARN > [B.defaultRpcServer.handler=14,queue=2,port=58215] regionserver.HRegion: > 
Failed getting lock in batch put, row=00000000000000000000178256 > java.io.IOException: Timed out waiting for lock for row: > 00000000000000000000178256 > at > org.apache.hadoop.hbase.regionserver.HRegion.getRowLockInternal(HRegion.java:3995) > at > org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:2661) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2519) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2473) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2477) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:654) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:618) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:1864) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:31451) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2049) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:111) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > 2015-11-17 18:29:39,717 WARN > [B.defaultRpcServer.handler=6,queue=0,port=58215] regionserver.HRegion: > Failed getting lock in batch put, row=00000000000000000000198294 > java.io.IOException: Timed out waiting for lock for row: > 00000000000000000000198294 > at > org.apache.hadoop.hbase.regionserver.HRegion.getRowLockInternal(HRegion.java:3995) > at > org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:2661) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2519) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2473) > at > 
org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2477) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:654) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:618) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:1864) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:31451) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2049) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:111) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > 2015-11-17 18:29:39,717 WARN > [B.defaultRpcServer.handler=17,queue=2,port=58215] regionserver.HRegion: > Failed getting lock in batch put, row=00000000000000000000173891 > java.io.IOException: Timed out waiting for lock for row: > 00000000000000000000173891 > at > org.apache.hadoop.hbase.regionserver.HRegion.getRowLockInternal(HRegion.java:3995) > at > org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:2661) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2519) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2473) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2477) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:654) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:618) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:1864) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:31451) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2049) > 
at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:111) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > 2015-11-17 18:29:39,718 WARN > [B.defaultRpcServer.handler=3,queue=0,port=58215] regionserver.HRegion: > Failed getting lock in batch put, row=00000000000000000000202900 > java.io.IOException: Timed out waiting for lock for row: > 00000000000000000000202900 > at > org.apache.hadoop.hbase.regionserver.HRegion.getRowLockInternal(HRegion.java:3995) > at > org.apache.hadoop.hbase.regionserver.HRegion.doMiniBatchMutation(HRegion.java:2661) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2519) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2473) > at > org.apache.hadoop.hbase.regionserver.HRegion.batchMutate(HRegion.java:2477) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doBatchOp(RSRpcServices.java:654) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.doNonAtomicRegionMutation(RSRpcServices.java:618) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.multi(RSRpcServices.java:1864) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:31451) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2049) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:111) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > > > On Tue, Nov 17, 2015 at 8:44 AM, Nick Dimiduk <[email protected]> wrote: > >> Indeed. I'll see about finishing my evaluation of both RCs and >> investigating this behavior this evening. >> >> On Monday, November 16, 2015, Andrew Purtell <[email protected]> >> wrote: >> >> > Standalone. 
>> > >> > Perhaps if you attach a profiler to the JVM running the test something >> > interesting will turn up. >> > >> > >> > > On Nov 16, 2015, at 10:34 PM, Nick Dimiduk <[email protected] >> > <javascript:;>> wrote: >> > > >> > > Your LTT ran vs a cluster, or in standalone mode? My run in >> standalone is >> > > failing consistently due to GC pause. I suppose it's possible my HDD >> is >> > > failing, but the same test ran without incident with the 1.1 RC, >> and >> > > also with the 0.98 RC. Perhaps there's a perf degradation on 1.0 >> that's >> > > been resolved on 1.1, exhibited by the conditions of my single >> machine? >> > As >> > > this is the terminal release of the 1.0 line and no one else has >> observed >> > > issues, I'm inclined to proceed despite the test failure. >> > > >> > >> On Mon, Nov 16, 2015 at 8:35 PM, Andrew Purtell <[email protected] >> > <javascript:;>> wrote: >> > >> >> > >> +1 >> > >> >> > >> Built from source >> > >> RAT check passes >> > >> Loaded 1M rows with LoadTestTool, no unexpected log messages, >> reported >> > >> latencies in line with expectations >> > >> Ran IntegrationTestBigLinkedList, no errors >> > >> >> > >> On Fri, Nov 13, 2015 at 4:39 PM, Andrew Purtell <[email protected] >> > <javascript:;>> >> > >> wrote: >> > >>> The 1st HBase 1.0.3 release candidate (RC0) is available for >> download >> > at >> > >>> http://people.apache.org/~apurtell/1.0.3RC0/ and Maven artifacts >> are >> > >> also >> > >>> available in the temporary repository >> > >>> >> > https://repository.apache.org/content/repositories/orgapachehbase-1119/ >> > >> . >> > >>> >> > >>> The API compatibility report with respect to the previous release >> can >> > be >> > >>> found at >> > >>> >> http://people.apache.org/~apurtell/1.0.2_1.0.3RC0_compat_report.html . >> > >>> There are no reported source compatibility issues with user facing >> > public >> > >>> or LimitedPrivate interfaces. 
There are no reported binary >> > compatibility >> > >>> issues with user facing public interfaces. A constant was changed in >> > >>> RegionServerSnapshotManager, classified as LimitedPrivate. This >> change >> > >> was >> > >>> made in commit 25b4427 by Enis, the branch RM for 1.0. >> > >>> >> > >>> Signed with my code signing key D5365CCD. >> > >>> >> > >>> The 76 issues resolved in this release can be found at >> > >>> http://s.apache.org/0Cc . >> > >>> >> > >>> Please try out the candidate and vote +1/0/-1. This vote will be >> open >> > for >> > >>> at least 72 hours. Unless there is an objection, I will try to close it Monday >> > November >> > >>> 23, 2015 if we have sufficient votes. Three +1 votes from PMC will >> be >> > >>> required to release. >> > >>> >> > >>> -- >> > >>> Best regards, >> > >>> >> > >>> - Andy >> > >>> >> > >>> Problems worthy of attack prove their worth by hitting back. - Piet >> > Hein >> > >>> (via Tom White) >> > >> >> > >> >> > >> >> > >> -- >> > >> Best regards, >> > >> >> > >> - Andy >> > >> >> > >> Problems worthy of attack prove their worth by hitting back. - Piet >> Hein >> > >> (via Tom White) >> > >> >> > >> > >
