[ https://issues.apache.org/jira/browse/HBASE-9791?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13798048#comment-13798048 ]
stack commented on HBASE-9791: ------------------------------ +1 > MR initializes scanner twice > ---------------------------- > > Key: HBASE-9791 > URL: https://issues.apache.org/jira/browse/HBASE-9791 > Project: HBase > Issue Type: Bug > Reporter: Jimmy Xiang > Assignee: Jimmy Xiang > Priority: Minor > Attachments: trunk-9791.patch > > > The scanner is opened twice. The first time is in TableInputFormatBase.createRecordReader(), which calls initialize() on the new record reader. The second time is > when the MapReduce framework (MapTask) initializes the record reader itself. We should not call initialize() when creating the record > reader. > {noformat} > 2013-10-16 16:58:27,163 INFO [main] > org.apache.hadoop.hbase.client.ScannerCallable: Open > scanner=466730774138784884 for > scan={"timeRange":[0,9223372036854775807],"batch":-1,"startRow":"","stopRow":"\\x08\\x02\\xC2b","loadColumnFamiliesOnDemand":null,"totalColumns":1,"cacheBlocks":false,"families":{"meta":["prev"]},"maxResultSize":-1,"maxVersions":1,"caching":100} > on region > region=IntegrationTestBigLinkedList,,1381966998140.518fba7c69f069bef99658ca172f9009., > hostname=e1521.halxg.cloudera.com,36020,1381967631098, seqNum=968466 > ip:e1521.halxg.cloudera.com:36020 > 2013-10-16 16:58:27,164 INFO [main] > org.apache.hadoop.hbase.client.ScannerCallable:org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:312) > at > org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:157) > at > org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:58) > at > org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:116) > at > org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:94) > at > org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:271) > at > org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:176) > at > org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:171) > at > 
org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:110) > at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:719) > at > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.restart(TableRecordReaderImpl.java:86) > at > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.initialize(TableRecordReaderImpl.java:148) > at > org.apache.hadoop.hbase.mapreduce.TableRecordReader.initialize(TableRecordReader.java:125) > at > org.apache.hadoop.hbase.mapreduce.TableInputFormatBase.createRecordReader(TableInputFormatBase.java:135) > at > org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.<init>(MapTask.java:491) > at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:734) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1477) > at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157) > 2013-10-16 16:58:27,167 INFO [main] > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl: Current > scan={"timeRange":[0,9223372036854775807],"batch":-1,"startRow":"","stopRow":"\\x08\\x02\\xC2b","loadColumnFamiliesOnDemand":null,"totalColumns":1,"cacheBlocks":false,"families":{"meta":["prev"]},"maxResultSize":-1,"maxVersions":1,"caching":100} > 2013-10-16 16:58:27,175 INFO [main] org.apache.hadoop.mapred.MapTask: Map > output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer > 2013-10-16 16:58:27,891 INFO [main] org.apache.hadoop.mapred.MapTask: > (EQUATOR) 0 kvi 268435452(1073741808) > 2013-10-16 16:58:27,892 INFO [main] org.apache.hadoop.mapred.MapTask: > mapreduce.task.io.sort.mb: 1024 > 2013-10-16 16:58:27,892 INFO [main] org.apache.hadoop.mapred.MapTask: soft > limit at 858993472 > 2013-10-16 16:58:27,892 INFO [main] 
org.apache.hadoop.mapred.MapTask: > bufstart = 0; bufvoid = 1073741824 > 2013-10-16 16:58:27,892 INFO [main] org.apache.hadoop.mapred.MapTask: kvstart > = 268435452; length = 67108864 > 2013-10-16 16:58:27,903 INFO [main] > org.apache.hadoop.hbase.client.ScannerCallable: Open > scanner=7462140737850801183 for > scan={"timeRange":[0,9223372036854775807],"batch":-1,"startRow":"","stopRow":"\\x08\\x02\\xC2b","loadColumnFamiliesOnDemand":null,"totalColumns":1,"cacheBlocks":false,"families":{"meta":["prev"]},"maxResultSize":-1,"maxVersions":1,"caching":100} > on region > region=IntegrationTestBigLinkedList,,1381966998140.518fba7c69f069bef99658ca172f9009., > hostname=e1521.halxg.cloudera.com,36020,1381967631098, seqNum=968466 > ip:e1521.halxg.cloudera.com:36020 > 2013-10-16 16:58:27,903 INFO [main] > org.apache.hadoop.hbase.client.ScannerCallable: > org.apache.hadoop.hbase.client.ScannerCallable.openScanner(ScannerCallable.java:312) > at > org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:157) > at > org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:58) > at > org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:116) > at > org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:94) > at > org.apache.hadoop.hbase.client.ClientScanner.nextScanner(ClientScanner.java:271) > at > org.apache.hadoop.hbase.client.ClientScanner.initializeScannerInConstruction(ClientScanner.java:176) > at > org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:171) > at > org.apache.hadoop.hbase.client.ClientScanner.<init>(ClientScanner.java:110) > at org.apache.hadoop.hbase.client.HTable.getScanner(HTable.java:719) > at > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.restart(TableRecordReaderImpl.java:86) > at > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl.initialize(TableRecordReaderImpl.java:148) > at > 
org.apache.hadoop.hbase.mapreduce.TableRecordReader.initialize(TableRecordReader.java:125) > at > org.apache.hadoop.mapred.MapTask$NewTrackingRecordReader.initialize(MapTask.java:524) > at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:762) > at org.apache.hadoop.mapred.MapTask.run(MapTask.java:339) > at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:162) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:415) > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1477) > at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:157) > 2013-10-16 16:58:27,904 INFO [main] > org.apache.hadoop.hbase.mapreduce.TableRecordReaderImpl: Current > scan={"timeRange":[0,9223372036854775807],"batch":-1,"startRow":"","stopRow":"\\x08\\x02\\xC2b","loadColumnFamiliesOnDemand":null,"totalColumns":1,"cacheBlocks":false,"families":{"meta":["prev"]},"maxResultSize":-1,"maxVersions":1,"caching":100} > {noformat} -- This message was sent by Atlassian JIRA (v6.1#6144)