Hello,
I am trying to use nutch-2.0, hadoop-1.03, and hbase-0.92.1 in pseudo-distributed
mode with iptables turned off. As soon as the map phase reaches 100%, the fetcher
runs for a few minutes and then fails with the following error:
java.net.ConnectException: Connection refused
at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
at
sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:701)
at
org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:489)
at
org.apache.hadoop.hbase.ipc.HBaseClient$Connection.setupConnection(HBaseClient.java:328)
at
org.apache.hadoop.hbase.ipc.HBaseClient$Connection.setupIOstreams(HBaseClient.java:362)
at
org.apache.hadoop.hbase.ipc.HBaseClient.getConnection(HBaseClient.java:1045)
at org.apache.hadoop.hbase.ipc.HBaseClient.call(HBaseClient.java:897)
at
org.apache.hadoop.hbase.ipc.WritableRpcEngine$Invoker.invoke(WritableRpcEngine.java:150)
at $Proxy10.getClosestRowBefore(Unknown Source)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegionInMeta(HConnectionManager.java:947)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegion(HConnectionManager.java:814)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.relocateRegion(HConnectionManager.java:788)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegionInMeta(HConnectionManager.java:1024)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegion(HConnectionManager.java:818)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.processBatchCallback(HConnectionManager.java:1524)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.processBatch(HConnectionManager.java:1409)
at org.apache.hadoop.hbase.client.HTable.flushCommits(HTable.java:943)
at
org.apache.gora.hbase.store.HBaseTableConnection.close(HBaseTableConnection.java:96)
at org.apache.gora.hbase.store.HBaseStore.close(HBaseStore.java:599)
at
org.apache.gora.mapreduce.GoraRecordWriter.close(GoraRecordWriter.java:55)
at
org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.close(ReduceTask.java:579)
at
org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:650)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:417)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
org.apache.gora.util.GoraException:
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed setting up
proxy interface org.apache.hadoop.hbase.ipc.HRegionInterface to
master/192.168.1.4:60020 after attempts=1
at
org.apache.gora.store.DataStoreFactory.createDataStore(DataStoreFactory.java:167)
at
org.apache.gora.store.DataStoreFactory.createDataStore(DataStoreFactory.java:118)
at
org.apache.gora.mapreduce.GoraOutputFormat.getRecordWriter(GoraOutputFormat.java:88)
at
org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.<init>(ReduceTask.java:569)
at
org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:638)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:417)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed
setting up proxy interface org.apache.hadoop.hbase.ipc.HRegionInterface to
master/192.168.1.4:60020 after attempts=1
at org.apache.hadoop.hbase.ipc.HBaseRPC.waitForProxy(HBaseRPC.java:242)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.getHRegionConnection(HConnectionManager.java:1278)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.getHRegionConnection(HConnectionManager.java:1235)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.getHRegionConnection(HConnectionManager.java:1222)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegionInMeta(HConnectionManager.java:918)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegion(HConnectionManager.java:814)
at
org.apache.hadoop.hbase.client.HConnectionManager$HConnectionImplementation.locateRegion(HConnectionManager.java:782)
at org.apache.hadoop.hbase.client.HTable.finishSetup(HTable.java:249)
at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:213)
at
org.apache.hadoop.hbase.catalog.MetaReader.getHTable(MetaReader.java:200)
at
org.apache.hadoop.hbase.catalog.MetaReader.getMetaHTable(MetaReader.java:226)
at
org.apache.hadoop.hbase.catalog.MetaReader.fullScan(MetaReader.java:700)
at
org.apache.hadoop.hbase.catalog.MetaReader.fullScan(MetaReader.java:183)
at
org.apache.hadoop.hbase.catalog.MetaReader.tableExists(MetaReader.java:448)
at
org.apache.hadoop.hbase.client.HBaseAdmin.tableExists(HBaseAdmin.java:202)
at
org.apache.gora.hbase.store.HBaseStore.schemaExists(HBaseStore.java:164)
at
org.apache.gora.hbase.store.HBaseStore.createSchema(HBaseStore.java:145)
at
org.apache.gora.hbase.store.HBaseStore.initialize(HBaseStore.java:130)
at
org.apache.gora.store.DataStoreFactory.initializeDataStore(DataStoreFactory.java:102)
at
org.apache.gora.store.DataStoreFactory.createDataStore(DataStoreFactory.java:161)
... 10 more
bin/nutch inject works fine. Also, on a different Linux box, the fetcher with
the same config runs fine, but the generated set there is much smaller than on
the first Linux box.
Any ideas how to fix this issue? Also, what is the benefit of running the fetcher
in pseudo-distributed mode compared with local mode?
Thanks.
Alex.