Is it possible to set an HBase config when using Phoenix in Spark as a
datasource?
I had a peek at the source, and it looks like I can only set the table and
the ZooKeeper quorum.
What would be my options if I want to set "hbase.rpc.timeout" in the client?
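For reference, this is how I am loading the table today; as far as I can tell
only "table" and "zkUrl" are honoured (the quorum address below is
illustrative):

// sqlContext is the job's SQLContext; "EVENT" is the table from the failing job
val df = sqlContext.read
  .format("org.apache.phoenix.spark")
  .options(Map(
    "table" -> "EVENT",
    "zkUrl" -> "dc-hadoop-master:2181",  // illustrative quorum address
    "hbase.rpc.timeout" -> "1200000"     // what I'd like to pass, but it seems to be ignored
  ))
  .load()

This is the bit of the source I was looking at: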
// Override 'RelationProvider.createRelation', this enables DataFrame.load()
override def createRelation(sqlContext: SQLContext,
                            parameters: Map[String, String]): BaseRelation = {
  verifyParameters(parameters)
  new PhoenixRelation(
    parameters("table"),
    parameters("zkUrl")
  )(sqlContext)
}
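With a plain Phoenix JDBC connection I believe the property can be passed
through as a connection property, so outside of Spark something like this
sketch should work (the connection URL is illustrative):

import java.sql.DriverManager
import java.util.Properties

// Phoenix forwards JDBC connection properties into the client-side HBase configuration
val props = new Properties()
props.setProperty("hbase.rpc.timeout", "1200000")
val conn = DriverManager.getConnection("jdbc:phoenix:dc-hadoop-master:2181", props)

I don't see an equivalent hook when going through the datasource parameters.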
A bit of background: I am getting this exception when running queries in a
Spark job:
Sat Jan 23 20:13:57 GMT 2016, null, java.net.SocketTimeoutException: callTimeout=60000, callDuration=60303: row '�Ԑx~R�Traders' on table 'EVENT' at region=EVENT,,1453212975336.b28c683cea467d1a2fff4c495e39a3d1., hostname=dc-hadoop-slave2.local,17020,1453578819465, seqNum=16367961
    at org.apache.phoenix.mapreduce.PhoenixRecordReader.nextKeyValue(PhoenixRecordReader.java:159)
    at org.apache.spark.rdd.NewHadoopRDD$$anon$1.hasNext(NewHadoopRDD.scala:143)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at scala.collection.Iterator$$anon$14.hasNext(Iterator.scala:388)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
    at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1.org$apache$spark$sql$execution$aggregate$TungstenAggregate$$anonfun$$executePartition$1(TungstenAggregate.scala:97)
    at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119)
    at org.apache.spark.sql.execution.aggregate.TungstenAggregate$$anonfun$doExecute$1$$anonfun$2.apply(TungstenAggregate.scala:119)
    at org.apache.spark.rdd.MapPartitionsWithPreparationRDD.compute(MapPartitionsWithPreparationRDD.scala:64)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:297)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:264)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
    at org.apache.spark.scheduler.Task.run(Task.scala:88)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.phoenix.exception.PhoenixIOException: Failed after attempts=36, exceptions:
Sat Jan 23 20:13:57 GMT 2016, null, java.net.SocketTimeoutException: callTimeout=60000, callDuration=60303: row '�Ԑx~R�Traders' on table 'EVENT' at region=EVENT,,1453212975336.b28c683cea467d1a2fff4c495e39a3d1., hostname=dc-hadoop-slave2.local,17020,1453578819465, seqNum=16367961
    at org.apache.phoenix.util.ServerUtil.parseServerException(ServerUtil.java:111)
    at org.apache.phoenix.iterate.ScanningResultIterator.next(ScanningResultIterator.java:65)
    at org.apache.phoenix.iterate.TableResultIterator.next(TableResultIterator.java:110)
    at org.apache.phoenix.iterate.LookAheadResultIterator$1.advance(LookAheadResultIterator.java:47)
    at org.apache.phoenix.iterate.LookAheadResultIterator.init(LookAheadResultIterator.java:59)
    at org.apache.phoenix.iterate.LookAheadResultIterator.next(LookAheadResultIterator.java:65)
    at org.apache.phoenix.jdbc.PhoenixResultSet.next(PhoenixResultSet.java:778)
    at org.apache.phoenix.mapreduce.PhoenixRecordReader.nextKeyValue(PhoenixRecordReader.java:152)
    ... 25 more
Caused by: org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=36, exceptions:
Sat Jan 23 20:13:57 GMT 2016, null, java.net.SocketTimeoutException: callTimeout=60000, callDuration=60303: row '�Ԑx~R�Traders' on table 'EVENT' at region=EVENT,,1453212975336.b28c683cea467d1a2fff4c495e39a3d1., hostname=dc-hadoop-slave2.local,17020,1453578819465, seqNum=16367961
    at org.apache.hadoop.hbase.client.RpcRetryingCallerWithReadReplicas.throwEnrichedException(RpcRetryingCallerWithReadReplicas.java:271)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:195)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:59)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
    at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:320)
    at org.apache.hadoop.hbase.client.ClientScanner.loadCache(ClientScanner.java:403)
    at org.apache.hadoop.hbase.client.ClientScanner.next(ClientScanner.java:364)
    at org.apache.phoenix.iterate.ScanningResultIterator.next(ScanningResultIterator.java:55)
    ... 31 more
Caused by: java.net.SocketTimeoutException: callTimeout=60000, callDuration=60303: row '�Ԑx~R�Traders' on table 'EVENT' at region=EVENT,,1453212975336.b28c683cea467d1a2fff4c495e39a3d1., hostname=dc-hadoop-slave2.local,17020,1453578819465, seqNum=16367961
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:159)
    at org.apache.hadoop.hbase.client.ResultBoundedCompletionService$QueueingFuture.run(ResultBoundedCompletionService.java:64)
    ... 3 more
Caused by: java.io.IOException: Call to dc-hadoop-slave2.local/172.20.1.185:17020 failed on local exception: org.apache.hadoop.hbase.ipc.CallTimeoutException: Call id=257, waitTime=60001, operationTimeout=60000 expired.
    at org.apache.hadoop.hbase.ipc.RpcClientImpl.wrapException(RpcClientImpl.java:1284)
    at org.apache.hadoop.hbase.ipc.RpcClientImpl.call(RpcClientImpl.java:1252)
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:213)
    at org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:287)
    at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:32651)
    at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:213)
    at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:62)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:346)
    at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas$RetryingRPC.call(ScannerCallableWithReplicas.java:320)
    at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithRetries(RpcRetryingCaller.java:126)
    ... 4 more
Caused by: org.apache.hadoop.hbase.ipc.CallTimeoutException: Call id=257, waitTime=60001, operationTimeout=60000 expired.
    at org.apache.hadoop.hbase.ipc.Call.checkAndSetTimeout(Call.java:70)
    at org.apache.hadoop.hbase.ipc.RpcClientImpl.call(RpcClientImpl.java:1226)
    ... 13 more
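The callTimeout=60000 above is the stock HBase client default, so it looks
like the executors never pick up our larger timeouts. A quick sanity check of
what the client actually resolves, assuming no custom hbase-site.xml on the
executor classpath:

import org.apache.hadoop.hbase.HBaseConfiguration

// HBaseConfiguration.create() layers any hbase-site.xml found on the classpath
// over hbase-default.xml; with no override in place this prints 60000
val conf = HBaseConfiguration.create()
println(conf.get("hbase.rpc.timeout"))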
The HBase region servers have this config:
<property>
  <name>hbase.client.scanner.timeout.period</name>
  <value>1200000</value>
</property>
<property>
  <name>hbase.rpc.timeout</name>
  <value>1200000</value>
</property>
<property>
  <name>phoenix.query.keepAliveMs</name>
  <value>240000</value>
</property>
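My fallback for now would be to ship a client-side hbase-site.xml with the
same timeout on the Spark driver and executor classpath, along these lines:

<property>
  <name>hbase.rpc.timeout</name>
  <value>1200000</value>
</property>

but a way to pass it through the datasource parameters would be much cleaner.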
Thanks
*Pierre Lacave*