[ 
https://issues.apache.org/jira/browse/SPARK-14693?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15255734#comment-15255734
 ] 

Evan Oman commented on SPARK-14693:
-----------------------------------

I do get the following error after waiting for two hours:

{code}
java.rmi.RemoteException: java.util.concurrent.TimeoutException: Timed out 
retrying send to http://10.210.224.74:7070: 2 hours; nested exception is: 
        java.util.concurrent.TimeoutException: Timed out retrying send to 
http://10.210.224.74:7070: 2 hours
        at 
com.databricks.backend.daemon.data.client.DbfsClient.send0(DbfsClient.scala:71)
        at 
com.databricks.backend.daemon.data.client.DbfsClient.sendIdempotent(DbfsClient.scala:40)
        at 
com.databricks.backend.daemon.data.client.DatabricksFileSystem.listStatus(DatabricksFileSystem.scala:189)
        at 
org.apache.spark.streaming.util.FileBasedWriteAheadLog.initializeOrRecover(FileBasedWriteAheadLog.scala:228)
        at 
org.apache.spark.streaming.util.FileBasedWriteAheadLog.<init>(FileBasedWriteAheadLog.scala:72)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:141)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:141)
        at scala.Option.getOrElse(Option.scala:120)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$.createLog(WriteAheadLogUtils.scala:140)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$.createLogForDriver(WriteAheadLogUtils.scala:98)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:254)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:252)
        at scala.Option.map(Option.scala:145)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker.createWriteAheadLog(ReceivedBlockTracker.scala:252)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker.<init>(ReceivedBlockTracker.scala:75)
        at 
org.apache.spark.streaming.scheduler.ReceiverTracker.<init>(ReceiverTracker.scala:106)
        at 
org.apache.spark.streaming.scheduler.JobScheduler.start(JobScheduler.scala:80)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply$mcV$sp(StreamingContext.scala:610)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:606)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:606)
        at ... run in separate thread using org.apache.spark.util.ThreadUtils 
... ()
        at 
org.apache.spark.streaming.StreamingContext.liftedTree1$1(StreamingContext.scala:606)
        at 
org.apache.spark.streaming.StreamingContext.start(StreamingContext.scala:600)
Caused by: java.util.concurrent.TimeoutException: Timed out retrying send to 
http://10.210.224.74:7070: 2 hours
        at 
com.databricks.rpc.ReliableJettyClient.retryOnNetworkError(ReliableJettyClient.scala:138)
        at 
com.databricks.rpc.ReliableJettyClient.sendIdempotent(ReliableJettyClient.scala:46)
        at 
com.databricks.backend.daemon.data.client.DbfsClient.doSend(DbfsClient.scala:83)
        at 
com.databricks.backend.daemon.data.client.DbfsClient.send0(DbfsClient.scala:60)
        at 
com.databricks.backend.daemon.data.client.DbfsClient.sendIdempotent(DbfsClient.scala:40)
        at 
com.databricks.backend.daemon.data.client.DatabricksFileSystem.listStatus(DatabricksFileSystem.scala:189)
        at 
org.apache.spark.streaming.util.FileBasedWriteAheadLog.initializeOrRecover(FileBasedWriteAheadLog.scala:228)
        at 
org.apache.spark.streaming.util.FileBasedWriteAheadLog.<init>(FileBasedWriteAheadLog.scala:72)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:141)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$$anonfun$2.apply(WriteAheadLogUtils.scala:141)
        at scala.Option.getOrElse(Option.scala:120)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$.createLog(WriteAheadLogUtils.scala:140)
        at 
org.apache.spark.streaming.util.WriteAheadLogUtils$.createLogForDriver(WriteAheadLogUtils.scala:98)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:254)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker$$anonfun$createWriteAheadLog$1.apply(ReceivedBlockTracker.scala:252)
        at scala.Option.map(Option.scala:145)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker.createWriteAheadLog(ReceivedBlockTracker.scala:252)
        at 
org.apache.spark.streaming.scheduler.ReceivedBlockTracker.<init>(ReceivedBlockTracker.scala:75)
        at 
org.apache.spark.streaming.scheduler.ReceiverTracker.<init>(ReceiverTracker.scala:106)
        at 
org.apache.spark.streaming.scheduler.JobScheduler.start(JobScheduler.scala:80)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply$mcV$sp(StreamingContext.scala:610)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:606)
        at 
org.apache.spark.streaming.StreamingContext$$anonfun$liftedTree1$1$1.apply(StreamingContext.scala:606)
        at org.apache.spark.util.ThreadUtils$$anon$1.run(ThreadUtils.scala:122)
{code}

> Spark Streaming Context Hangs on Start
> --------------------------------------
>
>                 Key: SPARK-14693
>                 URL: https://issues.apache.org/jira/browse/SPARK-14693
>             Project: Spark
>          Issue Type: Bug
>          Components: Streaming
>    Affects Versions: 1.6.0, 1.6.1
>         Environment: Databricks Cloud
>            Reporter: Evan Oman
>
> All,
> I am trying to use Kinesis with Spark Streaming on Spark 1.6.0 via Databricks 
> and my `ssc.start()` command is hanging. 
> I am using the following function (based on [this 
> guide|http://spark.apache.org/docs/latest/streaming-kinesis-integration.html],
>  which, as an aside, contains some broken Github links) to make my Spark 
> Streaming Context:
> {code:borderStyle=solid}
> def creatingFunc(sc: SparkContext): StreamingContext = 
> {
>               // Create a StreamingContext
>               val ssc = new StreamingContext(sc, 
> Seconds(batchIntervalSeconds))
>               // Create a Kinesis stream
>               val kinesisStream = KinesisUtils.createStream(ssc,
>                       kinesisAppName, kinesisStreamName,
>                       kinesisEndpointUrl, 
> RegionUtils.getRegionByEndpoint(kinesisEndpointUrl).getName,
>                       InitialPositionInStream.LATEST, 
> Seconds(kinesisCheckpointIntervalSeconds),
>                       StorageLevel.MEMORY_AND_DISK_SER_2, 
> config.awsAccessKeyId, config.awsSecretKey)
>               kinesisStream.print()
>               ssc.remember(Minutes(1))
>               ssc.checkpoint(checkpointDir)
>               ssc
> }
> {code}
> However when I run the following to start the streaming context:
> {code:borderStyle=solid}
> // Stop any existing StreamingContext 
> val stopActiveContext = true
> if (stopActiveContext) {      
>   StreamingContext.getActive.foreach { _.stop(stopSparkContext = false) }
> } 
> // Get or create a streaming context.
> val ssc = StreamingContext.getActiveOrCreate(() => main.creatingFunc(sc))
> // This starts the streaming context in the background. 
> ssc.start()
> {code}
> The last bit, `ssc.start()`, hangs indefinitely without issuing any log 
> messages. I am running this on a freshly spun up cluster with no other 
> notebooks attached so there aren't any other streaming contexts running.
> Any thoughts?
> Additionally, here are the libraries I am using (from my build.sbt file):
> {code:borderStyle=solid}
> "org.apache.spark" % "spark-core_2.10" % "1.6.0"
> "org.apache.spark" % "spark-sql_2.10" % "1.6.0"
> "org.apache.spark" % "spark-streaming-kinesis-asl_2.10" % "1.6.0"
> "org.apache.spark" % "spark-streaming_2.10" % "1.6.0"
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to