Github user aarondav commented on a diff in the pull request:
https://github.com/apache/spark/pull/5392#discussion_r33514995
--- Diff:
core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala ---
@@ -40,98 +37,139 @@ import org.apache.spark.util.{ActorLogReceive,
RpcUtils, Utils, AkkaUtils}
* @param masterUrls Each url should look like spark://host:port.
*/
private[spark] class AppClient(
- actorSystem: ActorSystem,
+ rpcEnv: RpcEnv,
masterUrls: Array[String],
appDescription: ApplicationDescription,
listener: AppClientListener,
conf: SparkConf)
extends Logging {
- private val masterAkkaUrls = masterUrls.map(Master.toAkkaUrl(_,
AkkaUtils.protocol(actorSystem)))
+ private val masterRpcAddresses =
masterUrls.map(RpcAddress.fromSparkURL(_))
- private val REGISTRATION_TIMEOUT = 20.seconds
+ private val REGISTRATION_TIMEOUT_SECONDS = 20
private val REGISTRATION_RETRIES = 3
- private var masterAddress: Address = null
- private var actor: ActorRef = null
+ private var endpoint: RpcEndpointRef = null
private var appId: String = null
- private var registered = false
- private var activeMasterUrl: String = null
-
- private class ClientActor extends Actor with ActorLogReceive with
Logging {
- var master: ActorSelection = null
- var alreadyDisconnected = false // To avoid calling
listener.disconnected() multiple times
- var alreadyDead = false // To avoid calling listener.dead() multiple
times
- var registrationRetryTimer: Option[Cancellable] = None
-
- override def preStart() {
- context.system.eventStream.subscribe(self,
classOf[RemotingLifecycleEvent])
+ @volatile private var registered = false
+
+ private class ClientEndpoint(override val rpcEnv: RpcEnv) extends
ThreadSafeRpcEndpoint
+ with Logging {
+
+ private var master: Option[RpcEndpointRef] = None
+ // To avoid calling listener.disconnected() multiple times
+ private var alreadyDisconnected = false
+ @volatile private var alreadyDead = false // To avoid calling
listener.dead() multiple times
+ @volatile private var registerMasterFutures: Array[JFuture[_]] = null
+ @volatile private var registrationRetryTimer: JScheduledFuture[_] =
null
+
+ // A thread pool for registering with masters. Because registering
with a master is a blocking
+ // action, this thread pool must be able to create
"masterRpcAddresses.size" threads at the same
+ // time so that we can register with all masters.
+ private val registerMasterThreadPool = new ThreadPoolExecutor(
+ 0,
+ masterRpcAddresses.size, // Make sure we can register with all
masters at the same time
+ 60L, TimeUnit.SECONDS,
+ new SynchronousQueue[Runnable](),
+
ThreadUtils.namedThreadFactory("appclient-register-master-threadpool"))
+
+ // A scheduled executor for scheduling the registration actions
+ private val registrationRetryThread =
+
ThreadUtils.newDaemonSingleThreadScheduledExecutor("appclient-registration-retry-thread")
+
+ override def onStart(): Unit = {
try {
- registerWithMaster()
+ registerWithMaster(1)
} catch {
case e: Exception =>
logWarning("Failed to connect to master", e)
markDisconnected()
- context.stop(self)
+ stop()
}
}
- def tryRegisterAllMasters() {
- for (masterAkkaUrl <- masterAkkaUrls) {
- logInfo("Connecting to master " + masterAkkaUrl + "...")
- val actor = context.actorSelection(masterAkkaUrl)
- actor ! RegisterApplication(appDescription)
+ /**
+ * Register with all masters asynchronously and returns an array
`Future`s for cancellation.
+ */
+ private def tryRegisterAllMasters(): Array[JFuture[_]] = {
+ for (masterAddress <- masterRpcAddresses) yield {
+ registerMasterThreadPool.submit(new Runnable {
+ override def run(): Unit = try {
+ if (registered) {
+ return
+ }
+ logInfo("Connecting to master " + masterAddress.toSparkURL +
"...")
+ val masterRef =
+ rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress,
Master.ENDPOINT_NAME)
+ masterRef.send(RegisterApplication(appDescription, self))
+ } catch {
+ case ie: InterruptedException => // Cancelled
+ case NonFatal(e) => logError(e.getMessage, e)
--- End diff --
The log message here should probably be something like "Failed to connect to $masterAddress" rather than just `e.getMessage`.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]