Github user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/11746#discussion_r59657799
--- Diff:
core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala ---
@@ -68,56 +70,72 @@ private[deploy] class DriverRunner(
private var clock: Clock = new SystemClock()
private var sleeper = new Sleeper {
- def sleep(seconds: Int): Unit = (0 until seconds).takeWhile(f =>
{Thread.sleep(1000); !killed})
+ def sleep(seconds: Int): Unit = Thread.sleep(seconds * 1000)
}
/** Starts a thread to run and manage the driver. */
private[worker] def start() = {
- new Thread("DriverRunner for " + driverId) {
+ workerThread = new Thread("DriverRunner for " + driverId) {
override def run() {
+ var shutdownHook: AnyRef = null
try {
- val driverDir = createWorkingDirectory()
- val localJarFilename = downloadUserJar(driverDir)
-
- def substituteVariables(argument: String): String = argument
match {
- case "{{WORKER_URL}}" => workerUrl
- case "{{USER_JAR}}" => localJarFilename
- case other => other
+ shutdownHook = ShutdownHookManager.addShutdownHook { () =>
+ logInfo(s"Worker shutting down, killing driver $driverId")
+ kill()
}
- // TODO: If we add ability to submit multiple jars they should
also be added here
- val builder =
CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
- driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)
- launchDriver(builder, driverDir, driverDesc.supervise)
+ // prepare driver jars, launch driver and set final state from
process exit code
+ val exitCode = prepareAndLaunchDriver()
+ finalState = if (exitCode == 0) Some(DriverState.FINISHED) else
Some(DriverState.FAILED)
}
catch {
- case e: Exception => finalException = Some(e)
+ case interrupted: InterruptedException =>
+ logInfo("Runner thread for driver " + driverId + "
interrupted")
+ killProcessAndFinalize(DriverState.KILLED, interrupted)
+ case e: Exception =>
+ killProcessAndFinalize(DriverState.ERROR, e)
+ }
+ finally {
+ if (shutdownHook != null)
ShutdownHookManager.removeShutdownHook(shutdownHook)
}
- val state =
- if (killed) {
- DriverState.KILLED
- } else if (finalException.isDefined) {
- DriverState.ERROR
- } else {
- finalExitCode match {
- case Some(0) => DriverState.FINISHED
- case _ => DriverState.FAILED
- }
- }
+ // notify worker of final driver state, possible exception
+ worker.send(DriverStateChanged(driverId, finalState.get,
finalException))
+ }
+ // kill the process if started, set shared finalizing variables
+ def killProcessAndFinalize(state: DriverState.DriverState, e:
Exception): Unit = {
+ killProcess()
finalState = Some(state)
+ finalException = Some(e)
+ }
+ }
+
+ workerThread.start()
+ }
- worker.send(DriverStateChanged(driverId, state, finalException))
+ /** Kill driver process and wait for it to exit. */
+ private def killProcess(): Unit = {
+ if (process != null) {
+ logInfo("Killing driver process!")
+ val exitCode = Utils.terminateProcess(process,
DRIVER_TERMINATE_TIMEOUT_MS)
+ if (exitCode.isEmpty) {
+ logWarning("Failed to terminate driver process: " + process +
+ ". This process will likely be orphaned.")
}
- }.start()
+ }
}
- /** Terminate this driver (or prevent it from ever starting if not yet
started) */
- private[worker] def kill() {
- synchronized {
- process.foreach(p => p.destroy())
- killed = true
+ /** Stop this driver, including the process it launched */
+ private[worker] def kill(): Unit = {
+ if (workerThread != null) {
+ // make sure process does not start if being interrupted
+ this.synchronized {
+ // the workerThread will kill the child process when interrupted
+ workerThread.interrupt()
+ workerThread.join()
--- End diff --
> There is a deadlock. If you join while holding the lock here, workerThread
won't be able to acquire it. Then, if it happens to reach the synchronized
line, it will block forever.
I thought that might be the case, so I forced this condition to happen, and
the workerThread was still interrupted while waiting for the lock. I can
investigate this further, though.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]