Github user andrewor14 commented on a diff in the pull request:
https://github.com/apache/spark/pull/8007#discussion_r38607895
--- Diff:
yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala ---
@@ -440,22 +454,43 @@ private[yarn] class YarnAllocator(
// Hadoop 2.2.X added a ContainerExitStatus we should switch to use
// there are some exit status' we shouldn't necessarily count
against us, but for
// now I think its ok as none of the containers are expected to
exit
- if (completedContainer.getExitStatus ==
ContainerExitStatus.PREEMPTED) {
- logInfo("Container preempted: " + containerId)
- } else if (completedContainer.getExitStatus == -103) { // vmem
limit exceeded
- logWarning(memLimitExceededLogMessage(
+ var isNormalExit = false
+ var containerExitReason = "Container exited for an unknown reason."
+ val exitStatus = completedContainer.getExitStatus
+ if (exitStatus == ContainerExitStatus.PREEMPTED) {
+ isNormalExit = true
+ containerExitReason = s"Container $containerId was preempted."
+ logInfo(containerExitReason)
+ } else if (exitStatus == -103) { // vmem limit exceeded
+ // Should probably still count these towards task failures
+ containerExitReason = memLimitExceededLogMessage(
completedContainer.getDiagnostics,
- VMEM_EXCEEDED_PATTERN))
- } else if (completedContainer.getExitStatus == -104) { // pmem
limit exceeded
- logWarning(memLimitExceededLogMessage(
+ VMEM_EXCEEDED_PATTERN)
+ logWarning(containerExitReason)
+ } else if (exitStatus == -104) { // pmem limit exceeded
+ // Should probably still count these towards task failures
+ containerExitReason = memLimitExceededLogMessage(
completedContainer.getDiagnostics,
- PMEM_EXCEEDED_PATTERN))
- } else if (completedContainer.getExitStatus != 0) {
+ PMEM_EXCEEDED_PATTERN)
+ logWarning(containerExitReason)
+ } else if (exitStatus != 0) {
logInfo("Container marked as failed: " + containerId +
". Exit status: " + completedContainer.getExitStatus +
". Diagnostics: " + completedContainer.getDiagnostics)
numExecutorsFailed += 1
+ containerExitReason = s"Container $containerId exited abnormally
with exit" +
+ s" status $exitStatus, and was marked as failed."
+ }
+
+ if (exitStatus == 0) {
+ ExecutorExited(0, isNormalExit = true,
+ s"Executor for container $containerId exited normally.")
+ } else {
+ ExecutorExited(completedContainer.getExitStatus, isNormalExit,
containerExitReason)
}
+ } else {
+ ExecutorExited(completedContainer.getExitStatus, isNormalExit =
true,
+ s"Container $containerId exited from explicit termination
request.")
--- End diff --
Can you also add a comment here:
```
// If we have already released this container, then it must mean
// that the driver has explicitly requested it to be killed
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it enabled, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]