This is an automated email from the ASF dual-hosted git repository.
style95 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/openwhisk.git
The following commit(s) were added to refs/heads/master by this push:
new f03cbcc4a Fail all activations when it fails to pull a blackbox image.
(#5270)
f03cbcc4a is described below
commit f03cbcc4ab6ff2d041b5e83b243b48afa4c81381
Author: Dominic Kim <[email protected]>
AuthorDate: Fri Jul 15 14:27:34 2022 +0900
Fail all activations when it fails to pull a blackbox image. (#5270)
* Fail all activations if it fails to pull images for blackbox actions.
* Apply scalaFmt.
* Remove 127 from the case.
---
.../org/apache/openwhisk/http/ErrorResponse.scala | 2 ++
.../core/containerpool/docker/DockerClient.scala | 10 +++++--
.../containerpool/docker/DockerContainer.scala | 5 +++-
.../kubernetes/KubernetesClient.scala | 32 ++++++++++++++++++++--
.../kubernetes/KubernetesContainer.scala | 4 +++
.../standalone/StandaloneDockerSupport.scala | 2 +-
.../openwhisk/common/etcd/EtcdConfigTests.scala | 7 +----
.../docker/test/DockerClientTests.scala | 13 ++++++++-
8 files changed, 61 insertions(+), 14 deletions(-)
diff --git
a/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
b/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
index c5e3f82af..35af54fa8 100644
--- a/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
+++ b/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
@@ -235,6 +235,8 @@ object Messages {
/** Indicates that the image could not be pulled. */
def imagePullError(image: String) = s"Failed to pull container image
'$image'."
+ def commandNotFoundError = "executable file not found"
+
/** Indicates that the container for the action could not be started. */
val resourceProvisionError = "Failed to provision resources to run the
action."
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
index 4efdc2d6d..2d40a0bd4 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
@@ -147,11 +147,15 @@ class DockerClient(dockerHost: Option[String] = None,
// Examples:
// - Unrecognized option specified
// - Not enough disk space
- case pre: ProcessUnsuccessfulException if pre.exitStatus ==
ExitStatus(125) =>
+ // Exit status 127 means an error that container command cannot be
found.
+ // Examples:
+ // - executable file not found in $PATH": unknown
+ case pre: ProcessUnsuccessfulException
+ if pre.exitStatus == ExitStatus(125) || pre.exitStatus ==
ExitStatus(127) =>
Future.failed(
DockerContainerId
.parse(pre.stdout)
- .map(BrokenDockerContainer(_, s"Broken container:
${pre.getMessage}"))
+ .map(BrokenDockerContainer(_, s"Broken container:
${pre.getMessage}", Some(pre.exitStatus.statusValue)))
.getOrElse(pre))
}
}
@@ -296,4 +300,4 @@ trait DockerApi {
}
/** Indicates any error while starting a container that leaves a broken
container behind that needs to be removed */
-case class BrokenDockerContainer(id: ContainerId, msg: String) extends
Exception(msg)
+case class BrokenDockerContainer(id: ContainerId, msg: String, existStatus:
Option[Int] = None) extends Exception(msg)
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
index ca0df62af..3e834be61 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
@@ -127,11 +127,14 @@ object DockerContainer {
for {
pullSuccessful <- pulled
id <- docker.run(imageToUse, args).recoverWith {
- case BrokenDockerContainer(brokenId, _) =>
+ case BrokenDockerContainer(brokenId, _, exitStatus) if
exitStatus.isEmpty || exitStatus.contains(125) =>
// Remove the broken container - but don't wait or check for the
result.
// If the removal fails, there is nothing we could do to recover
from the recovery.
docker.rm(brokenId)
Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
+ case BrokenDockerContainer(brokenId, _, exitStatus) if
exitStatus.contains(127) =>
+ docker.rm(brokenId)
+
Future.failed(BlackboxStartupError(s"${Messages.commandNotFoundError} in image
${imageToUse}"))
case _ =>
// Iff the pull was successful, we assume that the error is not due
to an image pull error, otherwise
// the docker run was a backup measure to try and start the
container anyway. If it fails again, we assume
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
index 9dd588eda..ce5a6559b 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
@@ -22,7 +22,6 @@ import java.net.SocketTimeoutException
import java.time.format.DateTimeFormatterBuilder
import java.time.temporal.ChronoField
import java.time.{Instant, ZoneId}
-
import akka.actor.ActorSystem
import akka.event.Logging.ErrorLevel
import akka.event.Logging.InfoLevel
@@ -48,6 +47,7 @@ import
org.apache.openwhisk.core.containerpool.docker.ProcessRunner
import org.apache.openwhisk.core.containerpool.{ContainerAddress, ContainerId}
import org.apache.openwhisk.core.entity.ByteSize
import org.apache.openwhisk.core.entity.size._
+import org.apache.openwhisk.http.Messages
import pureconfig._
import pureconfig.generic.auto._
import spray.json.DefaultJsonProtocol._
@@ -82,6 +82,16 @@ case class KubernetesEphemeralStorageConfig(limit: ByteSize)
case class KubernetesPodReadyTimeoutException(timeout: FiniteDuration)
extends Exception(s"Pod readiness timed out after ${timeout.toSeconds}s")
+/**
+ * Exception to indicate it failed to pull an image for blackbox actions.
+ */
+case class KubernetesImagePullFailedException(msg: String) extends
Exception(msg)
+
+/**
+ * Exception to indicate the command for an image is not found.
+ */
+case class KubernetesImageCommandNotFoundException(msg: String) extends
Exception(msg)
+
/**
* Exception to indicate a pod could not be created at the apiserver.
*/
@@ -132,6 +142,8 @@ class KubernetesClient(
new DefaultKubernetesClient(configBuilder.build())
}
+ private val imagePullFailedMsgs = Set("ImagePullBackOff", "ErrImagePull")
+
private val podBuilder = new WhiskPodBuilder(kubeRestClient, config)
def run(name: String,
@@ -321,7 +333,23 @@ class KubernetesClient(
.withName(pod.getMetadata.getName)
val deadline = deadlineOpt.getOrElse((timeout -
(System.currentTimeMillis() - start.toEpochMilli).millis).fromNow)
if (!readyPod.isReady) {
- if (deadline.isOverdue()) {
+ // when action pod is failed while pulling images, we need to let users
know that
+ val imagePullErr = Try {
+ readyPod.get.getStatus.getContainerStatuses.asScala.exists { status =>
+ imagePullFailedMsgs.contains(status.getState.getWaiting.getReason)
+ }
+ } getOrElse false
+ // when command not found in image, we need to let users know that as
well
+ val commandNotFoundErr = Try {
+ readyPod.get.getStatus.getContainerStatuses.asScala.exists { status =>
+
status.getState.getWaiting.getMessage.contains(Messages.commandNotFoundError)
+ }
+ } getOrElse false
+ if (imagePullErr) {
+ Future.failed(KubernetesImagePullFailedException(s"Failed to pull
image for pod ${pod.getMetadata.getName}"))
+ } else if (commandNotFoundErr) {
+
Future.failed(KubernetesImageCommandNotFoundException(Messages.commandNotFoundError))
+ } else if (deadline.isOverdue()) {
Future.failed(KubernetesPodReadyTimeoutException(timeout))
} else {
after(1.seconds, scheduler) {
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
index 8c06a00f2..51b4c87a2 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
@@ -74,6 +74,10 @@ object KubernetesContainer {
case e: KubernetesPodApiException =>
//apiserver call failed - this will expose a different error to users
cleanupFailedPod(e, podName,
WhiskContainerStartupError(Messages.resourceProvisionError))
+ case e: KubernetesImagePullFailedException =>
+ cleanupFailedPod(e, podName,
BlackboxStartupError(Messages.imagePullError(image)))
+ case e: KubernetesImageCommandNotFoundException =>
+ cleanupFailedPod(e, podName,
BlackboxStartupError(s"${Messages.commandNotFoundError} in image ${image}"))
case e: Throwable =>
cleanupFailedPod(e, podName, WhiskContainerStartupError(s"Failed to
run container with image '${image}'."))
}
diff --git
a/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
b/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
index 8388fff3f..697410015 100644
---
a/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
+++
b/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
@@ -221,7 +221,7 @@ class StandaloneDockerClient(pullDisabled:
Boolean)(implicit log: Logging, as: A
for {
_ <- if (shouldPull) pull(image) else Future.successful(())
id <- run(image, args).recoverWith {
- case t @ BrokenDockerContainer(brokenId, _) =>
+ case t @ BrokenDockerContainer(brokenId, _, _) =>
// Remove the broken container - but don't wait or check for the
result.
// If the removal fails, there is nothing we could do to recover
from the recovery.
rm(brokenId)
diff --git
a/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
b/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
index feef4bdff..c53608ccf 100644
---
a/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
+++
b/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
@@ -7,13 +7,8 @@ import org.scalatest.{FlatSpec, Matchers}
import org.scalatest.junit.JUnitRunner
import scala.concurrent.ExecutionContextExecutor
-
-
@RunWith(classOf[JUnitRunner])
-class EtcdConfigTests
- extends FlatSpec
- with Matchers
- with WskActorSystem {
+class EtcdConfigTests extends FlatSpec with Matchers with WskActorSystem {
behavior of "EtcdConfig"
implicit val ece: ExecutionContextExecutor = actorSystem.dispatcher
diff --git
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
index 7de2a35cf..d08e8fa37 100644
---
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
+++
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
@@ -358,7 +358,6 @@ class DockerClientTests
}
Seq[(ProcessRunningException, String)](
- (ProcessUnsuccessfulException(ExitStatus(127), id.asString, "Unknown
command"), "Exit code not 125"),
(ProcessUnsuccessfulException(ExitStatus(125), "", "Unknown flag:
--foo"), "No container ID"),
(ProcessUnsuccessfulException(ExitStatus(1), "", ""), "Exit code not 125
and no container ID"),
(ProcessTimeoutException(1.second, ExitStatus(125), id.asString, ""),
"Timeout instead of unsuccessful command"))
@@ -367,6 +366,18 @@ class DockerClientTests
}
}
+ it should "fail with BrokenDockerContainer when run returns with exit status
127 and a container ID" in {
+ val dc = dockerClient {
+ Future.failed(
+ ProcessUnsuccessfulException(
+ exitStatus = ExitStatus(127),
+ stdout = id.asString,
+ stderr = """Error response from daemon: OCI runtime create failed:
executable file not found"""))
+ }
+ val bdc = the[BrokenDockerContainer] thrownBy await(dc.run("image",
Seq.empty))
+ bdc.id shouldBe id
+ }
+
it should "fail with ProcessTimeoutException when command times out" in {
val expectedPTE =
ProcessTimeoutException(timeout = 10.seconds, exitStatus =
ExitStatus(143), stdout = "stdout", stderr = "stderr")