This is an automated email from the ASF dual-hosted git repository.

style95 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new f03cbcc4a Fail all activations when it fails to pull a blackbox image. 
(#5270)
f03cbcc4a is described below

commit f03cbcc4ab6ff2d041b5e83b243b48afa4c81381
Author: Dominic Kim <[email protected]>
AuthorDate: Fri Jul 15 14:27:34 2022 +0900

    Fail all activations when it fails to pull a blackbox image. (#5270)
    
    * Fail all activations if it fails to pull images for blackbox actions.
    
    * Apply scalaFmt.
    
    * Remove 127 from the case.
---
 .../org/apache/openwhisk/http/ErrorResponse.scala  |  2 ++
 .../core/containerpool/docker/DockerClient.scala   | 10 +++++--
 .../containerpool/docker/DockerContainer.scala     |  5 +++-
 .../kubernetes/KubernetesClient.scala              | 32 ++++++++++++++++++++--
 .../kubernetes/KubernetesContainer.scala           |  4 +++
 .../standalone/StandaloneDockerSupport.scala       |  2 +-
 .../openwhisk/common/etcd/EtcdConfigTests.scala    |  7 +----
 .../docker/test/DockerClientTests.scala            | 13 ++++++++-
 8 files changed, 61 insertions(+), 14 deletions(-)

diff --git 
a/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala 
b/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
index c5e3f82af..35af54fa8 100644
--- a/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
+++ b/common/scala/src/main/scala/org/apache/openwhisk/http/ErrorResponse.scala
@@ -235,6 +235,8 @@ object Messages {
   /** Indicates that the image could not be pulled. */
   def imagePullError(image: String) = s"Failed to pull container image 
'$image'."
 
+  def commandNotFoundError = "executable file not found"
+
   /** Indicates that the container for the action could not be started. */
   val resourceProvisionError = "Failed to provision resources to run the 
action."
 
diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
index 4efdc2d6d..2d40a0bd4 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerClient.scala
@@ -147,11 +147,15 @@ class DockerClient(dockerHost: Option[String] = None,
           // Examples:
           // - Unrecognized option specified
           // - Not enough disk space
-          case pre: ProcessUnsuccessfulException if pre.exitStatus == 
ExitStatus(125) =>
+          // Exit status 127 means an error that container command cannot be 
found.
+          // Examples:
+          // - executable file not found in $PATH": unknown
+          case pre: ProcessUnsuccessfulException
+              if pre.exitStatus == ExitStatus(125) || pre.exitStatus == 
ExitStatus(127) =>
             Future.failed(
               DockerContainerId
                 .parse(pre.stdout)
-                .map(BrokenDockerContainer(_, s"Broken container: 
${pre.getMessage}"))
+                .map(BrokenDockerContainer(_, s"Broken container: 
${pre.getMessage}", Some(pre.exitStatus.statusValue)))
                 .getOrElse(pre))
         }
     }
@@ -296,4 +300,4 @@ trait DockerApi {
 }
 
 /** Indicates any error while starting a container that leaves a broken 
container behind that needs to be removed */
-case class BrokenDockerContainer(id: ContainerId, msg: String) extends 
Exception(msg)
+case class BrokenDockerContainer(id: ContainerId, msg: String, existStatus: 
Option[Int] = None) extends Exception(msg)
diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
index ca0df62af..3e834be61 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/docker/DockerContainer.scala
@@ -127,11 +127,14 @@ object DockerContainer {
     for {
       pullSuccessful <- pulled
       id <- docker.run(imageToUse, args).recoverWith {
-        case BrokenDockerContainer(brokenId, _) =>
+        case BrokenDockerContainer(brokenId, _, exitStatus) if 
exitStatus.isEmpty || exitStatus.contains(125) =>
           // Remove the broken container - but don't wait or check for the 
result.
           // If the removal fails, there is nothing we could do to recover 
from the recovery.
           docker.rm(brokenId)
           
Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
+        case BrokenDockerContainer(brokenId, _, exitStatus) if 
exitStatus.contains(127) =>
+          docker.rm(brokenId)
+          
Future.failed(BlackboxStartupError(s"${Messages.commandNotFoundError} in image 
${imageToUse}"))
         case _ =>
           // Iff the pull was successful, we assume that the error is not due 
to an image pull error, otherwise
           // the docker run was a backup measure to try and start the 
container anyway. If it fails again, we assume
diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
index 9dd588eda..ce5a6559b 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
@@ -22,7 +22,6 @@ import java.net.SocketTimeoutException
 import java.time.format.DateTimeFormatterBuilder
 import java.time.temporal.ChronoField
 import java.time.{Instant, ZoneId}
-
 import akka.actor.ActorSystem
 import akka.event.Logging.ErrorLevel
 import akka.event.Logging.InfoLevel
@@ -48,6 +47,7 @@ import 
org.apache.openwhisk.core.containerpool.docker.ProcessRunner
 import org.apache.openwhisk.core.containerpool.{ContainerAddress, ContainerId}
 import org.apache.openwhisk.core.entity.ByteSize
 import org.apache.openwhisk.core.entity.size._
+import org.apache.openwhisk.http.Messages
 import pureconfig._
 import pureconfig.generic.auto._
 import spray.json.DefaultJsonProtocol._
@@ -82,6 +82,16 @@ case class KubernetesEphemeralStorageConfig(limit: ByteSize)
 case class KubernetesPodReadyTimeoutException(timeout: FiniteDuration)
     extends Exception(s"Pod readiness timed out after ${timeout.toSeconds}s")
 
+/**
+ * Exception to indicate it failed to pull an image for blackbox actions.
+ */
+case class KubernetesImagePullFailedException(msg: String) extends 
Exception(msg)
+
+/**
+ * Exception to indicate the command for an image is not found.
+ */
+case class KubernetesImageCommandNotFoundException(msg: String) extends 
Exception(msg)
+
 /**
  * Exception to indicate a pod could not be created at the apiserver.
  */
@@ -132,6 +142,8 @@ class KubernetesClient(
     new DefaultKubernetesClient(configBuilder.build())
   }
 
+  private val imagePullFailedMsgs = Set("ImagePullBackOff", "ErrImagePull")
+
   private val podBuilder = new WhiskPodBuilder(kubeRestClient, config)
 
   def run(name: String,
@@ -321,7 +333,23 @@ class KubernetesClient(
       .withName(pod.getMetadata.getName)
     val deadline = deadlineOpt.getOrElse((timeout - 
(System.currentTimeMillis() - start.toEpochMilli).millis).fromNow)
     if (!readyPod.isReady) {
-      if (deadline.isOverdue()) {
+      // when action pod is failed while pulling images, we need to let users 
know that
+      val imagePullErr = Try {
+        readyPod.get.getStatus.getContainerStatuses.asScala.exists { status =>
+          imagePullFailedMsgs.contains(status.getState.getWaiting.getReason)
+        }
+      } getOrElse false
+      // when command not found in image, we need to let users know that as 
well
+      val commandNotFoundErr = Try {
+        readyPod.get.getStatus.getContainerStatuses.asScala.exists { status =>
+          
status.getState.getWaiting.getMessage.contains(Messages.commandNotFoundError)
+        }
+      } getOrElse false
+      if (imagePullErr) {
+        Future.failed(KubernetesImagePullFailedException(s"Failed to pull 
image for pod ${pod.getMetadata.getName}"))
+      } else if (commandNotFoundErr) {
+        
Future.failed(KubernetesImageCommandNotFoundException(Messages.commandNotFoundError))
+      } else if (deadline.isOverdue()) {
         Future.failed(KubernetesPodReadyTimeoutException(timeout))
       } else {
         after(1.seconds, scheduler) {
diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
index 8c06a00f2..51b4c87a2 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
@@ -74,6 +74,10 @@ object KubernetesContainer {
         case e: KubernetesPodApiException =>
           //apiserver call failed - this will expose a different error to users
           cleanupFailedPod(e, podName, 
WhiskContainerStartupError(Messages.resourceProvisionError))
+        case e: KubernetesImagePullFailedException =>
+          cleanupFailedPod(e, podName, 
BlackboxStartupError(Messages.imagePullError(image)))
+        case e: KubernetesImageCommandNotFoundException =>
+          cleanupFailedPod(e, podName, 
BlackboxStartupError(s"${Messages.commandNotFoundError} in image ${image}"))
         case e: Throwable =>
           cleanupFailedPod(e, podName, WhiskContainerStartupError(s"Failed to 
run container with image '${image}'."))
       }
diff --git 
a/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
 
b/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
index 8388fff3f..697410015 100644
--- 
a/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
+++ 
b/core/standalone/src/main/scala/org/apache/openwhisk/standalone/StandaloneDockerSupport.scala
@@ -221,7 +221,7 @@ class StandaloneDockerClient(pullDisabled: 
Boolean)(implicit log: Logging, as: A
     for {
       _ <- if (shouldPull) pull(image) else Future.successful(())
       id <- run(image, args).recoverWith {
-        case t @ BrokenDockerContainer(brokenId, _) =>
+        case t @ BrokenDockerContainer(brokenId, _, _) =>
           // Remove the broken container - but don't wait or check for the 
result.
           // If the removal fails, there is nothing we could do to recover 
from the recovery.
           rm(brokenId)
diff --git 
a/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala 
b/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
index feef4bdff..c53608ccf 100644
--- 
a/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
+++ 
b/tests/src/test/scala/org/apache/openwhisk/common/etcd/EtcdConfigTests.scala
@@ -7,13 +7,8 @@ import org.scalatest.{FlatSpec, Matchers}
 import org.scalatest.junit.JUnitRunner
 
 import scala.concurrent.ExecutionContextExecutor
-
-
 @RunWith(classOf[JUnitRunner])
-class EtcdConfigTests
-  extends FlatSpec
-    with Matchers
-    with WskActorSystem {
+class EtcdConfigTests extends FlatSpec with Matchers with WskActorSystem {
   behavior of "EtcdConfig"
 
   implicit val ece: ExecutionContextExecutor = actorSystem.dispatcher
diff --git 
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
 
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
index 7de2a35cf..d08e8fa37 100644
--- 
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
+++ 
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/docker/test/DockerClientTests.scala
@@ -358,7 +358,6 @@ class DockerClientTests
     }
 
     Seq[(ProcessRunningException, String)](
-      (ProcessUnsuccessfulException(ExitStatus(127), id.asString, "Unknown 
command"), "Exit code not 125"),
       (ProcessUnsuccessfulException(ExitStatus(125), "", "Unknown flag: 
--foo"), "No container ID"),
       (ProcessUnsuccessfulException(ExitStatus(1), "", ""), "Exit code not 125 
and no container ID"),
       (ProcessTimeoutException(1.second, ExitStatus(125), id.asString, ""), 
"Timeout instead of unsuccessful command"))
@@ -367,6 +366,18 @@ class DockerClientTests
       }
   }
 
+  it should "fail with BrokenDockerContainer when run returns with exit status 
127 and a container ID" in {
+    val dc = dockerClient {
+      Future.failed(
+        ProcessUnsuccessfulException(
+          exitStatus = ExitStatus(127),
+          stdout = id.asString,
+          stderr = """Error response from daemon: OCI runtime create failed: 
executable file not found"""))
+    }
+    val bdc = the[BrokenDockerContainer] thrownBy await(dc.run("image", 
Seq.empty))
+    bdc.id shouldBe id
+  }
+
   it should "fail with ProcessTimeoutException when command times out" in {
     val expectedPTE =
       ProcessTimeoutException(timeout = 10.seconds, exitStatus = 
ExitStatus(143), stdout = "stdout", stderr = "stderr")

Reply via email to