This is an automated email from the ASF dual-hosted git repository.

tysonnorris pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new 608c7d5  On an error from kubernetes api server (whether a response 
error, or some exception from fabric8 client, doesn't matter), attempt to 
delete the pod which may or may not have been created. (#4961)
608c7d5 is described below

commit 608c7d55bf17cf59ae6429622974a0d2ea452b59
Author: tysonnorris <[email protected]>
AuthorDate: Tue Sep 22 15:06:35 2020 -0700

    On an error from kubernetes api server (whether a response error, or some 
exception from fabric8 client, doesn't matter), attempt to delete the pod which 
may or may not have been created. (#4961)
---
 .../kubernetes/KubernetesClient.scala              | 10 ++++-
 .../kubernetes/KubernetesContainer.scala           | 33 ++++++++++-------
 .../kubernetes/test/KubernetesContainerTests.scala | 43 +++++++++++++++++++++-
 3 files changed, 70 insertions(+), 16 deletions(-)

diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
index 57b14be..ca96dd4 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
@@ -40,6 +40,7 @@ import io.fabric8.kubernetes.client.utils.Serialization
 import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}
 import okhttp3.{Call, Callback, Request, Response}
 import okio.BufferedSource
+import org.apache.commons.lang3.exception.ExceptionUtils
 import org.apache.openwhisk.common.LoggingMarkers
 import org.apache.openwhisk.common.{ConfigMapValue, Logging, TransactionId}
 import org.apache.openwhisk.core.ConfigKeys
@@ -84,7 +85,7 @@ case class KubernetesPodReadyTimeoutException(timeout: 
FiniteDuration)
 /**
  * Exception to indicate a pod could not be created at the apiserver.
  */
-case class KubernetesPodApiException(e: Throwable) extends Exception(s"Pod was 
not created at apiserver: ${e}")
+case class KubernetesPodApiException(e: Throwable) extends Exception(s"Pod was 
not created at apiserver: ${e}", e)
 
 /**
  * Configuration for node affinity for the pods that execute user action 
containers
@@ -164,7 +165,12 @@ class KubernetesClient(
     } match {
       case Failure(e) =>
         //call to api-server failed
-        transid.failed(this, start, s"Failed create pod for '$name': 
${e.getClass} - ${e.getMessage}", ErrorLevel)
+        val stackTrace = ExceptionUtils.getStackTrace(e)
+        transid.failed(
+          this,
+          start,
+          s"Failed create pod for '$name': ${e.getClass} (Caused by: 
${e.getCause}) - ${e.getMessage}; stacktrace: $stackTrace",
+          ErrorLevel)
         Future.failed(KubernetesPodApiException(e))
       case Success(createdPod) => {
         //call to api-server succeeded; wait for the pod to become ready; 
catch any failure to end the transaction timer
diff --git 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
index 0e23470..8c06a00 100644
--- 
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
+++ 
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
@@ -71,23 +71,30 @@ object KubernetesContainer {
 
     for {
       container <- kubernetes.run(podName, image, memory, environment, 
labels).recoverWith {
-        case _: KubernetesPodApiException =>
+        case e: KubernetesPodApiException =>
           //apiserver call failed - this will expose a different error to users
-          
Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
-        case _ =>
-          kubernetes
-            .rm(podName)
-            .andThen {
-              case Failure(e) =>
-                log.error(this, s"Failed delete pod for '$name': ${e.getClass} 
- ${e.getMessage}")
-            }
-            .transformWith { _ =>
-              Future.failed(WhiskContainerStartupError(s"Failed to run 
container with image '${image}'."))
-            }
+          cleanupFailedPod(e, podName, 
WhiskContainerStartupError(Messages.resourceProvisionError))
+        case e: Throwable =>
+          cleanupFailedPod(e, podName, WhiskContainerStartupError(s"Failed to 
run container with image '${image}'."))
       }
     } yield container
   }
-
+  private def cleanupFailedPod(e: Throwable, podName: String, failureCause: 
Exception)(
+    implicit kubernetes: KubernetesApi,
+    ec: ExecutionContext,
+    tid: TransactionId,
+    log: Logging) = {
+    log.info(this, s"Deleting failed pod '$podName' after: ${e.getClass} - 
${e.getMessage}")
+    kubernetes
+      .rm(podName)
+      .andThen {
+        case Failure(e) =>
+          log.error(this, s"Failed delete pod for '$podName': ${e.getClass} - 
${e.getMessage}")
+      }
+      .transformWith { _ =>
+        Future.failed(failureCause)
+      }
+  }
 }
 
 /**
diff --git 
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
 
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
index 0f331d3..0959193 100644
--- 
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
+++ 
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
@@ -50,6 +50,7 @@ import 
org.apache.openwhisk.core.entity.ActivationResponse.Timeout
 import org.apache.openwhisk.core.entity.size._
 import org.apache.openwhisk.http.Messages
 import 
org.apache.openwhisk.core.containerpool.docker.test.DockerContainerTests._
+import org.scalatest.concurrent.ScalaFutures
 
 import scala.collection.immutable.Queue
 import scala.collection.mutable
@@ -65,7 +66,8 @@ class KubernetesContainerTests
     with StreamLogging
     with BeforeAndAfterEach
     with WskActorSystem
-    with TimingHelpers {
+    with TimingHelpers
+    with ScalaFutures {
 
   import KubernetesClientTests.TestKubernetesClient
   import KubernetesContainerTests._
@@ -483,6 +485,45 @@ class KubernetesContainerTests
     processedLogsFalse(0) shouldBe expectedLogEntry.rawString
   }
 
+  it should "delete a pod that failed to start due to 
KubernetesPodApiException" in {
+    implicit val kubernetes = new TestKubernetesClient {
+      override def run(
+        name: String,
+        image: String,
+        memory: ByteSize = 256.MB,
+        env: Map[String, String] = Map.empty,
+        labels: Map[String, String] = Map.empty)(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+        Future.failed(KubernetesPodApiException(new Exception("faking fabric8 
failure...")))
+      }
+    }
+
+    val container =
+      KubernetesContainer.create(transid = transid, name = "name", image = 
"image", userProvidedImage = true)
+    container.failed.futureValue shouldBe 
WhiskContainerStartupError(Messages.resourceProvisionError)
+
+    kubernetes.runs should have size 0
+    kubernetes.rms should have size 1
+  }
+
+  it should "delete a pod that failed to start due to some other Exception" in 
{
+    implicit val kubernetes = new TestKubernetesClient {
+      override def run(
+        name: String,
+        image: String,
+        memory: ByteSize = 256.MB,
+        env: Map[String, String] = Map.empty,
+        labels: Map[String, String] = Map.empty)(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+        Future.failed(new Exception("faking fabric8 failure..."))
+      }
+    }
+
+    val container =
+      KubernetesContainer.create(transid = transid, name = "name", image = 
"image", userProvidedImage = true)
+    container.failed.futureValue shouldBe a[WhiskContainerStartupError]
+
+    kubernetes.runs should have size 0
+    kubernetes.rms should have size 1
+  }
   def currentTsp: Instant = Instant.now
 
 }

Reply via email to