This is an automated email from the ASF dual-hosted git repository.
tysonnorris pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/openwhisk.git
The following commit(s) were added to refs/heads/master by this push:
new 608c7d5 On an error from kubernetes api server (whether a response
error, or some exception from fabric8 client, doesn't matter), attempt to
delete the pod which may or may not have been created. (#4961)
608c7d5 is described below
commit 608c7d55bf17cf59ae6429622974a0d2ea452b59
Author: tysonnorris <[email protected]>
AuthorDate: Tue Sep 22 15:06:35 2020 -0700
On an error from kubernetes api server (whether a response error, or some
exception from fabric8 client, doesn't matter), attempt to delete the pod which
may or may not have been created. (#4961)
---
.../kubernetes/KubernetesClient.scala | 10 ++++-
.../kubernetes/KubernetesContainer.scala | 33 ++++++++++-------
.../kubernetes/test/KubernetesContainerTests.scala | 43 +++++++++++++++++++++-
3 files changed, 70 insertions(+), 16 deletions(-)
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
index 57b14be..ca96dd4 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesClient.scala
@@ -40,6 +40,7 @@ import io.fabric8.kubernetes.client.utils.Serialization
import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}
import okhttp3.{Call, Callback, Request, Response}
import okio.BufferedSource
+import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.openwhisk.common.LoggingMarkers
import org.apache.openwhisk.common.{ConfigMapValue, Logging, TransactionId}
import org.apache.openwhisk.core.ConfigKeys
@@ -84,7 +85,7 @@ case class KubernetesPodReadyTimeoutException(timeout:
FiniteDuration)
/**
* Exception to indicate a pod could not be created at the apiserver.
*/
-case class KubernetesPodApiException(e: Throwable) extends Exception(s"Pod was
not created at apiserver: ${e}")
+case class KubernetesPodApiException(e: Throwable) extends Exception(s"Pod was
not created at apiserver: ${e}", e)
/**
* Configuration for node affinity for the pods that execute user action
containers
@@ -164,7 +165,12 @@ class KubernetesClient(
} match {
case Failure(e) =>
//call to api-server failed
- transid.failed(this, start, s"Failed create pod for '$name':
${e.getClass} - ${e.getMessage}", ErrorLevel)
+ val stackTrace = ExceptionUtils.getStackTrace(e)
+ transid.failed(
+ this,
+ start,
+ s"Failed create pod for '$name': ${e.getClass} (Caused by:
${e.getCause}) - ${e.getMessage}; stacktrace: $stackTrace",
+ ErrorLevel)
Future.failed(KubernetesPodApiException(e))
case Success(createdPod) => {
//call to api-server succeeded; wait for the pod to become ready;
catch any failure to end the transaction timer
diff --git
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
index 0e23470..8c06a00 100644
---
a/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
+++
b/core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/kubernetes/KubernetesContainer.scala
@@ -71,23 +71,30 @@ object KubernetesContainer {
for {
container <- kubernetes.run(podName, image, memory, environment,
labels).recoverWith {
- case _: KubernetesPodApiException =>
+ case e: KubernetesPodApiException =>
//apiserver call failed - this will expose a different error to users
-
Future.failed(WhiskContainerStartupError(Messages.resourceProvisionError))
- case _ =>
- kubernetes
- .rm(podName)
- .andThen {
- case Failure(e) =>
- log.error(this, s"Failed delete pod for '$name': ${e.getClass}
- ${e.getMessage}")
- }
- .transformWith { _ =>
- Future.failed(WhiskContainerStartupError(s"Failed to run
container with image '${image}'."))
- }
+ cleanupFailedPod(e, podName,
WhiskContainerStartupError(Messages.resourceProvisionError))
+ case e: Throwable =>
+ cleanupFailedPod(e, podName, WhiskContainerStartupError(s"Failed to
run container with image '${image}'."))
}
} yield container
}
-
+ private def cleanupFailedPod(e: Throwable, podName: String, failureCause:
Exception)(
+ implicit kubernetes: KubernetesApi,
+ ec: ExecutionContext,
+ tid: TransactionId,
+ log: Logging) = {
+ log.info(this, s"Deleting failed pod '$podName' after: ${e.getClass} -
${e.getMessage}")
+ kubernetes
+ .rm(podName)
+ .andThen {
+ case Failure(e) =>
+ log.error(this, s"Failed delete pod for '$podName': ${e.getClass} -
${e.getMessage}")
+ }
+ .transformWith { _ =>
+ Future.failed(failureCause)
+ }
+ }
}
/**
diff --git
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
index 0f331d3..0959193 100644
---
a/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
+++
b/tests/src/test/scala/org/apache/openwhisk/core/containerpool/kubernetes/test/KubernetesContainerTests.scala
@@ -50,6 +50,7 @@ import
org.apache.openwhisk.core.entity.ActivationResponse.Timeout
import org.apache.openwhisk.core.entity.size._
import org.apache.openwhisk.http.Messages
import
org.apache.openwhisk.core.containerpool.docker.test.DockerContainerTests._
+import org.scalatest.concurrent.ScalaFutures
import scala.collection.immutable.Queue
import scala.collection.mutable
@@ -65,7 +66,8 @@ class KubernetesContainerTests
with StreamLogging
with BeforeAndAfterEach
with WskActorSystem
- with TimingHelpers {
+ with TimingHelpers
+ with ScalaFutures {
import KubernetesClientTests.TestKubernetesClient
import KubernetesContainerTests._
@@ -483,6 +485,45 @@ class KubernetesContainerTests
processedLogsFalse(0) shouldBe expectedLogEntry.rawString
}
+ it should "delete a pod that failed to start due to
KubernetesPodApiException" in {
+ implicit val kubernetes = new TestKubernetesClient {
+ override def run(
+ name: String,
+ image: String,
+ memory: ByteSize = 256.MB,
+ env: Map[String, String] = Map.empty,
+ labels: Map[String, String] = Map.empty)(implicit transid:
TransactionId): Future[KubernetesContainer] = {
+ Future.failed(KubernetesPodApiException(new Exception("faking fabric8
failure...")))
+ }
+ }
+
+ val container =
+ KubernetesContainer.create(transid = transid, name = "name", image =
"image", userProvidedImage = true)
+ container.failed.futureValue shouldBe
WhiskContainerStartupError(Messages.resourceProvisionError)
+
+ kubernetes.runs should have size 0
+ kubernetes.rms should have size 1
+ }
+
+ it should "delete a pod that failed to start due to some other Exception" in
{
+ implicit val kubernetes = new TestKubernetesClient {
+ override def run(
+ name: String,
+ image: String,
+ memory: ByteSize = 256.MB,
+ env: Map[String, String] = Map.empty,
+ labels: Map[String, String] = Map.empty)(implicit transid:
TransactionId): Future[KubernetesContainer] = {
+ Future.failed(new Exception("faking fabric8 failure..."))
+ }
+ }
+
+ val container =
+ KubernetesContainer.create(transid = transid, name = "name", image =
"image", userProvidedImage = true)
+ container.failed.futureValue shouldBe a[WhiskContainerStartupError]
+
+ kubernetes.runs should have size 0
+ kubernetes.rms should have size 1
+ }
def currentTsp: Instant = Instant.now
}