This is an automated email from the ASF dual-hosted git repository.
feiwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kyuubi.git
The following commit(s) were added to refs/heads/master by this push:
new 0b59fa00c [KYUUBI #6053] [K8S] Show more details for pod/container
status in application error
0b59fa00c is described below
commit 0b59fa00c9c693efe2289ffa3ed5bf064764ba4a
Author: Fei Wang <[email protected]>
AuthorDate: Wed Feb 7 20:55:11 2024 -0800
[KYUUBI #6053] [K8S] Show more details for pod/container status in
application error
# :mag: Description
Now the information for k8s app error is too short and confuse.
For example:
```
App Id: spark-8d04e31b6c9540f6952fde81fcc4e19f
App State: FAILED
App Diagnostic:
kyuubi-spark-2c7d0d5a-c25b-4b36-bcd0-250c97c90025-driver/spark-kubernetes-driver[Error]
```
We need to provide more information to help debug.
For PodStatus:
```
public String toString() {
return "PodStatus(conditions=" + this.getConditions() + ",
containerStatuses=" + this.getContainerStatuses() + ",
ephemeralContainerStatuses=" + this.getEphemeralContainerStatuses() + ",
hostIP=" + this.getHostIP() + ", initContainerStatuses=" +
this.getInitContainerStatuses() + ", message=" + this.getMessage() + ",
nominatedNodeName=" + this.getNominatedNodeName() + ", phase=" +
this.getPhase() + ", podIP=" + this.getPodIP() + ", podIPs=" + this.getPodIPs()
+ ", qosClass=" + [...]
}
```
For ContainerState:
```
public String toString() {
return "ContainerState(running=" + this.getRunning() + ",
terminated=" + this.getTerminated() + ", waiting=" + this.getWaiting() + ",
additionalProperties=" + this.getAdditionalProperties() + ")";
}
```
In this pr, we show the PodStatus or ContainerState directly and provide
more useful information.
## Issue References ๐
This pull request fixes #
## Describe Your Solution ๐ง
Please include a summary of the change and which issue is fixed. Please
also include relevant motivation and context. List any dependencies that are
required for this change.
## Types of changes :bookmark:
- [ ] Bugfix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
## Test Plan ๐งช
#### Behavior Without This Pull Request :coffin:
#### Behavior With This Pull Request :tada:
#### Related Unit Tests
---
# Checklist ๐
- [ ] This patch was not authored or co-authored using [Generative
Tooling](https://www.apache.org/legal/generative-tooling.html)
**Be nice. Be informative.**
Closes #6053 from turboFei/pod_diagnose.
Closes #6053
34de08c13 [Fei Wang] pod status
f8152e78c [Fei Wang] Show more info
Authored-by: Fei Wang <[email protected]>
Signed-off-by: Fei Wang <[email protected]>
---
.../org/apache/kyuubi/client/util/JsonUtils.java | 9 +++++++
.../engine/KubernetesApplicationOperation.scala | 28 ++++++++++++----------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git
a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java
b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java
index 855a15280..6d1d58e64 100644
---
a/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java
+++
b/kyuubi-rest-client/src/main/java/org/apache/kyuubi/client/util/JsonUtils.java
@@ -35,6 +35,15 @@ public final class JsonUtils {
}
}
+ public static String toPrettyJson(Object object) {
+ try {
+ return
MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(object);
+ } catch (Exception e) {
+ throw new KyuubiRestException(
+ String.format("Failed to convert object(%s) to json", object), e);
+ }
+ }
+
public static <T> T fromJson(String json, Class<T> clazz) {
try {
return MAPPER.readValue(json, clazz);
diff --git
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
index 6afe3257b..738e7b23c 100644
---
a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
+++
b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala
@@ -29,6 +29,7 @@ import io.fabric8.kubernetes.client.KubernetesClient
import io.fabric8.kubernetes.client.informers.{ResourceEventHandler,
SharedIndexInformer}
import org.apache.kyuubi.{KyuubiException, Logging, Utils}
+import org.apache.kyuubi.client.util.JsonUtils
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.config.KyuubiConf.{KubernetesApplicationStateSource,
KubernetesCleanupDriverPodStrategy}
import
org.apache.kyuubi.config.KyuubiConf.KubernetesApplicationStateSource.KubernetesApplicationStateSource
@@ -364,17 +365,26 @@ object KubernetesApplicationOperation extends Logging {
appStateSource: KubernetesApplicationStateSource,
appStateContainer: String): (ApplicationState, Option[String]) = {
val podName = pod.getMetadata.getName
- val containerStateToBuildAppState = appStateSource match {
+ val containerStatusToBuildAppState = appStateSource match {
case KubernetesApplicationStateSource.CONTAINER =>
pod.getStatus.getContainerStatuses.asScala
- .find(cs =>
appStateContainer.equalsIgnoreCase(cs.getName)).map(_.getState)
+ .find(cs => appStateContainer.equalsIgnoreCase(cs.getName))
case KubernetesApplicationStateSource.POD => None
}
- val applicationState =
containerStateToBuildAppState.map(containerStateToApplicationState)
+ val applicationState = containerStatusToBuildAppState
+ .map(_.getState)
+ .map(containerStateToApplicationState)
.getOrElse(podStateToApplicationState(pod.getStatus.getPhase))
- val applicationError = containerStateToBuildAppState
- .map(cs => containerStateToApplicationError(cs).map(r =>
s"$podName/$appStateContainer[$r]"))
- .getOrElse(Option(pod.getStatus.getReason).map(r => s"$podName[$r]"))
+ val applicationError = if (ApplicationState.isFailed(applicationState)) {
+ val errorMap = containerStatusToBuildAppState.map { cs =>
+ Map("Pod" -> podName, "Container" -> appStateContainer,
"ContainerStatus" -> cs)
+ }.getOrElse {
+ Map("Pod" -> podName, "PodStatus" -> pod.getStatus)
+ }
+ Some(JsonUtils.toPrettyJson(errorMap.asJava))
+ } else {
+ None
+ }
applicationState -> applicationError
}
@@ -393,12 +403,6 @@ object KubernetesApplicationOperation extends Logging {
}
}
- def containerStateToApplicationError(containerState: ContainerState):
Option[String] = {
- //
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states
- Option(containerState.getWaiting).map(_.getReason)
- .orElse(Option(containerState.getTerminated).map(_.getReason))
- }
-
def podStateToApplicationState(podState: String): ApplicationState =
podState match {
//
https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase
case "Pending" => PENDING