This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 1545df6  [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties."
1545df6 is described below

commit 1545df657bb5dc93197e8014fb7fa656724d4174
Author: Prashant Sharma <[email protected]>
AuthorDate: Sat Dec 5 23:04:55 2020 -0800

    [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties."
    
    ### What changes were proposed in this pull request?
    Fix flaky test "Verify logging configuration is picked from the provided 
SPARK_CONF_DIR/log4j.properties."
    The test is flaky, with multiple failed instances; the reason for the failure has been similar to:
    
    ```
    
    The code passed to eventually never returned normally. Attempted 109 times 
over 3.0079882413999997 minutes. Last failure message: Failure executing: GET 
at:
    
https://192.168.39.167:8443/api/v1/namespaces/b37fc72a991b49baa68a2eaaa1516463/pods/spark-pi-97a9bc76308e7fe3-exec-1/log?pretty=false.
 Message: pods "spark-pi-97a9bc76308e7fe3-exec-1" not found. Received status: 
Status(apiVersion=v1, code=404, details=StatusDetails(causes=[], group=null, 
kind=pods, name=spark-pi-97a9bc76308e7fe3-exec-1, retryAfterSeconds=null, 
uid=null, additionalProperties={}), kind=Status, message=pods 
"spark-pi-97a9bc76308e7fe3-exec-1" not found, metadata=ListMeta( [...]
    
    ```
    
https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36854/console
    
https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36852/console
    
https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36850/console
    
https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36848/console
    From the above failures, it seems that the executor finishes too quickly and is removed by Spark before the test can complete.
    So, in order to mitigate this situation, one way is to turn off the flag
       "spark.kubernetes.executor.deleteOnTermination" (set it to "false"), so that the executor pod is retained for inspection.
    
    ### Why are the changes needed?
    
    Fixes a flaky test.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
    Existing tests.
    May be a few runs of jenkins integration test, may reveal if the problem is 
resolved or not.
    
    Closes #30616 from ScrapCodes/SPARK-33668/fix-flaky-k8s-integration-test.
    
    Authored-by: Prashant Sharma <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 6317ba29a1bb1b7198fe8df71ddefcf47a55bd51)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../deploy/k8s/integrationtest/KubernetesSuite.scala   | 18 ++++++++++++++++++
 .../k8s/integrationtest/SparkConfPropagateSuite.scala  |  1 +
 2 files changed, 19 insertions(+)

diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
index 193a02a..7b2a2d0 100644
--- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
+++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala
@@ -158,6 +158,7 @@ class KubernetesSuite extends SparkFunSuite
       kubernetesTestComponents.deleteNamespace()
     }
     deleteDriverPod()
+    deleteExecutorPod(appLocator)
   }
 
   protected def runSparkPiAndVerifyCompletion(
@@ -508,6 +509,23 @@ class KubernetesSuite extends SparkFunSuite
         .get() == null)
     }
   }
+
+  private def deleteExecutorPod(appLocator: String): Unit = {
+    kubernetesTestComponents
+      .kubernetesClient
+      .pods()
+      .withLabel("spark-app-locator", appLocator)
+      .withLabel("spark-role", "executor")
+      .delete()
+    Eventually.eventually(TIMEOUT, INTERVAL) {
+      assert(kubernetesTestComponents.kubernetesClient
+        .pods()
+        .withLabel("spark-app-locator", appLocator)
+        .withLabel("spark-role", "executor")
+        .list()
+        .getItems.isEmpty)
+    }
+  }
 }
 
 private[spark] object KubernetesSuite {
diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
index 5d3b426..0bc6327 100644
--- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
+++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala
@@ -39,6 +39,7 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: 
KubernetesSuite =>
 
       sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug")
       sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug")
+      sparkAppConf.set("spark.kubernetes.executor.deleteOnTermination", 
"false")
 
       val log4jExpectedLog =
         s"log4j: Reading configuration from URL 
file:/opt/spark/conf/log4j.properties"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to