This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 58dab6e  [SPARK-34154][YARN] Extend LocalityPlacementStrategySuite's 
test with a timeout
58dab6e is described below

commit 58dab6e7d20221248d2c6db199b70a7713d6323e
Author: “attilapiros” <[email protected]>
AuthorDate: Thu Jan 28 08:04:25 2021 +0900

    [SPARK-34154][YARN] Extend LocalityPlacementStrategySuite's test with a 
timeout
    
    ### What changes were proposed in this pull request?
    
    This PR extends the `handle large number of containers and tasks 
(SPARK-18750)` test with a time limit. If the timeout is reached, it saves the 
stack trace of the still-running thread to provide extra information about why 
it got stuck.
    
    ### Why are the changes needed?
    
    This is a flaky test which sometimes runs for hours without stopping.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    I checked it with a temporary code change, adding a `Thread.sleep` to 
`LocalityPreferredContainerPlacementStrategy#expectedHostToContainerCount`.
    
    The stack trace showed the correct method:
    
    ```
    [info] LocalityPlacementStrategySuite:
    [info] - handle large number of containers and tasks (SPARK-18750) *** 
FAILED *** (30 seconds, 26 milliseconds)
    [info]   Failed with an exception or a timeout at thread join:
    [info]
    [info]   java.lang.RuntimeException: Timeout at waiting for thread to stop 
(its stack trace is added to the exception)
    [info]      at java.lang.Thread.sleep(Native Method)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.$anonfun$expectedHostToContainerCount$1(LocalityPreferredContainerPlacementStrategy.scala:198)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy$$Lambda$281/381161906.apply(Unknown
 Source)
    [info]      at 
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
    [info]      at 
scala.collection.TraversableLike$$Lambda$16/322836221.apply(Unknown Source)
    [info]      at 
scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
    [info]      at 
scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
    [info]      at 
scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
    [info]      at 
scala.collection.TraversableLike.map(TraversableLike.scala:238)
    [info]      at 
scala.collection.TraversableLike.map$(TraversableLike.scala:231)
    [info]      at 
scala.collection.AbstractTraversable.map(Traversable.scala:108)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.expectedHostToContainerCount(LocalityPreferredContainerPlacementStrategy.scala:188)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.localityOfRequestedContainers(LocalityPreferredContainerPlacementStrategy.scala:112)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite.org$apache$spark$deploy$yarn$LocalityPlacementStrategySuite$$runTest(LocalityPlacementStrategySuite.scala:94)
    [info]      at 
org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite$$anon$1.run(LocalityPlacementStrategySuite.scala:40)
    [info]      at java.lang.Thread.run(Thread.java:748) 
(LocalityPlacementStrategySuite.scala:61)
    ...
    ```
    
    Closes #31363 from attilapiros/SPARK-34154.
    
    Authored-by: “attilapiros” <[email protected]>
    Signed-off-by: HyukjinKwon <[email protected]>
    (cherry picked from commit 0dedf24cd0359b36f655adbf22bd5048b7288ba5)
    Signed-off-by: HyukjinKwon <[email protected]>
---
 .../spark/deploy/yarn/LocalityPlacementStrategySuite.scala | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
index cf2c384..14f1ec2 100644
--- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
+++ 
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
@@ -32,7 +32,7 @@ class LocalityPlacementStrategySuite extends SparkFunSuite {
   test("handle large number of containers and tasks (SPARK-18750)") {
     // Run the test in a thread with a small stack size, since the original 
issue
     // surfaced as a StackOverflowError.
-    var error: Throwable = null
+    @volatile var error: Throwable = null
 
     val runnable = new Runnable() {
       override def run(): Unit = try {
@@ -43,13 +43,21 @@ class LocalityPlacementStrategySuite extends SparkFunSuite {
     }
 
     val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 
256 * 1024)
+    thread.setDaemon(true)
     thread.start()
-    thread.join()
+    val secondsToWait = 30
+    thread.join(secondsToWait * 1000)
+    if (thread.isAlive()) {
+      error = new RuntimeException(
+        "Timeout at waiting for thread to stop (its stack trace is added to 
the exception)")
+      error.setStackTrace(thread.getStackTrace)
+      thread.interrupt()
+    }
 
     if (error != null) {
       val errors = new StringWriter()
       error.printStackTrace(new PrintWriter(errors))
-      fail(s"StackOverflowError should not be thrown; however, 
got:\n\n$errors")
+      fail(s"Failed with an exception or a timeout at thread join:\n\n$errors")
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to