This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 58dab6e [SPARK-34154][YARN] Extend LocalityPlacementStrategySuite's
test with a timeout
58dab6e is described below
commit 58dab6e7d20221248d2c6db199b70a7713d6323e
Author: “attilapiros” <[email protected]>
AuthorDate: Thu Jan 28 08:04:25 2021 +0900
[SPARK-34154][YARN] Extend LocalityPlacementStrategySuite's test with a
timeout
### What changes were proposed in this pull request?
This PR extends the `handle large number of containers and tasks
(SPARK-18750)` test with a time limit. If the timeout is hit, it saves the stack
trace of the still-running thread to provide extra information about why
it got stuck.
### Why are the changes needed?
This is a flaky test which sometimes runs for hours without stopping.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
I checked it with a temporary code change: by adding a `Thread.sleep` to
`LocalityPreferredContainerPlacementStrategy#expectedHostToContainerCount`.
The stack trace showed the correct method:
```
[info] LocalityPlacementStrategySuite:
[info] - handle large number of containers and tasks (SPARK-18750) ***
FAILED *** (30 seconds, 26 milliseconds)
[info] Failed with an exception or a timeout at thread join:
[info]
[info] java.lang.RuntimeException: Timeout at waiting for thread to stop
(its stack trace is added to the exception)
[info] at java.lang.Thread.sleep(Native Method)
[info] at
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.$anonfun$expectedHostToContainerCount$1(LocalityPreferredContainerPlacementStrategy.scala:198)
[info] at
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy$$Lambda$281/381161906.apply(Unknown
Source)
[info] at
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
[info] at
scala.collection.TraversableLike$$Lambda$16/322836221.apply(Unknown Source)
[info] at
scala.collection.immutable.HashMap$HashMap1.foreach(HashMap.scala:234)
[info] at
scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
[info] at
scala.collection.immutable.HashMap$HashTrieMap.foreach(HashMap.scala:468)
[info] at
scala.collection.TraversableLike.map(TraversableLike.scala:238)
[info] at
scala.collection.TraversableLike.map$(TraversableLike.scala:231)
[info] at
scala.collection.AbstractTraversable.map(Traversable.scala:108)
[info] at
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.expectedHostToContainerCount(LocalityPreferredContainerPlacementStrategy.scala:188)
[info] at
org.apache.spark.deploy.yarn.LocalityPreferredContainerPlacementStrategy.localityOfRequestedContainers(LocalityPreferredContainerPlacementStrategy.scala:112)
[info] at
org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite.org$apache$spark$deploy$yarn$LocalityPlacementStrategySuite$$runTest(LocalityPlacementStrategySuite.scala:94)
[info] at
org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite$$anon$1.run(LocalityPlacementStrategySuite.scala:40)
[info] at java.lang.Thread.run(Thread.java:748)
(LocalityPlacementStrategySuite.scala:61)
...
```
Closes #31363 from attilapiros/SPARK-34154.
Authored-by: “attilapiros” <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit 0dedf24cd0359b36f655adbf22bd5048b7288ba5)
Signed-off-by: HyukjinKwon <[email protected]>
---
.../spark/deploy/yarn/LocalityPlacementStrategySuite.scala | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
index cf2c384..14f1ec2 100644
---
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
+++
b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala
@@ -32,7 +32,7 @@ class LocalityPlacementStrategySuite extends SparkFunSuite {
test("handle large number of containers and tasks (SPARK-18750)") {
// Run the test in a thread with a small stack size, since the original
issue
// surfaced as a StackOverflowError.
- var error: Throwable = null
+ @volatile var error: Throwable = null
val runnable = new Runnable() {
override def run(): Unit = try {
@@ -43,13 +43,21 @@ class LocalityPlacementStrategySuite extends SparkFunSuite {
}
val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread",
256 * 1024)
+ thread.setDaemon(true)
thread.start()
- thread.join()
+ val secondsToWait = 30
+ thread.join(secondsToWait * 1000)
+ if (thread.isAlive()) {
+ error = new RuntimeException(
+ "Timeout at waiting for thread to stop (its stack trace is added to
the exception)")
+ error.setStackTrace(thread.getStackTrace)
+ thread.interrupt()
+ }
if (error != null) {
val errors = new StringWriter()
error.printStackTrace(new PrintWriter(errors))
- fail(s"StackOverflowError should not be thrown; however,
got:\n\n$errors")
+ fail(s"Failed with an exception or a timeout at thread join:\n\n$errors")
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]