Weizhong created SPARK-19325: -------------------------------- Summary: Running query hang-up 5min Key: SPARK-19325 URL: https://issues.apache.org/jira/browse/SPARK-19325 Project: Spark Issue Type: Bug Components: SQL Affects Versions: 1.5.2 Environment: OS: SUSE 12.1 JDK: Oracle JDK 1.8.0_112 Reporter: Weizhong Priority: Minor
1. Running query, 1 stage hang-up 5min 2. The stage is: {noformat} PhysicalRDD(read from parquet file) --> Filter --> ConvertToUnsafe --> BroadcastHashJoin --> TungstenProject --> BroadcastHashJoin --> TungstenProject --> TungstenExchange {noformat} 3. When hang-up, we dump the jstack, and details: {noformat} "Executor task launch worker-3" #147 daemon prio=5 os_prio=0 tid=0x00007fb5481af000 nid=0x3a166 runnable [0x00007fb4f18a6000] java.lang.Thread.State: RUNNABLE at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:219) at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:75) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:42) at org.apache.spark.scheduler.Task.run(Task.scala:90) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:253) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Locked ownable synchronizers: - <0x00000002c590e7e0> (a java.util.concurrent.ThreadPoolExecutor$Worker) "Executor task launch worker-2" #146 daemon prio=5 os_prio=0 tid=0x00007fb548b15000 nid=0x3a137 runnable [0x00007fb4f31e3000] java.lang.Thread.State: 
RUNNABLE at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:219) at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:75) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:42) at org.apache.spark.scheduler.Task.run(Task.scala:90) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:253) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Locked ownable synchronizers: - <0x00000002c590fab0> (a java.util.concurrent.ThreadPoolExecutor$Worker) "Executor task launch worker-1" #145 daemon prio=5 os_prio=0 tid=0x00007fb54a4c6000 nid=0x3a12f runnable [0x00007fb4f19a7000] java.lang.Thread.State: RUNNABLE at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at 
org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:219) at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:75) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:42) at org.apache.spark.scheduler.Task.run(Task.scala:90) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:253) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Locked ownable synchronizers: - <0x00000002c590ea30> (a java.util.concurrent.ThreadPoolExecutor$Worker) "Executor task launch worker-0" #144 daemon prio=5 os_prio=0 tid=0x00007fb54a4c7800 nid=0x3a12e waiting on condition [0x00007fb4f1cdf000] java.lang.Thread.State: RUNNABLE at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.fetchNext(HashJoin.scala:181) at org.apache.spark.sql.execution.joins.HashJoin$$anon$1.hasNext(HashJoin.scala:149) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327) at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:219) at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:73) at 
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:75) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:42) at org.apache.spark.scheduler.Task.run(Task.scala:90) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:253) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) Locked ownable synchronizers: - <0x00000002c5910ba8> (a java.util.concurrent.ThreadPoolExecutor$Worker) {noformat} From the jstack info, we can see that the task hangs while doing ConvertToUnsafe, in the call to iter.hasNext -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org