[ 
https://issues.apache.org/jira/browse/SPARK-45972?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17787111#comment-17787111
 ] 

Yang Jie commented on SPARK-45972:
----------------------------------

cc [~dongjoon] I haven't found the root cause yet. Do you have any ideas? 
Thanks!
 
 
 
 
 

> `ParquetFileFormatV1Suite/ParquetFileFormatV2Suite` often fails in the daily 
> tests of Java 21
> ---------------------------------------------------------------------------------------------
>
>                 Key: SPARK-45972
>                 URL: https://issues.apache.org/jira/browse/SPARK-45972
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL, Tests
>    Affects Versions: 4.0.0
>            Reporter: Yang Jie
>            Priority: Major
>
> There are two test suites, ParquetFileFormatV1Suite and 
> ParquetFileFormatV2Suite, that often fail in the daily tests of Java 21. The 
> reason for the failure is 'There are ${n} possibly leaked file streams..'.
> [https://github.com/apache/spark/actions/runs/6899686304/job/18771676570]
> [https://github.com/apache/spark/actions/runs/6886143740/job/18731379673]
> [https://github.com/apache/spark/actions/runs/6872747886/job/18691717269]
> [https://github.com/apache/spark/actions/runs/6859020738/job/18650698085]
> https://github.com/apache/spark/actions/runs/6845491908/job/18610786715
>  
> {code:java}
> ===== POSSIBLE THREAD LEAK IN SUITE 
> o.a.s.sql.execution.datasources.parquet.ParquetFileFormatV1Suite, threads: 
> readingParquetFooters-ForkJoinPool-7045-worker-3 (daemon=true), 
> readingParquetFooters-ForkJoinPool-7045-worker-4 (daemon=true), 
> shuffle-boss-2239-1 (daemon=true), rpc-boss-2236-1 (daemon=true) =====
> 30295
> 30296
> 30297[info] 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatV1Suite 
> *** ABORTED *** (11 seconds, 836 milliseconds)
> 30298[info]   The code passed to eventually never returned normally. 
> Attempted 15 times over 10.009598219999999 seconds. Last failure message: 
> There are 2 possibly leaked file streams.. (SharedSparkSession.scala:164)
> 30299[info]   org.scalatest.exceptions.TestFailedDueToTimeoutException:
> 30300[info]   at 
> org.scalatest.enablers.Retrying$$anon$4.tryTryAgain$2(Retrying.scala:219)
> 30301[info]   at 
> org.scalatest.enablers.Retrying$$anon$4.retry(Retrying.scala:226)
> 30302[info]   at 
> org.scalatest.concurrent.Eventually.eventually(Eventually.scala:313)
> 30303[info]   at 
> org.scalatest.concurrent.Eventually.eventually$(Eventually.scala:312)
> 30304[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatSuite.eventually(ParquetFileFormatSuite.scala:31)
> 30305[info]   at 
> org.apache.spark.sql.test.SharedSparkSessionBase.afterEach(SharedSparkSession.scala:164)
> 30306[info]   at 
> org.apache.spark.sql.test.SharedSparkSessionBase.afterEach$(SharedSparkSession.scala:158)
> 30307[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatSuite.afterEach(ParquetFileFormatSuite.scala:31)
> 30308[info]   at 
> org.scalatest.BeforeAndAfterEach.$anonfun$runTest$1(BeforeAndAfterEach.scala:247)
> 30309[info]   at 
> org.scalatest.Status.$anonfun$withAfterEffect$1(Status.scala:377)
> 30310[info]   at 
> org.scalatest.Status.$anonfun$withAfterEffect$1$adapted(Status.scala:373)
> 30311[info]   at 
> org.scalatest.SucceededStatus$.whenCompleted(Status.scala:462)
> 30312[info]   at org.scalatest.Status.withAfterEffect(Status.scala:373)
> 30313[info]   at org.scalatest.Status.withAfterEffect$(Status.scala:371)
> 30314[info]   at 
> org.scalatest.SucceededStatus$.withAfterEffect(Status.scala:434)
> 30315[info]   at 
> org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:246)
> 30316[info]   at 
> org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
> 30317[info]   at 
> org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
> 30318[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
> 30319[info]   at 
> org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
> 30320[info]   at scala.collection.immutable.List.foreach(List.scala:333)
> 30321[info]   at 
> org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
> 30322[info]   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
> 30323[info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
> 30324[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
> 30325[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
> 30326[info]   at 
> org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
> 30327[info]   at org.scalatest.Suite.run(Suite.scala:1114)
> 30328[info]   at org.scalatest.Suite.run$(Suite.scala:1096)
> 30329[info]   at 
> org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564)
> 30330[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273)
> 30331[info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
> 30332[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273)
> 30333[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272)
> 30334[info]   at 
> org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69)
> 30335[info]   at 
> org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
> 30336[info]   at 
> org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
> 30337[info]   at 
> org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
> 30338[info]   at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69)
> 30339[info]   at 
> org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:321)
> 30340[info]   at 
> org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:517)
> 30341[info]   at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:414)
> 30342[info]   at 
> java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
> 30343[info]   at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
> 30344[info]   at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
> 30345[info]   at java.base/java.lang.Thread.run(Thread.java:1583)
> 30346[info]   Cause: java.lang.IllegalStateException: There are 2 possibly 
> leaked file streams.
> 30347[info]   at 
> org.apache.spark.DebugFilesystem$.assertNoOpenStreams(DebugFilesystem.scala:54)
> 30348[info]   at 
> org.apache.spark.sql.test.SharedSparkSessionBase.$anonfun$afterEach$1(SharedSparkSession.scala:165)
> 30349[info]   at 
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18)
> 30350[info]   at 
> org.scalatest.enablers.Retrying$$anon$4.makeAValiantAttempt$1(Retrying.scala:184)
> 30351[info]   at 
> org.scalatest.enablers.Retrying$$anon$4.tryTryAgain$2(Retrying.scala:196)
> 30352[info]   at 
> org.scalatest.enablers.Retrying$$anon$4.retry(Retrying.scala:226)
> 30353[info]   at 
> org.scalatest.concurrent.Eventually.eventually(Eventually.scala:313)
> 30354[info]   at 
> org.scalatest.concurrent.Eventually.eventually$(Eventually.scala:312)
> 30355[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatSuite.eventually(ParquetFileFormatSuite.scala:31)
> 30356[info]   at 
> org.apache.spark.sql.test.SharedSparkSessionBase.afterEach(SharedSparkSession.scala:164)
> 30357[info]   at 
> org.apache.spark.sql.test.SharedSparkSessionBase.afterEach$(SharedSparkSession.scala:158)
> 30358[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatSuite.afterEach(ParquetFileFormatSuite.scala:31)
> 30359[info]   at 
> org.scalatest.BeforeAndAfterEach.$anonfun$runTest$1(BeforeAndAfterEach.scala:247)
> 30360[info]   at 
> org.scalatest.Status.$anonfun$withAfterEffect$1(Status.scala:377)
> 30361[info]   at 
> org.scalatest.Status.$anonfun$withAfterEffect$1$adapted(Status.scala:373)
> 30362[info]   at 
> org.scalatest.SucceededStatus$.whenCompleted(Status.scala:462)
> 30363[info]   at org.scalatest.Status.withAfterEffect(Status.scala:373)
> 30364[info]   at org.scalatest.Status.withAfterEffect$(Status.scala:371)
> 30365[info]   at 
> org.scalatest.SucceededStatus$.withAfterEffect(Status.scala:434)
> 30366[info]   at 
> org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:246)
> 30367[info]   at 
> org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227)
> 30368[info]   at 
> org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69)
> 30369[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269)
> 30370[info]   at 
> org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413)
> 30371[info]   at scala.collection.immutable.List.foreach(List.scala:333)
> 30372[info]   at 
> org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
> 30373[info]   at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396)
> 30374[info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475)
> 30375[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269)
> 30376[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268)
> 30377[info]   at 
> org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564)
> 30378[info]   at org.scalatest.Suite.run(Suite.scala:1114)
> 30379[info]   at org.scalatest.Suite.run$(Suite.scala:1096)
> 30380[info]   at 
> org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564)
> 30381[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273)
> 30382[info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:535)
> 30383[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273)
> 30384[info]   at 
> org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272)
> 30385[info]   at 
> org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69)
> 30386[info]   at 
> org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213)
> 30387[info]   at 
> org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210)
> 30388[info]   at 
> org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208)
> 30389[info]   at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69)
> 30390[info]   at 
> org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:321)
> 30391[info]   at 
> org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:517)
> 30392[info]   at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:414)
> 30393[info]   at 
> java.base/java.util.concurrent.FutureTask.run(FutureTask.java:317)
> 30394[info]   at 
> java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
> 30395[info]   at 
> java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
> 30396[info]   at java.base/java.lang.Thread.run(Thread.java:1583)
> 30397[info]   Cause: java.lang.Throwable:
> 30398[info]   at 
> org.apache.spark.DebugFilesystem$.addOpenStream(DebugFilesystem.scala:35)
> 30399[info]   at 
> org.apache.spark.DebugFilesystem.open(DebugFilesystem.scala:75)
> 30400[info]   at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:997)
> 30401[info]   at 
> org.apache.parquet.hadoop.util.HadoopInputFile.newStream(HadoopInputFile.java:69)
> 30402[info]   at 
> org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:796)
> 30403[info]   at 
> org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:666)
> 30404[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:85)
> 30405[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:76)
> 30406[info]   at 
> org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat$.$anonfun$readParquetFootersInParallel$1(ParquetFileFormat.scala:450)
> 30407[info]   at 
> org.apache.spark.util.ThreadUtils$.$anonfun$parmap$2(ThreadUtils.scala:384)
> 30408[info]   at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:687)
> 30409[info]   at 
> scala.concurrent.impl.Promise$Transformation.run(Promise.scala:467)
> 30410[info]   at 
> java.base/java.util.concurrent.ForkJoinTask$RunnableExecuteAction.exec(ForkJoinTask.java:1423)
> 30411[info]   at 
> java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:387)
> 30412[info]   at 
> java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1312)
> 30413[info]   at 
> java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1843)
> 30414[info]   at 
> java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1808)
> 30415[info]   at 
> java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:188)
>  {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to