[ https://issues.apache.org/jira/browse/HUDI-2986?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Raymond Xu closed HUDI-2986.
----------------------------
Fix Version/s: (was: 0.11.0)
               (was: 0.10.1)
Resolution: Won't Fix
Not seeing the issue in 0.10.0.
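For anyone who hits the same symptom on other versions: the trace quoted below fails inside BypassMergeSortShuffleWriter, which holds one open temp shuffle file per reduce partition for each running map task, so wide shuffles under continuous mode can exhaust the executor's file-descriptor limit. Raising the OS limit (ulimit -n) on the worker nodes is the usual first step. Below is a minimal Scala sketch of the Spark-side knobs; the values are illustrative and were not verified against this job:

{code:scala}
import org.apache.spark.SparkConf

// Illustrative settings only; tune to the actual workload.
val conf = new SparkConf()
  // BypassMergeSortShuffleWriter is only chosen when the reduce partition
  // count is at or below this threshold (default 200), and it opens one
  // temp file per partition concurrently. Lowering the threshold forces
  // the sort-based writer, which caps concurrent open files.
  .set("spark.shuffle.sort.bypassMergeThreshold", "1")
  // Fewer reduce partitions also means fewer temp_shuffle_* files.
  .set("spark.sql.shuffle.partitions", "200")
{code}

With Deltastreamer these would normally be passed as --conf flags on spark-submit rather than set in code.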
> Deltastreamer continuous mode runs into Too many open files exception
> ---------------------------------------------------------------------
>
> Key: HUDI-2986
> URL: https://issues.apache.org/jira/browse/HUDI-2986
> Project: Apache Hudi
> Issue Type: Bug
> Components: DeltaStreamer, Writer Core
> Reporter: Raymond Xu
> Assignee: Raymond Xu
> Priority: Blocker
> Labels: core-flow-ds, sev:critical
>
> Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 6 in stage 35202.0 failed 4 times, most recent failure: Lost task 6.3 in stage 35202.0 (TID 1172485, ip-10-211-53-165.infra.usw2.zdsys.com, executor 1): java.io.FileNotFoundException: /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e (Too many open files)
>     at java.io.FileOutputStream.open0(Native Method)
>     at java.io.FileOutputStream.open(FileOutputStream.java:270)
>     at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
>     at org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106)
>     at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119)
>     at org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251)
>     at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:748)
> Driver stacktrace:
>     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:2136)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2124)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:2123)
>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2123)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994)
>     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:994)
>     at scala.Option.foreach(Option.scala:257)
>     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:994)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2384)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2333)
>     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2322)
>     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>     at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:805)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2097)
>     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2194)
>     at org.apache.spark.rdd.RDD$$anonfun$fold$1.apply(RDD.scala:1143)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>     at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>     at org.apache.spark.rdd.RDD.fold(RDD.scala:1137)
>     at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply$mcD$sp(DoubleRDDFunctions.scala:35)
>     at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35)
>     at org.apache.spark.rdd.DoubleRDDFunctions$$anonfun$sum$1.apply(DoubleRDDFunctions.scala:35)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>     at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
>     at org.apache.spark.rdd.RDD.withScope(RDD.scala:385)
>     at org.apache.spark.rdd.DoubleRDDFunctions.sum(DoubleRDDFunctions.scala:34)
>     at org.apache.spark.api.java.JavaDoubleRDD.sum(JavaDoubleRDD.scala:165)
>     at org.apache.hudi.utilities.deltastreamer.DeltaSync.writeToSink(DeltaSync.java:447)
>     at org.apache.hudi.utilities.deltastreamer.DeltaSync.syncOnce(DeltaSync.java:281)
>     at org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer$DeltaSyncService.lambda$startService$0(HoodieDeltaStreamer.java:587)
>     ... 4 more
> Caused by: java.io.FileNotFoundException: /mnt/yarn/usercache/hadoop/appcache/application_1638666447607_0001/blockmgr-3725bb05-2c9a-4073-80f6-4eaa335321c9/34/temp_shuffle_8f675a83-21ac-4908-b8da-1c8e25a59b8e (Too many open files)
>     at java.io.FileOutputStream.open0(Native Method)
>     at java.io.FileOutputStream.open(FileOutputStream.java:270)
>     at java.io.FileOutputStream.<init>(FileOutputStream.java:213)
>     at org.apache.spark.storage.DiskBlockObjectWriter.initialize(DiskBlockObjectWriter.scala:106)
>     at org.apache.spark.storage.DiskBlockObjectWriter.open(DiskBlockObjectWriter.scala:119)
>     at org.apache.spark.storage.DiskBlockObjectWriter.write(DiskBlockObjectWriter.scala:251)
>     at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:157)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:95)
>     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
>     at org.apache.spark.scheduler.Task.run(Task.scala:123)
>     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
>     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1405)
>     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
>     ... 3 more
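If this resurfaces, file-descriptor pressure can be confirmed from inside the executor JVM via the com.sun.management MXBean. A hypothetical helper sketch (FdCheck is illustrative, not part of Hudi or Spark):

{code:scala}
import java.lang.management.ManagementFactory
import com.sun.management.UnixOperatingSystemMXBean

// Hypothetical diagnostic helper; not part of Hudi or Spark.
object FdCheck {
  // Returns (open, max) file-descriptor counts for this JVM, when available.
  def openVsMax(): Option[(Long, Long)] =
    ManagementFactory.getOperatingSystemMXBean match {
      case os: UnixOperatingSystemMXBean =>
        Some((os.getOpenFileDescriptorCount, os.getMaxFileDescriptorCount))
      case _ => None // non-Unix JVMs do not expose these counts
    }
}

// Example: run inside mapPartitions so the counts come from the executors.
// rdd.mapPartitions { it => println(s"fds=${FdCheck.openVsMax()}"); it }
{code}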
--
This message was sent by Atlassian Jira
(v8.20.1#820001)