luffyd commented on issue #588:
URL: https://github.com/apache/hudi/issues/588#issuecomment-647941035
> > BUG: Transient "No such file or directory" error
> > ```
> > scala> splits.zipWithIndex.foreach{case (x,i)=>{
> > | println(s"adding split $i")
> > |
> > | val path =
s"hdfs:///tmp/ap-invoices-all-snapshot-slice-stg/"
> > |
> > | x.write.mode("overwrite").parquet(path)//stage the subset
> > |
> > | new HorizonCompactionUtil().saveToHudiTable(
> > | spark.read.parquet(path),
> > | tableName = "ap_invoices_all_hudi",
> > | tablePath =
"s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/",
> > | primaryKey = "invoice_id",
> > | pkOrderingCol = "capture_timestamp",
> > | enableHiveSync = false,
> > | partitionCol = None,
> > | insertMode = "append",
> > | parallelism = 5000,
> > | fakePartitionCol = false
> > | )
> > | }}
> > adding split 0
> > 19/02/27 22:33:11 WARN SparkConf: The configuration key
'spark.yarn.executor.memoryOverhead' has been deprecated as of Spark 2.3 and
may be removed in the future. Please use the new key
'spark.executor.memoryOverhead' instead.
> > 19/02/27 22:33:11 WARN SparkConf: The configuration key
'spark.yarn.executor.memoryOverhead' has been deprecated as of Spark 2.3 and
may be removed in the future. Please use the new key
'spark.executor.memoryOverhead' instead.
> > [Stage 34:===========================> (138 +
124) / 262]19/02/27 22:39:35 WARN TaskSetManager: Lost task 103.0 in stage 34.0
(TID 38146, ip-172-31-25-39.ec2.internal, executor 2674):
java.lang.RuntimeException: com.uber.hoodie.exception.HoodieException:
com.uber.hoodie.exception.HoodieException:
java.util.concurrent.ExecutionException:
com.uber.hoodie.exception.HoodieInsertException: Failed to close the Insert
Handle for path
s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet
> > at
com.uber.hoodie.func.LazyIterableIterator.next(LazyIterableIterator.java:121)
> > at
scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43)
> > at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435)
> > at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441)
> > at
org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
> > at
org.apache.spark.storage.memory.MemoryStore.putIteratorAsBytes(MemoryStore.scala:348)
> > at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1182)
> > at
org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
> > at
org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
> > at
org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
> > at
org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
> > at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
> > at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
> > at
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
> > at
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
> > at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
> > at
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
> > at org.apache.spark.scheduler.Task.run(Task.scala:121)
> > at
org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
> > at
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
> > at
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
> > at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> > at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> > at java.lang.Thread.run(Thread.java:748)
> > Caused by: com.uber.hoodie.exception.HoodieException:
com.uber.hoodie.exception.HoodieException:
java.util.concurrent.ExecutionException:
com.uber.hoodie.exception.HoodieInsertException: Failed to close the Insert
Handle for path
s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet
> > at
com.uber.hoodie.func.CopyOnWriteLazyInsertIterable.computeNext(CopyOnWriteLazyInsertIterable.java:106)
> > at
com.uber.hoodie.func.CopyOnWriteLazyInsertIterable.computeNext(CopyOnWriteLazyInsertIterable.java:45)
> > at
com.uber.hoodie.func.LazyIterableIterator.next(LazyIterableIterator.java:119)
> > ... 23 more
> > Caused by: com.uber.hoodie.exception.HoodieException:
java.util.concurrent.ExecutionException:
com.uber.hoodie.exception.HoodieInsertException: Failed to close the Insert
Handle for path
s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet
> > at
com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:146)
> > at
com.uber.hoodie.func.CopyOnWriteLazyInsertIterable.computeNext(CopyOnWriteLazyInsertIterable.java:102)
> > ... 25 more
> > Caused by: java.util.concurrent.ExecutionException:
com.uber.hoodie.exception.HoodieInsertException: Failed to close the Insert
Handle for path
s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet
> > at java.util.concurrent.FutureTask.report(FutureTask.java:122)
> > at java.util.concurrent.FutureTask.get(FutureTask.java:192)
> > at
com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor.execute(BoundedInMemoryExecutor.java:144)
> > ... 26 more
> > Caused by: com.uber.hoodie.exception.HoodieInsertException: Failed to
close the Insert Handle for path
s3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet
> > at
com.uber.hoodie.io.HoodieCreateHandle.close(HoodieCreateHandle.java:165)
> > at
com.uber.hoodie.func.CopyOnWriteLazyInsertIterable$CopyOnWriteInsertHandler.finish(CopyOnWriteLazyInsertIterable.java:168)
> > at
com.uber.hoodie.common.util.queue.BoundedInMemoryQueueConsumer.consume(BoundedInMemoryQueueConsumer.java:42)
> > at
com.uber.hoodie.common.util.queue.BoundedInMemoryExecutor.lambda$null$2(BoundedInMemoryExecutor.java:124)
> > at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> > ... 3 more
> > Caused by: java.io.FileNotFoundException: No such file or directory
's3://horizon-hudi-dev/data/direct-write-ap-invoices-all/db15bed7-b959-41fd-ac98-0e04d7058a20_103_20190227223315.parquet'
> > at
com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.getFileStatus(S3NativeFileSystem.java:808)
> > at
com.amazon.ws.emr.hadoop.fs.EmrFileSystem.getFileStatus(EmrFileSystem.java:548)
> > at
com.uber.hoodie.common.util.FSUtils.getFileSize(FSUtils.java:126)
> > at
com.uber.hoodie.io.HoodieCreateHandle.close(HoodieCreateHandle.java:156)
> > ... 7 more
> > ```
>
> I have noticed similar failures.
I am no longer seeing this issue after enabling the EMRFS consistent view setting:
https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-consistent-view.html
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org