umehrot2 commented on issue #6900:
URL: https://github.com/apache/hudi/issues/6900#issuecomment-1286257672
Latest stack trace when using structured streaming with the following properties:
```
hoodie.datasource.hive_sync.enable=false
hoodie.upsert.shuffle.parallelism=20
hoodie.insert.shuffle.parallelism=20
hoodie.keep.min.commits=6
hoodie.keep.max.commits=7
hoodie.parquet.small.file.limit=52428800
hoodie.index.type=GLOBAL_BLOOM
hoodie.datasource.write.payload.class=org.apache.hudi.common.model.DefaultHoodieRecordPayload
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.NonpartitionedKeyGenerator
hoodie.metadata.enable=true
hoodie.cleaner.commits.retained=3
hoodie.clean.max.commits=5
hoodie.clean.async=false
hoodie.clean.automatic=true
hoodie.archive.async=false
hoodie.datasource.compaction.async.enable=true
hoodie.write.markers.type=DIRECT
hoodie.embed.timeline.server=true
hoodie.embed.timeline.server.async=false
hoodie.compact.schedule.inline=false
hoodie.compact.inline.max.delta.commits=2
```
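For context, a minimal sketch of how these properties might be wired into the structured streaming write. The source stream, record key field, checkpoint location, and trigger interval below are placeholders, not taken from the original job; the table path comes from the stack trace and the MOR table type is implied by `HoodieSparkMergeOnReadTable` in it:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

val spark = SparkSession.builder().appName("hudi-stream-sketch").getOrCreate()

// Placeholder source; the real job's input stream is not shown in the report.
val sourceDf = spark.readStream.format("rate").load()

val hudiOptions = Map(
  "hoodie.table.name" -> "dhdata_15",                      // inferred from the table path below
  "hoodie.datasource.write.table.type" -> "MERGE_ON_READ", // implied by the trace
  "hoodie.datasource.write.recordkey.field" -> "id",       // placeholder key field
  "hoodie.index.type" -> "GLOBAL_BLOOM",
  "hoodie.metadata.enable" -> "true",
  "hoodie.datasource.compaction.async.enable" -> "true"
  // ...plus the remaining properties from the list above
)

sourceDf.writeStream
  .format("hudi")
  .options(hudiOptions)
  .outputMode("append")
  .option("checkpointLocation", "s3://app-util/checkpoints/dhdata_15/") // placeholder
  .trigger(Trigger.ProcessingTime("60 seconds"))                        // placeholder interval
  .start("s3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15")
```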
Stacktrace:
```
22/10/19 15:36:18 ERROR UpsertPartitioner: Error trying to compute average bytes/record
org.apache.hudi.exception.HoodieIOException: Could not read commit details from s3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15/.hoodie/20221019152438682.commit
    at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:761)
    at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.getInstantDetails(HoodieActiveTimeline.java:266)
    at org.apache.hudi.common.table.timeline.HoodieDefaultTimeline.getInstantDetails(HoodieDefaultTimeline.java:372)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.averageBytesPerRecord(UpsertPartitioner.java:373)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.assignInserts(UpsertPartitioner.java:162)
    at org.apache.hudi.table.action.commit.UpsertPartitioner.<init>(UpsertPartitioner.java:95)
    at org.apache.hudi.table.action.deltacommit.SparkUpsertDeltaCommitPartitioner.<init>(SparkUpsertDeltaCommitPartitioner.java:50)
    at org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor.getUpsertPartitioner(BaseSparkDeltaCommitActionExecutor.java:69)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.getPartitioner(BaseSparkCommitActionExecutor.java:217)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:163)
    at org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor.execute(BaseSparkCommitActionExecutor.java:85)
    at org.apache.hudi.table.action.commit.BaseWriteHelper.write(BaseWriteHelper.java:57)
    at org.apache.hudi.table.action.deltacommit.SparkUpsertDeltaCommitActionExecutor.execute(SparkUpsertDeltaCommitActionExecutor.java:46)
    at org.apache.hudi.table.HoodieSparkMergeOnReadTable.upsert(HoodieSparkMergeOnReadTable.java:89)
    at org.apache.hudi.table.HoodieSparkMergeOnReadTable.upsert(HoodieSparkMergeOnReadTable.java:76)
    at org.apache.hudi.client.SparkRDDWriteClient.upsert(SparkRDDWriteClient.java:157)
    at org.apache.hudi.DataSourceUtils.doWriteOperation(DataSourceUtils.java:213)
    at org.apache.hudi.HoodieSparkSqlWriter$.write(HoodieSparkSqlWriter.scala:304)
    at org.apache.hudi.HoodieStreamingSink.$anonfun$addBatch$2(HoodieStreamingSink.scala:91)
    at scala.util.Try$.apply(Try.scala:213)
    at org.apache.hudi.HoodieStreamingSink.$anonfun$addBatch$1(HoodieStreamingSink.scala:90)
    at org.apache.hudi.HoodieStreamingSink.retry(HoodieStreamingSink.scala:166)
    at org.apache.hudi.HoodieStreamingSink.addBatch(HoodieStreamingSink.scala:89)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$17(MicroBatchExecution.scala:600)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
    at org.apache.spark.sql.execution.SQLExecution$.executeQuery$1(SQLExecution.scala:110)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:135)
    at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:107)
    at org.apache.spark.sql.execution.SQLExecution$.withTracker(SQLExecution.scala:232)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:135)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:253)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:134)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:68)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$16(MicroBatchExecution.scala:598)
    at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
    at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
    at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runBatch(MicroBatchExecution.scala:598)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$2(MicroBatchExecution.scala:228)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken(ProgressReporter.scala:375)
    at org.apache.spark.sql.execution.streaming.ProgressReporter.reportTimeTaken$(ProgressReporter.scala:373)
    at org.apache.spark.sql.execution.streaming.StreamExecution.reportTimeTaken(StreamExecution.scala:69)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runActivatedStream$1(MicroBatchExecution.scala:193)
    at org.apache.spark.sql.execution.streaming.ProcessingTimeExecutor.execute(TriggerExecutor.scala:57)
    at org.apache.spark.sql.execution.streaming.MicroBatchExecution.runActivatedStream(MicroBatchExecution.scala:187)
    at org.apache.spark.sql.execution.streaming.StreamExecution.$anonfun$runStream$1(StreamExecution.scala:303)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:286)
    at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:209)
Caused by: java.io.FileNotFoundException: No such file or directory 's3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15/.hoodie/20221019152438682.commit'
    at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.getFileStatus(S3NativeFileSystem.java:521)
    at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:932)
    at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.open(S3NativeFileSystem.java:924)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:906)
    at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.open(EmrFileSystem.java:194)
    at org.apache.hudi.common.fs.HoodieWrapperFileSystem.open(HoodieWrapperFileSystem.java:460)
    at org.apache.hudi.common.table.timeline.HoodieActiveTimeline.readDataFromPath(HoodieActiveTimeline.java:758)
    ... 52 more
```
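One way to sanity-check what the trace shows: `UpsertPartitioner.averageBytesPerRecord` failed to open a commit file that was on the timeline when it was listed but gone by the time it was read. A plausible (but unconfirmed) reading, given the tight `hoodie.keep.min.commits=6` / `hoodie.keep.max.commits=7` settings above, is that the archiver removed the instant in between. A minimal Scala sketch for checking from a Spark shell, assuming the default `.hoodie/archived` archive folder and using the table path from the trace:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

val tablePath = "s3://app-util/hudi-bloom/multi-stream-105/cdc_test_db/dhdata_15"
val fs: FileSystem = new Path(tablePath).getFileSystem(new Configuration())

// Is the instant the partitioner tried to read still on the active timeline?
val commit = new Path(s"$tablePath/.hoodie/20221019152438682.commit")
println(s"active commit present: ${fs.exists(commit)}")

// If not, list the archived timeline (default folder: .hoodie/archived) to see
// whether the archiver trimmed it off the active timeline.
fs.listStatus(new Path(s"$tablePath/.hoodie/archived"))
  .foreach(status => println(status.getPath))
```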