[
https://issues.apache.org/jira/browse/HUDI-9305?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Ethan Guo updated HUDI-9305:
------------------------------
Description:
When enabling the metadata table (MDT) while reading a Hudi table on the
Databricks Spark runtime, the following issue occurs (with
"hoodie.metadata.enable" set to false, there is no issue)
{code:java}
spark.read.format("org.apache.hudi.Spark3DefaultSource").option("hoodie.metadata.enable",
"true").load(tablePath) {code}
{code:java}
Error while reading file
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
Caused by: FileReadException: Error while reading file
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
Caused by: InconsistentReadException: The file might have been updated during
query execution. Ensure that no pipeline updates existing files during query
execution and try again.
Caused by: RemoteFileChangedException: open
`s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
Change reported by S3 during open at position 436157. File
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
at given modTime (1000) was unavailable, null at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:744)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:713)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
at
com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: com.databricks.common.filesystem.InconsistentReadException: The file
might have been updated during query execution. Ensure that no pipeline updates
existing files during query execution and try again.
at
com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:247)
at
com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
at
org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
at
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
at
org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
at
org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
at
org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
at
org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
... 21 more
Caused by:
shaded.databricks.org.apache.hadoop.fs.s3a.RemoteFileChangedException: open
`s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
Change reported by S3 during open at position 436157. File
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
at given modTime (1000) was unavailable, null
at
shaded.databricks.org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:210)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:307)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$2(S3AInputStream.java:469)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:247)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:134)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:128)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:371)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:367)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:245)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:289)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:462)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:495)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$1(LokiS3FS.scala:251)
at
scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at
com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
... 40 more
Driver stacktrace:
at
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3874)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3796)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3783)
at
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3783)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1661)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1646)
at scala.Option.foreach(Option.scala:407)
at
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1646)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:4120)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4032)
at
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4020)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:54)
at
org.apache.spark.scheduler.DAGScheduler.$anonfun$runJob$1(DAGScheduler.scala:1323)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1311)
at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:3082)
at
org.apache.spark.sql.execution.collect.Collector.$anonfun$runSparkJobs$1(Collector.scala:355)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at
org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:299)
at
org.apache.spark.sql.execution.collect.Collector.$anonfun$collect$1(Collector.scala:384)
at
com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at
org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:381)
at
org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:122)
at
org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:131)
at
org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:94)
at
org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:90)
at
org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:78)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$computeResult$1(ResultCacheManager.scala:546)
at
com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.collectResult$1(ResultCacheManager.scala:540)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.computeResult(ResultCacheManager.scala:557)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:400)
at scala.Option.getOrElse(Option.scala:189)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:400)
at
org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:318)
at
org.apache.spark.sql.execution.SparkPlan.$anonfun$executeCollectResult$1(SparkPlan.scala:558)
at
com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at
org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:555)
at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3780)
at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:4736)
at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:3488)
at
org.apache.spark.sql.Dataset.$anonfun$withAction$3(Dataset.scala:4727)
at
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:1117)
at
org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4725)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$9(SQLExecution.scala:406)
at
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:716)
at
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:278)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1175)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:165)
at
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:653)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4725)
at org.apache.spark.sql.Dataset.head(Dataset.scala:3488)
at org.apache.spark.sql.Dataset.take(Dataset.scala:3711)
at org.apache.spark.sql.Dataset.getRows(Dataset.scala:349)
at org.apache.spark.sql.Dataset.showString(Dataset.scala:385)
at org.apache.spark.sql.Dataset.show(Dataset.scala:932)
at org.apache.spark.sql.Dataset.show(Dataset.scala:909)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:5)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:82)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:84)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:86)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:88)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:90)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:92)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:94)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:96)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:98)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:100)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:102)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:104)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:106)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:108)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:110)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:112)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:114)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:116)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw.<init>(command-2662702539138025:118)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw.<init>(command-2662702539138025:120)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw.<init>(command-2662702539138025:122)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read.<init>(command-2662702539138025:124)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$.<init>(command-2662702539138025:128)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$read$.<clinit>(command-2662702539138025)
at
$line5d8afe6fc6d24b609a51c897bb67564584.$eval$.$print$lzycompute(<notebook>:7)
at $line5d8afe6fc6d24b609a51c897bb67564584.$eval$.$print(<notebook>:6)
at $line5d8afe6fc6d24b609a51c897bb67564584.$eval.$print(<notebook>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
at
scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
at
scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
at
scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
at
scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
at
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
at
com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:201)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$3(ScalaDriverLocal.scala:268)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.threadSafeTrapExit(DriverLocal.scala:1624)
at
com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:1582)
at
com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:1506)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.executeCommand$1(ScalaDriverLocal.scala:268)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$2(ScalaDriverLocal.scala:238)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at scala.Console$.withErr(Console.scala:196)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:235)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at scala.Console$.withOut(Console.scala:167)
at
com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:235)
at
com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$33(DriverLocal.scala:1146)
at
com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)
at
com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$28(DriverLocal.scala:1137)
at
com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at
com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at
com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at
com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at
com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:99)
at
com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at
com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at
com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:99)
at
com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$1(DriverLocal.scala:1073)
at
com.databricks.backend.daemon.driver.DriverLocal$.$anonfun$maybeSynchronizeExecution$4(DriverLocal.scala:1534)
at
com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:750)
at
com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$2(DriverWrapper.scala:786)
at scala.util.Try$.apply(Try.scala:213)
at
com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:778)
at
com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$3(DriverWrapper.scala:818)
at
com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:669)
at
com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:687)
at
com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at
com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at
com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at
com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at
com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:72)
at
com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at
com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at
com.databricks.backend.daemon.driver.DriverWrapper.withAttributionTags(DriverWrapper.scala:72)
at
com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:664)
at
com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:582)
at
com.databricks.backend.daemon.driver.DriverWrapper.recordOperationWithResultTags(DriverWrapper.scala:72)
at
com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:818)
at
com.databricks.backend.daemon.driver.DriverWrapper.executeCommandAndGetError(DriverWrapper.scala:685)
at
com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:730)
at
com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$runInnerLoop$1(DriverWrapper.scala:560)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at
com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at
com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at
com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at
com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:72)
at
com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:560)
at
com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:482)
at
com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:290)
at java.lang.Thread.run(Thread.java:750)
Caused by: com.databricks.sql.io.FileReadException: Error while reading file
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:744)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:713)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
at
com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: com.databricks.common.filesystem.InconsistentReadException: The file
might have been updated during query execution. Ensure that no pipeline updates
existing files during query execution and try again.
at
com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:247)
at
com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
at
org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
at
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
at
org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
at
org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
at
org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
at
org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
at
com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by:
shaded.databricks.org.apache.hadoop.fs.s3a.RemoteFileChangedException: open
`s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
Change reported by S3 during open at position 436157. File
s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
at given modTime (1000) was unavailable, null
at
shaded.databricks.org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:210)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:307)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$2(S3AInputStream.java:469)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:247)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:134)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:128)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:371)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:367)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:245)
at
shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:289)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:462)
at
shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:495)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$1(LokiS3FS.scala:251)
at
scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
at
com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
at
com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
at
com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
at java.io.FilterInputStream.read(FilterInputStream.java:83)
at
org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
at
org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
at
org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
at
org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
at
org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
at
org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
at
org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
at
org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
at
org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
at
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
at scala.util.Success.$anonfun$map$1(Try.scala:255)
at scala.util.Success.map(Try.scala:213)
at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at
com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
at
com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
at
org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750){code}
> Fix Hudi table read with MDT on Databricks Spark runtime
> --------------------------------------------------------
>
> Key: HUDI-9305
> URL: https://issues.apache.org/jira/browse/HUDI-9305
> Project: Apache Hudi
> Issue Type: Improvement
> Reporter: Y Ethan Guo
> Priority: Major
> Fix For: 1.1.0
>
>
> When the metadata table (MDT) is enabled while reading a Hudi table on the
> Databricks Spark runtime, the following issue occurs (with
> "hoodie.metadata.enable" set to false, there is no issue)
> {code:java}
> spark.read.format("org.apache.hudi.Spark3DefaultSource").option("hoodie.metadata.enable",
> "true").load(tablePath) {code}
>
> {code:java}
> Error while reading file
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
> Caused by: FileReadException: Error while reading file
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
> Caused by: InconsistentReadException: The file might have been updated during
> query execution. Ensure that no pipeline updates existing files during query
> execution and try again.
> Caused by: RemoteFileChangedException: open
> `s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
> Change reported by S3 during open at position 436157. File
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
> at given modTime (1000) was unavailable, null at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:744)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:713)
> at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
> at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
> at scala.util.Success.$anonfun$map$1(Try.scala:255)
> at scala.util.Success.map(Try.scala:213)
> at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
> at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
> at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
> at
> com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: com.databricks.common.filesystem.InconsistentReadException: The
> file might have been updated during query execution. Ensure that no pipeline
> updates existing files during query execution and try again.
> at
> com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:247)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
> at
> org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
> at
> org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
> at
> org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
> at
> org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
> at
> org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
> at
> org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
> ... 21 more
> Caused by:
> shaded.databricks.org.apache.hadoop.fs.s3a.RemoteFileChangedException: open
> `s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
> Change reported by S3 during open at position 436157. File
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
> at given modTime (1000) was unavailable, null
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:210)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:307)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$2(S3AInputStream.java:469)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:247)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:134)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:128)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:371)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:367)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:245)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:289)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:462)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:495)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$1(LokiS3FS.scala:251)
> at
> scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
> ... 40 more
> Driver stacktrace:
> at
> org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3874)
> at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3796)
> at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3783)
> at
> scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
> at
> scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
> at
> org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3783)
> at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1661)
> at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1646)
> at scala.Option.foreach(Option.scala:407)
> at
> org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1646)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:4120)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4032)
> at
> org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4020)
> at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:54)
> at
> org.apache.spark.scheduler.DAGScheduler.$anonfun$runJob$1(DAGScheduler.scala:1323)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
> at
> org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1311)
> at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:3082)
> at
> org.apache.spark.sql.execution.collect.Collector.$anonfun$runSparkJobs$1(Collector.scala:355)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
> at
> org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:299)
> at
> org.apache.spark.sql.execution.collect.Collector.$anonfun$collect$1(Collector.scala:384)
> at
> com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
> at
> org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:381)
> at
> org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:122)
> at
> org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:131)
> at
> org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:94)
> at
> org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:90)
> at
> org.apache.spark.sql.execution.qrc.InternalRowFormat$.collect(cachedSparkResults.scala:78)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$computeResult$1(ResultCacheManager.scala:546)
> at
> com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.collectResult$1(ResultCacheManager.scala:540)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.computeResult(ResultCacheManager.scala:557)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.$anonfun$getOrComputeResultInternal$1(ResultCacheManager.scala:400)
> at scala.Option.getOrElse(Option.scala:189)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeResultInternal(ResultCacheManager.scala:400)
> at
> org.apache.spark.sql.execution.qrc.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:318)
> at
> org.apache.spark.sql.execution.SparkPlan.$anonfun$executeCollectResult$1(SparkPlan.scala:558)
> at
> com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
> at
> org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:555)
> at org.apache.spark.sql.Dataset.collectResult(Dataset.scala:3780)
> at org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:4736)
> at org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:3488)
> at
> org.apache.spark.sql.Dataset.$anonfun$withAction$3(Dataset.scala:4727)
> at
> org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:1117)
> at
> org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4725)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$9(SQLExecution.scala:406)
> at
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:716)
> at
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:278)
> at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1175)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId0(SQLExecution.scala:165)
> at
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:653)
> at org.apache.spark.sql.Dataset.withAction(Dataset.scala:4725)
> at org.apache.spark.sql.Dataset.head(Dataset.scala:3488)
> at org.apache.spark.sql.Dataset.take(Dataset.scala:3711)
> at org.apache.spark.sql.Dataset.getRows(Dataset.scala:349)
> at org.apache.spark.sql.Dataset.showString(Dataset.scala:385)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:932)
> at org.apache.spark.sql.Dataset.show(Dataset.scala:909)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:5)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:82)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:84)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:86)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:88)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:90)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:92)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:94)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:96)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:98)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:100)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:102)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:104)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:106)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:108)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:110)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:112)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:114)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw$$iw.<init>(command-2662702539138025:116)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw$$iw.<init>(command-2662702539138025:118)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw$$iw.<init>(command-2662702539138025:120)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$$iw.<init>(command-2662702539138025:122)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read.<init>(command-2662702539138025:124)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$.<init>(command-2662702539138025:128)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$read$.<clinit>(command-2662702539138025)
> at
> $line5d8afe6fc6d24b609a51c897bb67564584.$eval$.$print$lzycompute(<notebook>:7)
> at $line5d8afe6fc6d24b609a51c897bb67564584.$eval$.$print(<notebook>:6)
> at $line5d8afe6fc6d24b609a51c897bb67564584.$eval.$print(<notebook>)
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> at
> sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> at
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> at java.lang.reflect.Method.invoke(Method.java:498)
> at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)
> at
> scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1020)
> at
> scala.tools.nsc.interpreter.IMain.$anonfun$interpret$1(IMain.scala:568)
> at
> scala.reflect.internal.util.ScalaClassLoader.asContext(ScalaClassLoader.scala:36)
> at
> scala.reflect.internal.util.ScalaClassLoader.asContext$(ScalaClassLoader.scala:116)
> at
> scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:41)
> at scala.tools.nsc.interpreter.IMain.loadAndRunReq$1(IMain.scala:567)
> at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:594)
> at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:564)
> at
> com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:201)
> at
> com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$3(ScalaDriverLocal.scala:268)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.threadSafeTrapExit(DriverLocal.scala:1624)
> at
> com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:1582)
> at
> com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:1506)
> at
> com.databricks.backend.daemon.driver.ScalaDriverLocal.executeCommand$1(ScalaDriverLocal.scala:268)
> at
> com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$2(ScalaDriverLocal.scala:238)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
> at scala.Console$.withErr(Console.scala:196)
> at
> com.databricks.backend.daemon.driver.ScalaDriverLocal.$anonfun$repl$1(ScalaDriverLocal.scala:235)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
> at scala.Console$.withOut(Console.scala:167)
> at
> com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:235)
> at
> com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$33(DriverLocal.scala:1146)
> at
> com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
> at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)
> at
> com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$28(DriverLocal.scala:1137)
> at
> com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
> at
> com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
> at
> com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
> at
> com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
> at
> com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:99)
> at
> com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
> at
> com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
> at
> com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:99)
> at
> com.databricks.backend.daemon.driver.DriverLocal.$anonfun$execute$1(DriverLocal.scala:1073)
> at
> com.databricks.backend.daemon.driver.DriverLocal$.$anonfun$maybeSynchronizeExecution$4(DriverLocal.scala:1534)
> at
> com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:750)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$2(DriverWrapper.scala:786)
> at scala.util.Try$.apply(Try.scala:213)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$1(DriverWrapper.scala:778)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$tryExecutingCommand$3(DriverWrapper.scala:818)
> at
> com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:669)
> at
> com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:687)
> at
> com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
> at
> com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
> at
> com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
> at
> com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:72)
> at
> com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
> at
> com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.withAttributionTags(DriverWrapper.scala:72)
> at
> com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:664)
> at
> com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:582)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.recordOperationWithResultTags(DriverWrapper.scala:72)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:818)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.executeCommandAndGetError(DriverWrapper.scala:685)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:730)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.$anonfun$runInnerLoop$1(DriverWrapper.scala:560)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
> at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
> at
> com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
> at
> com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
> at
> com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.withAttributionContext(DriverWrapper.scala:72)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:560)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:482)
> at
> com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:290)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: com.databricks.sql.io.FileReadException: Error while reading file
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet.
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:744)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:713)
> at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
> at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
> at scala.util.Success.$anonfun$map$1(Try.scala:255)
> at scala.util.Success.map(Try.scala:213)
> at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
> at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
> at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
> at
> com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by: com.databricks.common.filesystem.InconsistentReadException: The
> file might have been updated during query execution. Ensure that no pipeline
> updates existing files during query execution and try again.
> at
> com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:247)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
> at
> org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
> at
> org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
> at
> org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
> at
> org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
> at
> org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
> at
> org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
> at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
> at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
> at scala.util.Success.$anonfun$map$1(Try.scala:255)
> at scala.util.Success.map(Try.scala:213)
> at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
> at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
> at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
> at
> com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750)
> Caused by:
> shaded.databricks.org.apache.hadoop.fs.s3a.RemoteFileChangedException: open
> `s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet':
> Change reported by S3 during open at position 436157. File
> s3a://dbr-test/hudi_cow/san_francisco/01a40093-7eeb-4fbb-a127-2b5de8944d36-0_2-78-0_20250411000608490.parquet
> at given modTime (1000) was unavailable, null
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.impl.ChangeTracker.processResponse(ChangeTracker.java:210)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:307)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$2(S3AInputStream.java:469)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:247)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:134)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:128)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:371)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:435)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:367)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:245)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:289)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:462)
> at
> shaded.databricks.org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:495)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.$anonfun$read$1(LokiS3FS.scala:251)
> at
> scala.runtime.java8.JFunction0$mcI$sp.apply(JFunction0$mcI$sp.java:23)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.withExceptionRewrites(LokiS3FS.scala:244)
> at
> com.databricks.common.filesystem.LokiS3AInputStream.read(LokiS3FS.scala:251)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.$anonfun$read$2(FileSystemWithMetrics.scala:83)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.withTimeAndBytesReadMetric(FileSystemWithMetrics.scala:67)
> at
> com.databricks.spark.metrics.FSInputStreamWithMetrics.read(FileSystemWithMetrics.scala:82)
> at java.io.FilterInputStream.read(FilterInputStream.java:83)
> at
> org.apache.parquet.io.DelegatingSeekableInputStream.read(DelegatingSeekableInputStream.java:61)
> at
> org.apache.parquet.bytes.BytesUtils.readIntLittleEndian(BytesUtils.java:83)
> at
> org.apache.parquet.hadoop.ParquetFileReader.readFooter(ParquetFileReader.java:560)
> at
> org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:802)
> at
> org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:670)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:84)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:71)
> at
> org.apache.spark.sql.execution.datasources.parquet.ParquetFooterReader.readFooter(ParquetFooterReader.java:66)
> at
> org.apache.spark.sql.execution.datasources.parquet.Spark35ParquetReader.doRead(Spark35ParquetReader.scala:101)
> at
> org.apache.spark.sql.execution.datasources.parquet.SparkParquetReaderBase.read(SparkParquetReaderBase.scala:81)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.readBaseFile(HoodieFileGroupReaderBasedParquetFileFormat.scala:274)
> at
> org.apache.spark.sql.execution.datasources.parquet.HoodieFileGroupReaderBasedParquetFileFormat.$anonfun$buildReaderWithPartitionValues$3(HoodieFileGroupReaderBasedParquetFileFormat.scala:192)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:628)
> at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
> at
> org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:922)
> at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
> at scala.util.Success.$anonfun$map$1(Try.scala:255)
> at scala.util.Success.map(Try.scala:213)
> at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
> at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
> at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
> at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
> at
> com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:91)
> at
> com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:90)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:67)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:131)
> at
> org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:134)
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at java.lang.Thread.run(Thread.java:750){code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)