[
https://issues.apache.org/jira/browse/FLINK-28249?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17558722#comment-17558722
]
Yanfei Lei edited comment on FLINK-28249 at 6/25/22 4:59 AM:
-------------------------------------------------------------
Hi Jie, this log entry is only a warning and does not affect the running job.
{code:java}
2022-06-22 23:14:28,477 WARN  org.apache.flink.runtime.checkpoint.CheckpointSubsumeHelper [] - Fail to subsume the old checkpoint.{code}
Can you check if
{noformat}
gs://flink-checkpoint-dev-bucket/device-logs/flink-checkpoint/822475f36e7a9a4f4048cb82791c55e2/chk-1/_metadata{noformat}
exists in your Google Cloud Storage bucket? If it does, you can delete it manually.
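In case it helps, here is a rough, untested sketch of that check and the manual cleanup done programmatically through the Hadoop FileSystem API (the same code path Flink's HadoopFileSystem uses). It assumes the gcs-connector jar and its Hadoop configuration for the gs:// scheme are on the classpath; the class name is made up for illustration. Deleting the chk-1 prefix with gsutil from the Cloud SDK would work just as well.
{code:java}
// Rough sketch, not tested: check whether the leftover _metadata object still
// exists and, if so, delete the whole chk-1 directory recursively.
// Assumes the gcs-connector and its Hadoop configuration (gs:// scheme) are on
// the classpath; the class name is hypothetical.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteLeftoverCheckpoint {
    public static void main(String[] args) throws Exception {
        Path chk = new Path(
                "gs://flink-checkpoint-dev-bucket/device-logs/flink-checkpoint/"
                        + "822475f36e7a9a4f4048cb82791c55e2/chk-1");
        try (FileSystem fs = FileSystem.get(URI.create(chk.toString()), new Configuration())) {
            if (fs.exists(new Path(chk, "_metadata"))) {
                // true = recursive, removes the directory and everything under it
                fs.delete(chk, true);
            }
        }
    }
}{code}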
was (Author: yanfei lei):
{{Hi Jie, this log is just a warning and does not affect usage.}}
2022-06-22 23:14:28,477 WARN
org.apache.flink.runtime.checkpoint.CheckpointSubsumeHelper [] - Fail to
subsume the old checkpoint.
{{Can you check if
gs://flink-checkpoint-dev-bucket/device-logs/flink-checkpoint/822475f36e7a9a4f4048cb82791c55e2/chk-1/_metadata
exists in your google storage bucket? if yes, you can delete it manually.}}
> Flink can not delete older checkpoint folders from google storage bucket
> ------------------------------------------------------------------------
>
> Key: FLINK-28249
> URL: https://issues.apache.org/jira/browse/FLINK-28249
> Project: Flink
> Issue Type: Bug
> Components: FileSystems
> Affects Versions: 1.12.0
> Reporter: Jie Zhang
> Priority: Critical
>
> We are running Flink 1.12 with this config:
> https://nightlies.apache.org/flink/flink-docs-release-1.12/deployment/filesystems/gcs.html#libraries
>
> It is able to checkpoint to the Google Cloud Storage bucket, but it can NOT
> delete older checkpoint folders from that bucket.
>
> logs:
> {code:java}
> 2022-06-22 23:14:28,477 WARN  org.apache.flink.runtime.checkpoint.CheckpointSubsumeHelper [] - Fail to subsume the old checkpoint.
> java.io.IOException: Error deleting 'gs://flink-checkpoint-dev-bucket/device-logs/flink-checkpoint/822475f36e7a9a4f4048cb82791c55e2/chk-1/_metadata', stage 2 with generation 1655939488397964
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl$4.onFailure(GoogleCloudStorageImpl.java:937) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.BatchHelper.execute(BatchHelper.java:184) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.BatchHelper.lambda$queue$0(BatchHelper.java:164) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:131) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:74) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:82) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.MoreExecutors$DirectExecutorService.execute(MoreExecutors.java:323) ~[gcs-connector-latest-hadoop2.jar:?]
>     at java.util.concurrent.AbstractExecutorService.submit(AbstractExecutorService.java:134) ~[?:1.8.0_332]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.AbstractListeningExecutorService.submit(AbstractListeningExecutorService.java:69) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.common.util.concurrent.AbstractListeningExecutorService.submit(AbstractListeningExecutorService.java:36) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.BatchHelper.queue(BatchHelper.java:162) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.queueSingleObjectDelete(GoogleCloudStorageImpl.java:960) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.deleteObjects(GoogleCloudStorageImpl.java:891) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.deleteInternal(GoogleCloudStorageFileSystem.java:432) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.delete(GoogleCloudStorageFileSystem.java:398) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.delete(GoogleHadoopFileSystemBase.java:821) ~[gcs-connector-latest-hadoop2.jar:?]
>     at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.delete(HadoopFileSystem.java:160) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.state.filesystem.FileStateHandle.discardState(FileStateHandle.java:85) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.CompletedCheckpoint.discard(CompletedCheckpoint.java:249) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.CompletedCheckpoint.discardOnSubsume(CompletedCheckpoint.java:220) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.CheckpointSubsumeHelper.subsume(CheckpointSubsumeHelper.java:63) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore.addCheckpoint(StandaloneCompletedCheckpointStore.java:73) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.completePendingCheckpoint(CheckpointCoordinator.java:1211) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveAcknowledgeMessage(CheckpointCoordinator.java:1082) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at org.apache.flink.runtime.scheduler.SchedulerBase.lambda$acknowledgeCheckpoint$7(SchedulerBase.java:1042) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>     at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_332]
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_332]
>     at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [?:1.8.0_332]
>     at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [?:1.8.0_332]
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_332]
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_332]
>     at java.lang.Thread.run(Thread.java:750) [?:1.8.0_332]
>     Suppressed: java.nio.file.DirectoryNotEmptyException: Cannot delete a non-empty directory.
>         at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem.delete(GoogleCloudStorageFileSystem.java:387) ~[gcs-connector-latest-hadoop2.jar:?]
>         at com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.delete(GoogleHadoopFileSystemBase.java:821) ~[gcs-connector-latest-hadoop2.jar:?]
>         at org.apache.flink.runtime.fs.hdfs.HadoopFileSystem.delete(HadoopFileSystem.java:160) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.state.filesystem.FsCompletedCheckpointStorageLocation.disposeStorageLocation(FsCompletedCheckpointStorageLocation.java:74) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.CompletedCheckpoint.discard(CompletedCheckpoint.java:263) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.CompletedCheckpoint.discardOnSubsume(CompletedCheckpoint.java:220) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.CheckpointSubsumeHelper.subsume(CheckpointSubsumeHelper.java:63) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.StandaloneCompletedCheckpointStore.addCheckpoint(StandaloneCompletedCheckpointStore.java:73) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.completePendingCheckpoint(CheckpointCoordinator.java:1211) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.checkpoint.CheckpointCoordinator.receiveAcknowledgeMessage(CheckpointCoordinator.java:1082) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at org.apache.flink.runtime.scheduler.SchedulerBase.lambda$acknowledgeCheckpoint$7(SchedulerBase.java:1042) ~[flink-dist_2.11-1.12.7.jar:1.12.7]
>         at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [?:1.8.0_332]
>         at java.util.concurrent.FutureTask.run(FutureTask.java:266) [?:1.8.0_332]
>         at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) [?:1.8.0_332]
>         at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) [?:1.8.0_332]
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [?:1.8.0_332]
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [?:1.8.0_332]
>         at java.lang.Thread.run(Thread.java:750) [?:1.8.0_332]
> Caused by: com.google.cloud.hadoop.repackaged.gcs.com.google.api.client.googleapis.json.GoogleJsonResponseException
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions.createJsonResponseException(GoogleCloudStorageExceptions.java:89) ~[gcs-connector-latest-hadoop2.jar:?]
>     at com.google.cloud.hadoop.repackaged.gcs.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl$4.onFailure(GoogleCloudStorageImpl.java:917) ~[gcs-connector-latest-hadoop2.jar:?]
>     ... 31 more
> 2022-06-22 23:14:28,487 INFO  org.apache.flink.runtime.checkpoint.CheckpointCoordinator [] - Completed checkpoint 4 for job 822475f36e7a9a4f4048cb82791c55e2 (46652 bytes in 527 ms). {code}
>
>
--
This message was sent by Atlassian Jira
(v8.20.7#820007)