[
https://issues.apache.org/jira/browse/FLINK-31106?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
xy updated FLINK-31106:
-----------------------
Description:
JsonResponseHistoryServerArchivist archives job data to the FileSystem even when
the job's terminal state is not globally terminal (e.g. a suspended job). This
causes a FileAlreadyExistsException, similar to
https://issues.apache.org/jira/browse/FLINK-24232
The exception is:
INFO org.apache.flink.runtime.dispatcher.MiniDispatcher [] - Could not archive
completed job
ctdb_dw_push_ivideo_send_link_monitor_hi_prd(70f90a6c7bb2490d203f6c0d1818708d)
to the history server. java.util.concurrent.CompletionException:
java.lang.RuntimeException: org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client
xxx.xxx.xxx.xxx already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
~[?:1.8.0_192] at
java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
[?:1.8.0_192] at
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1629)
[?:1.8.0_192] at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_192] at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_192] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_192] Caused by:
java.lang.RuntimeException: org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client xxx.xxx.xxx.xxx
already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:316)
~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
org.apache.flink.util.function.ThrowingRunnable.lambda$unchecked$0(ThrowingRunnable.java:51)
~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1626)
~[?:1.8.0_192] ... 3 more Caused by:
org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client
xxx.xxx.xxx.xxx already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
was:
JsonResponseHistoryServerArchivist would archivist data on FileSystem when
TerminalState is not GLOBALLY. cause FileAlreadyExistsException like
https://issues.apache.org/jira/browse/FLINK-24232
exception as:
INFO org.apache.flink.runtime.dispatcher.MiniDispatcher [] - Could not archive
completed job
ctdb_dw_push_ivideo_send_link_monitor_hi_prd(70f90a6c7bb2490d203f6c0d1818708d)
to the history server. java.util.concurrent.CompletionException:
java.lang.RuntimeException: org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client 10.194.100.17
already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
~[?:1.8.0_192] at
java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
[?:1.8.0_192] at
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1629)
[?:1.8.0_192] at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[?:1.8.0_192] at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[?:1.8.0_192] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_192] Caused by:
java.lang.RuntimeException: org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client 10.194.100.17
already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:316)
~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
org.apache.flink.util.function.ThrowingRunnable.lambda$unchecked$0(ThrowingRunnable.java:51)
~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1626)
~[?:1.8.0_192] ... 3 more Caused by:
org.apache.hadoop.fs.FileAlreadyExistsException:
/flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client 10.194.100.17
already exists at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
> Skip history server archiving for suspended jobs on
> JsonResponseHistoryServerArchivist
> --------------------------------------------------------------------------------------
>
> Key: FLINK-31106
> URL: https://issues.apache.org/jira/browse/FLINK-31106
> Project: Flink
> Issue Type: Bug
> Components: Runtime / State Backends
> Affects Versions: 1.14.5, 1.15.1, 1.16.1
> Reporter: xy
> Priority: Major
> Labels: pull-request-available
>
> JsonResponseHistoryServerArchivist archives job data to the FileSystem even when
> the job's terminal state is not globally terminal (e.g. a suspended job). This
> causes a FileAlreadyExistsException, similar to
> https://issues.apache.org/jira/browse/FLINK-24232
> The exception is:
> INFO org.apache.flink.runtime.dispatcher.MiniDispatcher [] - Could not
> archive completed job
> ctdb_dw_push_ivideo_send_link_monitor_hi_prd(70f90a6c7bb2490d203f6c0d1818708d)
> to the history server. java.util.concurrent.CompletionException:
> java.lang.RuntimeException: org.apache.hadoop.fs.FileAlreadyExistsException:
> /flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client
> xxx.xxx.xxx.xxx already exists at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
> at
> org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:422) at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
> java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
> ~[?:1.8.0_192] at
> java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
> [?:1.8.0_192] at
> java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1629)
> [?:1.8.0_192] at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> [?:1.8.0_192] at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> [?:1.8.0_192] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_192] Caused
> by: java.lang.RuntimeException:
> org.apache.hadoop.fs.FileAlreadyExistsException:
> /flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client
> xxx.xxx.xxx.xxx already exists at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
> at
> org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:422) at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275) at
> org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:316)
> ~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
> org.apache.flink.util.function.ThrowingRunnable.lambda$unchecked$0(ThrowingRunnable.java:51)
> ~[flink-dist_2.11-1.13.2.vivo-SNAPSHOT.jar:1.13.2.vivo-SNAPSHOT] at
> java.util.concurrent.CompletableFuture$AsyncRun.run(CompletableFuture.java:1626)
> ~[?:1.8.0_192] ... 3 more Caused by:
> org.apache.hadoop.fs.FileAlreadyExistsException:
> /flink/completed-jobs/70f90a6c7bb2490d203f6c0d1818708d for client
> xxx.xxx.xxx.xxx already exists at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInternal(FSNamesystem.java:2967)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2856)
> at
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2741)
> at
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:620)
> at
> org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.create(AuthorizationProviderProxyClientProtocol.java:115)
> at
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:412)
> at
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281) at
> org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277) at
> java.security.AccessController.doPrivileged(Native Method) at
> javax.security.auth.Subject.doAs(Subject.java:422) at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
--
This message was sent by Atlassian Jira
(v8.20.10#820010)