[
https://issues.apache.org/jira/browse/FLINK-28949?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Tiago Rodrigues reopened FLINK-28949:
-------------------------------------
I tried the same approach with a Flink cluster on 1.15, but still got an error
(a new one) from the secondary job manager:
{code:java}
❯❯ curl http://<secondary_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id>
{
"errors": [
"org.apache.flink.runtime.rest.handler.RestHandlerException: Internal
server error while retrieving status of savepoint operation with
triggerId=8689c1f6c6cbe9fb5fef5962f4c808c2 for job <job_id>.\n\tat
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers.createInternalServerError(SavepointHandlers.java:352)\n\tat
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers.access$000(SavepointHandlers.java:115)\n\tat
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.lambda$null$0(SavepointHandlers.java:311)\n\tat
java.base/java.util.Optional.orElseGet(Optional.java:369)\n\tat
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.lambda$handleRequest$1(SavepointHandlers.java:309)\n\tat
java.base/java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:930)\n\tat
java.base/java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:907)\n\tat
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.lambda$invokeRpc$1(AkkaInvocationHandler.java:252)\n\tat
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)\n\tat
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)\n\tat
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
org.apache.flink.util.concurrent.FutureUtils.doForward(FutureUtils.java:1387)\n\tat
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$null$1(ClassLoadingUtils.java:93)\n\tat
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)\n\tat
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$guardCompletionWithContextClassLoader$2(ClassLoadingUtils.java:92)\n\tat
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)\n\tat
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)\n\tat
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$1.onComplete(AkkaFutureUtils.java:45)\n\tat
akka.dispatch.OnComplete.internal(Future.scala:299)\n\tat
akka.dispatch.OnComplete.internal(Future.scala:297)\n\tat
akka.dispatch.japi$CallbackBridge.apply(Future.scala:224)\n\tat
akka.dispatch.japi$CallbackBridge.apply(Future.scala:221)\n\tat
scala.concurrent.impl.CallbackRunnable.run(Promise.scala:60)\n\tat
org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$DirectExecutionContext.execute(AkkaFutureUtils.java:65)\n\tat
scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:68)\n\tat
scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1(Promise.scala:284)\n\tat
scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1$adapted(Promise.scala:284)\n\tat
scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:284)\n\tat
akka.pattern.PromiseActorRef.$bang(AskSupport.scala:621)\n\tat
akka.remote.DefaultMessageDispatcher.dispatch(Endpoint.scala:118)\n\tat
akka.remote.EndpointReader$$anonfun$receive$2.applyOrElse(Endpoint.scala:1144)\n\tat
akka.actor.Actor.aroundReceive(Actor.scala:537)\n\tat
akka.actor.Actor.aroundReceive$(Actor.scala:535)\n\tat
akka.remote.EndpointActor.aroundReceive(Endpoint.scala:540)\n\tat
akka.actor.ActorCell.receiveMessage(ActorCell.scala:580)\n\tat
akka.actor.ActorCell.invoke(ActorCell.scala:548)\n\tat
akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)\n\tat
akka.dispatch.Mailbox.run(Mailbox.scala:231)\n\tat
akka.dispatch.Mailbox.exec(Mailbox.scala:243)\n\tat
java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)\n\tat
java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)\n\tat
java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)\n\tat
java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)\n\tat
java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183)\nCaused
by: org.apache.flink.runtime.rpc.akka.exceptions.AkkaRpcException: Failed to
serialize the result for RPC call : getTriggeredSavepointStatus.\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.serializeRemoteResultAndVerifySize(AkkaRpcActor.java:405)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$sendAsyncResponse$2(AkkaRpcActor.java:361)\n\tat
java.base/java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:930)\n\tat
java.base/java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:946)\n\tat
java.base/java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2266)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.sendAsyncResponse(AkkaRpcActor.java:353)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:320)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:217)\n\tat
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:78)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:163)\n\tat
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)\n\tat
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)\n\tat
scala.PartialFunction.applyOrElse(PartialFunction.scala:123)\n\tat
scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)\n\tat
akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)\n\tat
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)\n\tat
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)\n\tat
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)\n\tat
akka.actor.Actor.aroundReceive(Actor.scala:537)\n\tat
akka.actor.Actor.aroundReceive$(Actor.scala:535)\n\tat
akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)\n\t... 10
more\nCaused by: java.io.NotSerializableException:
org.apache.flink.runtime.rest.handler.async.OperationResult\n\tat
java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1185)\n\tat
java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:349)\n\tat
org.apache.flink.util.InstantiationUtil.serializeObject(InstantiationUtil.java:632)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcSerializedValue.valueOf(AkkaRpcSerializedValue.java:66)\n\tat
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.serializeRemoteResultAndVerifySize(AkkaRpcActor.java:388)\n\t...
30 more\n"
]
}{code}
> Secondary job manager fails to retrieve savepoint
> -------------------------------------------------
>
> Key: FLINK-28949
> URL: https://issues.apache.org/jira/browse/FLINK-28949
> Project: Flink
> Issue Type: Bug
> Components: Test Infrastructure
> Affects Versions: 1.14.4
> Reporter: Tiago Rodrigues
> Priority: Minor
>
> After creating a savepoint, its information cannot be retrieved unless the
> API call is made to the active job manager.
> Active job manager response:
> {code:java}
> >>> curl http://<active_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id> |
> >>> jq
> {
> "status": {
> "id": "COMPLETED"
> },
> "operation": {
> "location": "file:/srv/flink/savepoints/<savepoint>"
> }
> }{code}
> Secondary job manager response:
> {code:java}
> >>> curl
> >>> http://<secondary_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id> |
> >>> jq
> {
> "errors": [
> "org.apache.flink.runtime.rest.NotFoundException: Operation not found
> under key:
> org.apache.flink.runtime.rest.handler.job.AsynchronousJobOperationKey@e25522a4\n\tat
>
> org.apache.flink.runtime.rest.handler.async.AbstractAsynchronousOperationHandlers$StatusHandler.handleRequest(AbstractAsynchronousOperationHandlers.java:182)\n\tat
>
> org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.handleRequest(SavepointHandlers.java:219)\n\tat
>
> org.apache.flink.runtime.rest.handler.AbstractRestHandler.respondToRequest(AbstractRestHandler.java:83)\n\tat
>
> org.apache.flink.runtime.rest.handler.AbstractHandler.respondAsLeader(AbstractHandler.java:195)\n\tat
>
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.lambda$channelRead0$0(LeaderRetrievalHandler.java:83)\n\tat
> java.base/java.util.Optional.ifPresent(Optional.java:183)\n\tat
> org.apache.flink.util.OptionalConsumer.ifPresent(OptionalConsumer.java:45)\n\tat
>
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.channelRead0(LeaderRetrievalHandler.java:80)\n\tat
>
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.channelRead0(LeaderRetrievalHandler.java:49)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>
> org.apache.flink.runtime.rest.handler.router.RouterHandler.routed(RouterHandler.java:115)\n\tat
>
> org.apache.flink.runtime.rest.handler.router.RouterHandler.channelRead0(RouterHandler.java:94)\n\tat
>
> org.apache.flink.runtime.rest.handler.router.RouterHandler.channelRead0(RouterHandler.java:55)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>
> org.apache.flink.runtime.rest.FileUploadHandler.channelRead0(FileUploadHandler.java:238)\n\tat
>
> org.apache.flink.runtime.rest.FileUploadHandler.channelRead0(FileUploadHandler.java:71)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.CombinedChannelDuplexHandler$DelegatingChannelHandlerContext.fireChannelRead(CombinedChannelDuplexHandler.java:436)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:324)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:296)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.CombinedChannelDuplexHandler.channelRead(CombinedChannelDuplexHandler.java:251)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:719)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n\tat
>
> org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n\tat
> java.base/java.lang.Thread.run(Thread.java:829)\nCaused by:
> org.apache.flink.runtime.rest.handler.async.UnknownOperationKeyException: No
> ongoing operation for
> org.apache.flink.runtime.rest.handler.job.AsynchronousJobOperationKey@e25522a4\n\tat
>
> org.apache.flink.runtime.rest.handler.async.CompletedOperationCache.get(CompletedOperationCache.java:158)\n\tat
>
> org.apache.flink.runtime.rest.handler.async.AbstractAsynchronousOperationHandlers$StatusHandler.handleRequest(AbstractAsynchronousOperationHandlers.java:180)\n\t...
> 48 more\n"
> ]
> }{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)