[ 
https://issues.apache.org/jira/browse/FLINK-28949?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tiago Rodrigues reopened FLINK-28949:
-------------------------------------

I tried the same approach with a Flink 1.15 cluster, but still got an error 
(a new one) from the secondary job manager: 


{code:java}
❯❯ curl http://<secondary_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id>

{
  "errors": [
    "org.apache.flink.runtime.rest.handler.RestHandlerException: Internal 
server error while retrieving status of savepoint operation with 
triggerId=8689c1f6c6cbe9fb5fef5962f4c808c2 for job <job_id>.\n\tat 
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers.createInternalServerError(SavepointHandlers.java:352)\n\tat
 
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers.access$000(SavepointHandlers.java:115)\n\tat
 
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.lambda$null$0(SavepointHandlers.java:311)\n\tat
 java.base/java.util.Optional.orElseGet(Optional.java:369)\n\tat 
org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.lambda$handleRequest$1(SavepointHandlers.java:309)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:930)\n\tat
 
java.base/java.util.concurrent.CompletableFuture$UniHandle.tryFire(CompletableFuture.java:907)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.lambda$invokeRpc$1(AkkaInvocationHandler.java:252)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)\n\tat
 
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
 
org.apache.flink.util.concurrent.FutureUtils.doForward(FutureUtils.java:1387)\n\tat
 
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$null$1(ClassLoadingUtils.java:93)\n\tat
 
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:68)\n\tat
 
org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.lambda$guardCompletionWithContextClassLoader$2(ClassLoadingUtils.java:92)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.uniWhenComplete(CompletableFuture.java:859)\n\tat
 
java.base/java.util.concurrent.CompletableFuture$UniWhenComplete.tryFire(CompletableFuture.java:837)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:506)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.completeExceptionally(CompletableFuture.java:2088)\n\tat
 
org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$1.onComplete(AkkaFutureUtils.java:45)\n\tat
 akka.dispatch.OnComplete.internal(Future.scala:299)\n\tat 
akka.dispatch.OnComplete.internal(Future.scala:297)\n\tat 
akka.dispatch.japi$CallbackBridge.apply(Future.scala:224)\n\tat 
akka.dispatch.japi$CallbackBridge.apply(Future.scala:221)\n\tat 
scala.concurrent.impl.CallbackRunnable.run(Promise.scala:60)\n\tat 
org.apache.flink.runtime.concurrent.akka.AkkaFutureUtils$DirectExecutionContext.execute(AkkaFutureUtils.java:65)\n\tat
 
scala.concurrent.impl.CallbackRunnable.executeWithValue(Promise.scala:68)\n\tat 
scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1(Promise.scala:284)\n\tat
 
scala.concurrent.impl.Promise$DefaultPromise.$anonfun$tryComplete$1$adapted(Promise.scala:284)\n\tat
 
scala.concurrent.impl.Promise$DefaultPromise.tryComplete(Promise.scala:284)\n\tat
 akka.pattern.PromiseActorRef.$bang(AskSupport.scala:621)\n\tat 
akka.remote.DefaultMessageDispatcher.dispatch(Endpoint.scala:118)\n\tat 
akka.remote.EndpointReader$$anonfun$receive$2.applyOrElse(Endpoint.scala:1144)\n\tat
 akka.actor.Actor.aroundReceive(Actor.scala:537)\n\tat 
akka.actor.Actor.aroundReceive$(Actor.scala:535)\n\tat 
akka.remote.EndpointActor.aroundReceive(Endpoint.scala:540)\n\tat 
akka.actor.ActorCell.receiveMessage(ActorCell.scala:580)\n\tat 
akka.actor.ActorCell.invoke(ActorCell.scala:548)\n\tat 
akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)\n\tat 
akka.dispatch.Mailbox.run(Mailbox.scala:231)\n\tat 
akka.dispatch.Mailbox.exec(Mailbox.scala:243)\n\tat 
java.base/java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:290)\n\tat 
java.base/java.util.concurrent.ForkJoinPool$WorkQueue.topLevelExec(ForkJoinPool.java:1020)\n\tat
 java.base/java.util.concurrent.ForkJoinPool.scan(ForkJoinPool.java:1656)\n\tat 
java.base/java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1594)\n\tat
 
java.base/java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:183)\nCaused
 by: org.apache.flink.runtime.rpc.akka.exceptions.AkkaRpcException: Failed to 
serialize the result for RPC call : getTriggeredSavepointStatus.\n\tat 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.serializeRemoteResultAndVerifySize(AkkaRpcActor.java:405)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$sendAsyncResponse$2(AkkaRpcActor.java:361)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.uniHandle(CompletableFuture.java:930)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.uniHandleStage(CompletableFuture.java:946)\n\tat
 
java.base/java.util.concurrent.CompletableFuture.handle(CompletableFuture.java:2266)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.sendAsyncResponse(AkkaRpcActor.java:353)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:320)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:217)\n\tat
 
org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:78)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:163)\n\tat
 akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)\n\tat 
akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)\n\tat 
scala.PartialFunction.applyOrElse(PartialFunction.scala:123)\n\tat 
scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)\n\tat 
akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)\n\tat 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)\n\tat 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)\n\tat 
scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)\n\tat 
akka.actor.Actor.aroundReceive(Actor.scala:537)\n\tat 
akka.actor.Actor.aroundReceive$(Actor.scala:535)\n\tat 
akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)\n\t... 10 
more\nCaused by: java.io.NotSerializableException: 
org.apache.flink.runtime.rest.handler.async.OperationResult\n\tat 
java.base/java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1185)\n\tat
 
java.base/java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:349)\n\tat
 
org.apache.flink.util.InstantiationUtil.serializeObject(InstantiationUtil.java:632)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcSerializedValue.valueOf(AkkaRpcSerializedValue.java:66)\n\tat
 
org.apache.flink.runtime.rpc.akka.AkkaRpcActor.serializeRemoteResultAndVerifySize(AkkaRpcActor.java:388)\n\t...
 30 more\n"
  ]
}{code}

> Secondary job manager fails to retrieve savepoint
> -------------------------------------------------
>
>                 Key: FLINK-28949
>                 URL: https://issues.apache.org/jira/browse/FLINK-28949
>             Project: Flink
>          Issue Type: Bug
>          Components: Test Infrastructure
>    Affects Versions: 1.14.4
>            Reporter: Tiago Rodrigues
>            Priority: Minor
>
> After creating a savepoint, its information cannot be retrieved unless the 
> API call is made to the active job manager.
> Active job manager response:
> {code:java}
> >>> curl http://<active_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id> | 
> >>> jq 
> {
>   "status": {
>     "id": "COMPLETED"
>   },
>   "operation": {
>     "location": "file:/srv/flink/savepoints/<savepoint>"
>   }
> }{code}
> Secondary job manager response:
> {code:java}
> >>> curl 
> >>> http://<secondary_ip>:8081/v1/jobs/<job_id>/savepoints/<savepoint_id> | 
> >>> jq 
> {
>   "errors": [
>     "org.apache.flink.runtime.rest.NotFoundException: Operation not found 
> under key: 
> org.apache.flink.runtime.rest.handler.job.AsynchronousJobOperationKey@e25522a4\n\tat
>  
> org.apache.flink.runtime.rest.handler.async.AbstractAsynchronousOperationHandlers$StatusHandler.handleRequest(AbstractAsynchronousOperationHandlers.java:182)\n\tat
>  
> org.apache.flink.runtime.rest.handler.job.savepoints.SavepointHandlers$SavepointStatusHandler.handleRequest(SavepointHandlers.java:219)\n\tat
>  
> org.apache.flink.runtime.rest.handler.AbstractRestHandler.respondToRequest(AbstractRestHandler.java:83)\n\tat
>  
> org.apache.flink.runtime.rest.handler.AbstractHandler.respondAsLeader(AbstractHandler.java:195)\n\tat
>  
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.lambda$channelRead0$0(LeaderRetrievalHandler.java:83)\n\tat
>  java.base/java.util.Optional.ifPresent(Optional.java:183)\n\tat 
> org.apache.flink.util.OptionalConsumer.ifPresent(OptionalConsumer.java:45)\n\tat
>  
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.channelRead0(LeaderRetrievalHandler.java:80)\n\tat
>  
> org.apache.flink.runtime.rest.handler.LeaderRetrievalHandler.channelRead0(LeaderRetrievalHandler.java:49)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>  
> org.apache.flink.runtime.rest.handler.router.RouterHandler.routed(RouterHandler.java:115)\n\tat
>  
> org.apache.flink.runtime.rest.handler.router.RouterHandler.channelRead0(RouterHandler.java:94)\n\tat
>  
> org.apache.flink.runtime.rest.handler.router.RouterHandler.channelRead0(RouterHandler.java:55)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>  
> org.apache.flink.runtime.rest.FileUploadHandler.channelRead0(FileUploadHandler.java:238)\n\tat
>  
> org.apache.flink.runtime.rest.FileUploadHandler.channelRead0(FileUploadHandler.java:71)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.CombinedChannelDuplexHandler$DelegatingChannelHandlerContext.fireChannelRead(CombinedChannelDuplexHandler.java:436)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:324)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:296)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.CombinedChannelDuplexHandler.channelRead(CombinedChannelDuplexHandler.java:251)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:719)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:655)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:581)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:493)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:989)\n\tat
>  
> org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)\n\tat
>  java.base/java.lang.Thread.run(Thread.java:829)\nCaused by: 
> org.apache.flink.runtime.rest.handler.async.UnknownOperationKeyException: No 
> ongoing operation for 
> org.apache.flink.runtime.rest.handler.job.AsynchronousJobOperationKey@e25522a4\n\tat
>  
> org.apache.flink.runtime.rest.handler.async.CompletedOperationCache.get(CompletedOperationCache.java:158)\n\tat
>  
> org.apache.flink.runtime.rest.handler.async.AbstractAsynchronousOperationHandlers$StatusHandler.handleRequest(AbstractAsynchronousOperationHandlers.java:180)\n\t...
>  48 more\n"
>   ]
> }{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to