Nilotpal Nandi created HDDS-1057: ------------------------------------ Summary: Get key operation fails when the client cannot communicate with 2 of the datanodes in a 3-node cluster Key: HDDS-1057 URL: https://issues.apache.org/jira/browse/HDDS-1057 Project: Hadoop Distributed Data Store Issue Type: Bug Components: Ozone Client Reporter: Nilotpal Nandi
Steps taken: ------------------ # Created a 3-node Docker cluster. # Wrote a key. # Created a network partition such that 2 of the 3 datanodes cannot communicate with any other node. # The third datanode can communicate with all other nodes. # Tried to read the key. Exception seen: ------------------------ {noformat} Failed to execute command cmdType: GetBlock E traceID: "9b3ebd93-e598-4ca2-a6f4-2389f2d35f63" E containerID: 22 E datanodeUuid: "15345663-15c9-4fe3-9b8f-a46123ba8a6e" E getBlock { E blockID { E containerID: 22 E localID: 101545011736215553 E blockCommitSequenceId: 5 E } E } E on datanode 15345663-15c9-4fe3-9b8f-a46123ba8a6e E java.util.concurrent.ExecutionException: org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException: UNAVAILABLE: io exception E at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) E at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1895) E at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommandWithRetry(XceiverClientGrpc.java:220) E at org.apache.hadoop.hdds.scm.XceiverClientGrpc.sendCommand(XceiverClientGrpc.java:201) E at org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.getBlock(ContainerProtocolCalls.java:118) E at org.apache.hadoop.ozone.client.io.KeyInputStream.getFromOmKeyInfo(KeyInputStream.java:305) E at org.apache.hadoop.ozone.client.rpc.RpcClient.getKey(RpcClient.java:608) E at org.apache.hadoop.ozone.client.OzoneBucket.readKey(OzoneBucket.java:284) E at org.apache.hadoop.ozone.web.ozShell.keys.GetKeyHandler.call(GetKeyHandler.java:95) E at org.apache.hadoop.ozone.web.ozShell.keys.GetKeyHandler.call(GetKeyHandler.java:48) E at picocli.CommandLine.execute(CommandLine.java:919) E at picocli.CommandLine.access$700(CommandLine.java:104) E at picocli.CommandLine$RunLast.handle(CommandLine.java:1083) E at picocli.CommandLine$RunLast.handle(CommandLine.java:1051) E at picocli.CommandLine$AbstractParseResultHandler.handleParseResult(CommandLine.java:959) E at 
picocli.CommandLine.parseWithHandlers(CommandLine.java:1242) E at picocli.CommandLine.parseWithHandler(CommandLine.java:1181) E at org.apache.hadoop.hdds.cli.GenericCli.execute(GenericCli.java:61) E at org.apache.hadoop.hdds.cli.GenericCli.run(GenericCli.java:52) E at org.apache.hadoop.ozone.web.ozShell.Shell.main(Shell.java:83) E Caused by: org.apache.ratis.thirdparty.io.grpc.StatusRuntimeException: UNAVAILABLE: io exception E at org.apache.ratis.thirdparty.io.grpc.Status.asRuntimeException(Status.java:526) E at org.apache.ratis.thirdparty.io.grpc.stub.ClientCalls$StreamObserverToCallListenerAdapter.onClose(ClientCalls.java:434) E at org.apache.ratis.thirdparty.io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39) E at org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23) E at org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener$SimpleForwardingClientCallListener.onClose(ForwardingClientCallListener.java:40) E at org.apache.ratis.thirdparty.io.grpc.internal.CensusStatsModule$StatsClientInterceptor$1$1.onClose(CensusStatsModule.java:678) E at org.apache.ratis.thirdparty.io.grpc.PartialForwardingClientCallListener.onClose(PartialForwardingClientCallListener.java:39) E at org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener.onClose(ForwardingClientCallListener.java:23) E at org.apache.ratis.thirdparty.io.grpc.ForwardingClientCallListener$SimpleForwardingClientCallListener.onClose(ForwardingClientCallListener.java:40) E at org.apache.ratis.thirdparty.io.grpc.internal.CensusTracingModule$TracingClientInterceptor$1$1.onClose(CensusTracingModule.java:397) E at org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl.closeObserver(ClientCallImpl.java:459) E at org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl.access$300(ClientCallImpl.java:63) E at 
org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.close(ClientCallImpl.java:546) E at org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl.access$600(ClientCallImpl.java:467) E at org.apache.ratis.thirdparty.io.grpc.internal.ClientCallImpl$ClientStreamListenerImpl$1StreamClosed.runInContext(ClientCallImpl.java:584) E at org.apache.ratis.thirdparty.io.grpc.internal.ContextRunnable.run(ContextRunnable.java:37) E at org.apache.ratis.thirdparty.io.grpc.internal.SerializingExecutor.run(SerializingExecutor.java:123) E at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) E at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) E at java.lang.Thread.run(Thread.java:748) E Caused by: org.apache.ratis.thirdparty.io.netty.channel.ConnectTimeoutException: connection timed out: /172.20.0.7:9859 E at org.apache.ratis.thirdparty.io.netty.channel.nio.AbstractNioChannel$AbstractNioUnsafe$1.run(AbstractNioChannel.java:267) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.PromiseTask$RunnableAdapter.call(PromiseTask.java:38) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.ScheduledFutureTask.run(ScheduledFutureTask.java:127) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:404) E at org.apache.ratis.thirdparty.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:462) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:897) E at org.apache.ratis.thirdparty.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) E ... 
1 more E Failed to execute command cmdType: GetBlock] {noformat} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: hdfs-dev-unsubscr...@hadoop.apache.org For additional commands, e-mail: hdfs-dev-h...@hadoop.apache.org