[
https://issues.apache.org/jira/browse/HDDS-10747?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Saketa Chalamchala resolved HDDS-10747.
---------------------------------------
Resolution: Duplicate
> Key get for EC fails when DNs are stopped and SCM is restarted
> --------------------------------------------------------------
>
> Key: HDDS-10747
> URL: https://issues.apache.org/jira/browse/HDDS-10747
> Project: Apache Ozone
> Issue Type: Bug
> Components: EC
> Reporter: Saketa Chalamchala
> Assignee: Saketa Chalamchala
> Priority: Major
>
> {code:java}
> ozone sh volume create testvol
> ozone sh bucket create testvol/testbucket -t EC -r rs-3-2-1024k
> ozone sh key put /testvol/testbucket/key1 /tmp/file1
> ozone sh key get /testvol/testbucket/key1 tmp1 ### This is successful
> ### Stop 2 DNs
> ### Restart SCM
> ozone sh key get /testvol/testbucket/key1 tmp2 ## Fails with below error
> ozone sh key get /testvol/testbucket/key1 tmp2
> 24/04/24 00:24:50 INFO retry.RetryInvocationHandler:
> com.google.protobuf.ServiceException:
> org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
> java.lang.NullPointerException
> at java.util.LinkedList.addAll(LinkedList.java:408)
> at java.util.LinkedList.addAll(LinkedList.java:387)
> at java.util.LinkedList.<init>(LinkedList.java:119)
> at
> org.apache.hadoop.hdds.scm.pipeline.Pipeline$Builder.setNodesInOrder(Pipeline.java:606)
> at
> org.apache.hadoop.hdds.scm.pipeline.Pipeline.copyWithNodesInOrder(Pipeline.java:423)
> at
> org.apache.hadoop.ozone.om.KeyManagerImpl.sortDatanodes(KeyManagerImpl.java:1791)
> at
> org.apache.hadoop.ozone.om.KeyManagerImpl.getKeyInfo(KeyManagerImpl.java:1975)
> at
> org.apache.hadoop.ozone.om.OmMetadataReader.getKeyInfo(OmMetadataReader.java:221)
> at
> org.apache.hadoop.ozone.om.OzoneManager.getKeyInfo(OzoneManager.java:2869)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.getKeyInfo(OzoneManagerRequestHandler.java:616)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.handleReadRequest(OzoneManagerRequestHandler.java:311)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitReadRequestToOM(OzoneManagerProtocolServerSideTranslatorPB.java:220)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.processRequest(OzoneManagerProtocolServerSideTranslatorPB.java:174)
> at
> org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
> at
> org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitRequest(OzoneManagerProtocolServerSideTranslatorPB.java:143)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$OzoneManagerService$2.callBlockingMethod(OzoneManagerProtocolProtos.java)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899){code}
>
> *Error from OM and SCM:*
> {code:java}
> 12:14:49.227 AM WARN [IPC Server handler 7 on
> 9863]-org.apache.hadoop.ipc.Server: IPC Server handler 7 on 9863, call
> Call#107 Retry#0
> org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol.send from
> 10.140.125.15:45717
> java.lang.IndexOutOfBoundsException: Index: 3, Size: 3
> at java.util.ArrayList.rangeCheck(ArrayList.java:657)
> at java.util.ArrayList.get(ArrayList.java:433)
> at
> org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl.sortByDistanceCost(NetworkTopologyImpl.java:767)
> at
> org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer.sortDatanodes(SCMBlockProtocolServer.java:356)
> at
> org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:267)
> at
> org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.processMessage(ScmBlockLocationProtocolServerSideTranslatorPB.java:158)
> at
> org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
> at
> org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:113)
> at
> org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:14238)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
> at java.security.AccessController.doPrivileged(Native Method)
> at javax.security.auth.Subject.doAs(Subject.java:422)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]