Saketa Chalamchala created HDDS-10747:
-----------------------------------------
Summary: Key get for EC fails when DNs are stopped and SCM is
restarted
Key: HDDS-10747
URL: https://issues.apache.org/jira/browse/HDDS-10747
Project: Apache Ozone
Issue Type: Bug
Components: EC
Affects Versions: 1.4.0
Reporter: Saketa Chalamchala
Assignee: Saketa Chalamchala
{code:java}
ozone sh volume create testvol
ozone sh bucket create testvol/testbucket -t EC -r rs-3-2-1024k
ozone sh key put /testvol/testbucket/key1 /tmp/file1
ozone sh key get /testvol/testbucket/key1 tmp1 ### This is successful
### Stop 2 DNs
### Restart SCM
ozone sh key get /testvol/testbucket/key1 tmp2 ### Fails with the error below
ozone sh key get /testvol/testbucket/key1 tmp2
24/04/24 00:24:50 INFO retry.RetryInvocationHandler:
com.google.protobuf.ServiceException:
org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
java.lang.NullPointerException
at java.util.LinkedList.addAll(LinkedList.java:408)
at java.util.LinkedList.addAll(LinkedList.java:387)
at java.util.LinkedList.<init>(LinkedList.java:119)
at
org.apache.hadoop.hdds.scm.pipeline.Pipeline$Builder.setNodesInOrder(Pipeline.java:606)
at
org.apache.hadoop.hdds.scm.pipeline.Pipeline.copyWithNodesInOrder(Pipeline.java:423)
at
org.apache.hadoop.ozone.om.KeyManagerImpl.sortDatanodes(KeyManagerImpl.java:1791)
at
org.apache.hadoop.ozone.om.KeyManagerImpl.getKeyInfo(KeyManagerImpl.java:1975)
at
org.apache.hadoop.ozone.om.OmMetadataReader.getKeyInfo(OmMetadataReader.java:221)
at
org.apache.hadoop.ozone.om.OzoneManager.getKeyInfo(OzoneManager.java:2869)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.getKeyInfo(OzoneManagerRequestHandler.java:616)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.handleReadRequest(OzoneManagerRequestHandler.java:311)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitReadRequestToOM(OzoneManagerProtocolServerSideTranslatorPB.java:220)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.processRequest(OzoneManagerProtocolServerSideTranslatorPB.java:174)
at
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
at
org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitRequest(OzoneManagerProtocolServerSideTranslatorPB.java:143)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$OzoneManagerService$2.callBlockingMethod(OzoneManagerProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899){code}
*Error from OM and SCM:*
{code:java}
12:14:49.227 AM WARN [IPC Server handler 7 on
9863]-org.apache.hadoop.ipc.Server: IPC Server handler 7 on 9863, call Call#107
Retry#0 org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol.send from
10.140.125.15:45717
java.lang.IndexOutOfBoundsException: Index: 3, Size: 3 at
java.util.ArrayList.rangeCheck(ArrayList.java:657) at
java.util.ArrayList.get(ArrayList.java:433) at
org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl.sortByDistanceCost(NetworkTopologyImpl.java:767)
at
org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer.sortDatanodes(SCMBlockProtocolServer.java:356)
at
org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:267)
at
org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.processMessage(ScmBlockLocationProtocolServerSideTranslatorPB.java:158)
at
org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
at
org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:113)
at
org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:14238)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070) at
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994) at
org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922) at
java.security.AccessController.doPrivileged(Native Method) at
javax.security.auth.Subject.doAs(Subject.java:422) at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899) {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]