Saketa Chalamchala created HDDS-10747:
-----------------------------------------

             Summary: Key get for EC fails when DNs are stopped and SCM is restarted
                 Key: HDDS-10747
                 URL: https://issues.apache.org/jira/browse/HDDS-10747
             Project: Apache Ozone
          Issue Type: Bug
          Components: EC
    Affects Versions: 1.4.0
            Reporter: Saketa Chalamchala
            Assignee: Saketa Chalamchala


{code:java}
ozone sh volume create testvol
ozone sh bucket create testvol/testbucket -t EC -r rs-3-2-1024k
ozone sh key put /testvol/testbucket/key1 /tmp/file1
ozone sh key get /testvol/testbucket/key1 tmp1 ### This is successful

### Stop 2 DNs
### Restart SCM

ozone sh key get /testvol/testbucket/key1 tmp2 ### Fails with the error below
ozone sh key get /testvol/testbucket/key1 tmp2
24/04/24 00:24:50 INFO retry.RetryInvocationHandler: com.google.protobuf.ServiceException: org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException): java.lang.NullPointerException
    at java.util.LinkedList.addAll(LinkedList.java:408)
    at java.util.LinkedList.addAll(LinkedList.java:387)
    at java.util.LinkedList.<init>(LinkedList.java:119)
    at org.apache.hadoop.hdds.scm.pipeline.Pipeline$Builder.setNodesInOrder(Pipeline.java:606)
    at org.apache.hadoop.hdds.scm.pipeline.Pipeline.copyWithNodesInOrder(Pipeline.java:423)
    at org.apache.hadoop.ozone.om.KeyManagerImpl.sortDatanodes(KeyManagerImpl.java:1791)
    at org.apache.hadoop.ozone.om.KeyManagerImpl.getKeyInfo(KeyManagerImpl.java:1975)
    at org.apache.hadoop.ozone.om.OmMetadataReader.getKeyInfo(OmMetadataReader.java:221)
    at org.apache.hadoop.ozone.om.OzoneManager.getKeyInfo(OzoneManager.java:2869)
    at org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.getKeyInfo(OzoneManagerRequestHandler.java:616)
    at org.apache.hadoop.ozone.protocolPB.OzoneManagerRequestHandler.handleReadRequest(OzoneManagerRequestHandler.java:311)
    at org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitReadRequestToOM(OzoneManagerProtocolServerSideTranslatorPB.java:220)
    at org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.processRequest(OzoneManagerProtocolServerSideTranslatorPB.java:174)
    at org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
    at org.apache.hadoop.ozone.protocolPB.OzoneManagerProtocolServerSideTranslatorPB.submitRequest(OzoneManagerProtocolServerSideTranslatorPB.java:143)
    at org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos$OzoneManagerService$2.callBlockingMethod(OzoneManagerProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899)
{code}
 

*Error from OM and SCM:* 
{code:java}
12:14:49.227 AM WARN    [IPC    Server handler 7 on 9863]-org.apache.hadoop.ipc.Server: IPC Server handler 7 on 9863, call Call#107 Retry#0 org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocol.send from 10.140.125.15:45717
java.lang.IndexOutOfBoundsException: Index: 3, Size: 3
    at java.util.ArrayList.rangeCheck(ArrayList.java:657)
    at java.util.ArrayList.get(ArrayList.java:433)
    at org.apache.hadoop.hdds.scm.net.NetworkTopologyImpl.sortByDistanceCost(NetworkTopologyImpl.java:767)
    at org.apache.hadoop.hdds.scm.server.SCMBlockProtocolServer.sortDatanodes(SCMBlockProtocolServer.java:356)
    at org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.sortDatanodes(ScmBlockLocationProtocolServerSideTranslatorPB.java:267)
    at org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.processMessage(ScmBlockLocationProtocolServerSideTranslatorPB.java:158)
    at org.apache.hadoop.hdds.server.OzoneProtocolMessageDispatcher.processRequest(OzoneProtocolMessageDispatcher.java:87)
    at org.apache.hadoop.hdds.scm.protocol.ScmBlockLocationProtocolServerSideTranslatorPB.send(ScmBlockLocationProtocolServerSideTranslatorPB.java:113)
    at org.apache.hadoop.hdds.protocol.proto.ScmBlockLocationProtocolProtos$ScmBlockLocationProtocolService$2.callBlockingMethod(ScmBlockLocationProtocolProtos.java:14238)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1899)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2899)
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to