[
https://issues.apache.org/jira/browse/HDDS-14649?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Siyao Meng updated HDDS-14649:
------------------------------
Description:
Symptom: Observed on a cluster (custom branch):
{code}
$ sudo -u om ozone admin om snapshot defrag --service-id=ozone1771242317
--node-id=om1546336036
Triggering Snapshot Defrag Service ...
com.google.protobuf.ServiceException:
org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
java.lang.NullPointerException
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
at
org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
at
java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
, while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
sleeping for 1000ms.
com.google.protobuf.ServiceException:
org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
java.lang.NullPointerException
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
at
org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
at
java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
, while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
sleeping for 1000ms.
{code}
The problem is that an exception doesn't always carry a detailMessage; when it
doesn't, getMessage() returns null:
{code}
/**
* Returns the detail message string of this throwable.
*
* @return the detail message string of this {@code Throwable} instance
* (which may be {@code null}).
*/
public String getMessage() {
return detailMessage;
}
{code}
Solution: Fall back to printing the stack trace when ex.getMessage() is null.
was:
Symptom:
Observed on a cluster (with a custom branch):
{code}
$ sudo -u om ozone admin om snapshot defrag --service-id=ozone1771242317
--node-id=om1546336036
Triggering Snapshot Defrag Service ...
com.google.protobuf.ServiceException:
org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
java.lang.NullPointerException
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
at
org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
at
java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
, while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
sleeping for 1000ms.
com.google.protobuf.ServiceException:
org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
java.lang.NullPointerException
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
at
org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
at
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
at
java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
, while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
sleeping for 1000ms.
{code}
Problem is, exception doesn't always carry a detailMessage, when it doesn't, it
becomes null:
{code}
/**
* Returns the detail message string of this throwable.
*
* @return the detail message string of this {@code Throwable} instance
* (which may be {@code null}).
*/
public String getMessage() {
return detailMessage;
}
{code}
Solution: Fall back to printing stack trace when ex.getMessage() is null.
> setErrorMsg() throws NPE when ex.getMessage() is null, hiding the real cause
> ----------------------------------------------------------------------------
>
> Key: HDDS-14649
> URL: https://issues.apache.org/jira/browse/HDDS-14649
> Project: Apache Ozone
> Issue Type: Bug
> Reporter: Siyao Meng
> Assignee: Siyao Meng
> Priority: Major
>
> Symptom: Observed on a cluster (custom branch):
> {code}
> $ sudo -u om ozone admin om snapshot defrag --service-id=ozone1771242317
> --node-id=om1546336036
> Triggering Snapshot Defrag Service ...
> com.google.protobuf.ServiceException:
> org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
> java.lang.NullPointerException
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
> at
> org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
> at
> java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
> , while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
> sleeping for 1000ms.
> com.google.protobuf.ServiceException:
> org.apache.hadoop.ipc.RemoteException(java.lang.NullPointerException):
> java.lang.NullPointerException
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$TriggerSnapshotDefragResponse$Builder.setErrorMsg(OzoneManagerAdminProtocolProtos.java:5369)
> at
> org.apache.hadoop.ozone.protocolPB.OMAdminProtocolServerSideImpl.triggerSnapshotDefrag(OMAdminProtocolServerSideImpl.java:133)
> at
> org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos$OzoneManagerAdminService$2.callBlockingMethod(OzoneManagerAdminProtocolProtos.java:5549)
> at
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:533)
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1070)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:995)
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:923)
> at
> java.base/java.security.AccessController.doPrivileged(AccessController.java:712)
> at java.base/javax.security.auth.Subject.doAs(Subject.java:439)
> at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1910)
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2905)
> , while invoking $Proxy20.triggerSnapshotDefrag over null. Retrying after
> sleeping for 1000ms.
> {code}
> The problem is that an exception doesn't always carry a detailMessage; when
> it doesn't, getMessage() returns null:
> {code}
> /**
> * Returns the detail message string of this throwable.
> *
> * @return the detail message string of this {@code Throwable} instance
> * (which may be {@code null}).
> */
> public String getMessage() {
> return detailMessage;
> }
> {code}
> Solution: Fall back to printing the stack trace when ex.getMessage() is null.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]