Pratyush Bhatt created HDDS-10609:
-------------------------------------

             Summary: [HBase Ozone] HMaster aborts with "RENAME_OPEN_FILE 
org.apache.hadoop.ozone.om.exceptions.OMException"
                 Key: HDDS-10609
                 URL: https://issues.apache.org/jira/browse/HDDS-10609
             Project: Apache Ozone
          Issue Type: Bug
          Components: OM
            Reporter: Pratyush Bhatt


_HMaster_ continuously throws "RENAME_OPEN_FILE 
org.apache.hadoop.ozone.om.exceptions.OMException", mostly for a single 
file ({color:#0747a6}_key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466_{color}),
 for approximately 18 minutes and then aborts.
{code:java}
2024-03-27 10:51:43,369 ERROR 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem: rename key failed: Open 
file cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466.
 
source:volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466,
 
destin:volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466
2024-03-27 10:51:43,369 ERROR 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Failed log archiving 
for the log 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466,
RENAME_OPEN_FILE org.apache.hadoop.ozone.om.exceptions.OMException: Open file 
cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.handleError(OzoneManagerProtocolClientSideTranslatorPB.java:756)
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.renameKey(OzoneManagerProtocolClientSideTranslatorPB.java:932)
        at 
org.apache.hadoop.ozone.client.rpc.RpcClient.renameKey(RpcClient.java:1612)
        at 
org.apache.hadoop.ozone.client.OzoneBucket.renameKey(OzoneBucket.java:645)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.rename(BasicRootedOzoneClientAdapterImpl.java:497)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.renameFSO(BasicRootedOzoneFileSystem.java:487)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.rename(BasicRootedOzoneFileSystem.java:400)
        at 
org.apache.hadoop.hbase.util.CommonFSUtils.renameAndSetModifyTime(CommonFSUtils.java:711)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archiveLogFile(AbstractFSWAL.java:785)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archive(AbstractFSWAL.java:750)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.lambda$cleanOldLogs$1(AbstractFSWAL.java:738)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
2024-03-27 10:51:43,370 INFO 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Archiving 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
 to 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
[... end of log file reached; the excerpt below continues from an earlier point in the same log ...]
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
2024-03-27 10:51:43,125 ERROR 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Failed log archiving 
for the log 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466,
RENAME_OPEN_FILE org.apache.hadoop.ozone.om.exceptions.OMException: Open file 
cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.handleError(OzoneManagerProtocolClientSideTranslatorPB.java:756)
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.renameKey(OzoneManagerProtocolClientSideTranslatorPB.java:932)
        at 
org.apache.hadoop.ozone.client.rpc.RpcClient.renameKey(RpcClient.java:1612)
        at 
org.apache.hadoop.ozone.client.OzoneBucket.renameKey(OzoneBucket.java:645)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.rename(BasicRootedOzoneClientAdapterImpl.java:497)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.renameFSO(BasicRootedOzoneFileSystem.java:487)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.rename(BasicRootedOzoneFileSystem.java:400)
        at 
org.apache.hadoop.hbase.util.CommonFSUtils.renameAndSetModifyTime(CommonFSUtils.java:711)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archiveLogFile(AbstractFSWAL.java:785)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archive(AbstractFSWAL.java:750)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.lambda$cleanOldLogs$1(AbstractFSWAL.java:738)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
2024-03-27 10:51:43,125 INFO 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Archiving 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
 to 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
2024-03-27 10:51:43,133 ERROR 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem: rename key failed: Open 
file cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466.
 
source:volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466,
 
destin:volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
2024-03-27 10:51:43,133 ERROR 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Failed log archiving 
for the log 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466,
RENAME_OPEN_FILE org.apache.hadoop.ozone.om.exceptions.OMException: Open file 
cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.handleError(OzoneManagerProtocolClientSideTranslatorPB.java:756)
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.renameKey(OzoneManagerProtocolClientSideTranslatorPB.java:932)
        at 
org.apache.hadoop.ozone.client.rpc.RpcClient.renameKey(RpcClient.java:1612)
        at 
org.apache.hadoop.ozone.client.OzoneBucket.renameKey(OzoneBucket.java:645)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.rename(BasicRootedOzoneClientAdapterImpl.java:497)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.renameFSO(BasicRootedOzoneFileSystem.java:487)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.rename(BasicRootedOzoneFileSystem.java:400)
        at 
org.apache.hadoop.hbase.util.CommonFSUtils.renameAndSetModifyTime(CommonFSUtils.java:711)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archiveLogFile(AbstractFSWAL.java:785)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archive(AbstractFSWAL.java:750)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.lambda$cleanOldLogs$1(AbstractFSWAL.java:738)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
/ABORTING
        at 
org.apache.hadoop.hbase.wal.AbstractWALRoller.run(AbstractWALRoller.java:240)
2024-03-27 10:51:43,196 ERROR 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem: rename key failed: Open 
file cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466.
 
source:volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466,
 
destin:volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466
2024-03-27 10:51:43,196 ERROR 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Failed log archiving 
for the log 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.root.comops.site%2C22001%2C1711528430307.1711534016466,
RENAME_OPEN_FILE org.apache.hadoop.ozone.om.exceptions.OMException: Open file 
cannot be renamed since it is hsync'ed: volumeName=volhbase1, 
bucketName=buckethbase1, 
key=hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.root.comops.site,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.handleError(OzoneManagerProtocolClientSideTranslatorPB.java:756)
        at 
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB.renameKey(OzoneManagerProtocolClientSideTranslatorPB.java:932)
        at 
org.apache.hadoop.ozone.client.rpc.RpcClient.renameKey(RpcClient.java:1612)
        at 
org.apache.hadoop.ozone.client.OzoneBucket.renameKey(OzoneBucket.java:645)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl.rename(BasicRootedOzoneClientAdapterImpl.java:497)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.renameFSO(BasicRootedOzoneFileSystem.java:487)
        at 
org.apache.hadoop.fs.ozone.BasicRootedOzoneFileSystem.rename(BasicRootedOzoneFileSystem.java:400)
        at 
org.apache.hadoop.hbase.util.CommonFSUtils.renameAndSetModifyTime(CommonFSUtils.java:711)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archiveLogFile(AbstractFSWAL.java:785)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.archive(AbstractFSWAL.java:750)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.lambda$cleanOldLogs$1(AbstractFSWAL.java:738)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
        at java.lang.Thread.run(Thread.java:748)
2024-03-27 10:51:43,197 INFO 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL: Archiving 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/WALs/ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466
 to 
ofs://ozone1711476173/volhbase1/buckethbase1/hbase/MasterData/oldWALs/ccycloud-8.ozn-hb932chf3oz.xyz%2C22001%2C1711528430307.1711534016466
 {code}
 The HMaster then aborts with the following error:
{code:java}
016466
2024-03-27 10:51:43,199 ERROR org.apache.hadoop.hbase.master.HMaster: Master 
server abort: loaded coprocessors are: 
[org.apache.ranger.authorization.hbase.RangerAuthorizationCoprocessor]
2024-03-27 10:51:43,199 ERROR org.apache.hadoop.hbase.master.HMaster: ***** 
ABORTING master ccycloud-8.ozn-hb932chf3oz.xyz,22001,1711528430307: Log rolling 
failed *****
java.lang.RuntimeException
        at 
org.apache.hadoop.hbase.regionserver.wal.AsyncProtobufLogWriter.writeWALMetadata(AsyncProtobufLogWriter.java:217)
        at 
org.apache.hadoop.hbase.regionserver.wal.AsyncProtobufLogWriter.writeMagicAndWALHeader(AsyncProtobufLogWriter.java:223)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractProtobufLogWriter.init(AbstractProtobufLogWriter.java:164)
        at 
org.apache.hadoop.hbase.wal.AsyncFSWALProvider.createAsyncWriter(AsyncFSWALProvider.java:116)
        at 
org.apache.hadoop.hbase.regionserver.wal.AsyncFSWAL.createWriterInstance(AsyncFSWAL.java:726)
        at 
org.apache.hadoop.hbase.regionserver.wal.AsyncFSWAL.createWriterInstance(AsyncFSWAL.java:129)
        at 
org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL.rollWriter(AbstractFSWAL.java:886)
        at 
org.apache.hadoop.hbase.wal.AbstractWALRoller$RollController.rollWal(AbstractWALRoller.java:304)
        at 
org.apache.hadoop.hbase.wal.AbstractWALRoller.run(AbstractWALRoller.java:211)
2024-03-27 10:51:43,200 INFO org.apache.ranger.plugin.util.PolicyRefresher: 
PolicyRefresher(serviceName=cm_hbase).run(): interrupted! Exiting thread
java.lang.InterruptedException
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.reportInterruptAfterWait(AbstractQueuedSynchronizer.java:2014)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2048)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
        at 
org.apache.ranger.plugin.util.PolicyRefresher.run(PolicyRefresher.java:208)
2024-03-27 10:51:43,200 INFO 
org.apache.ranger.audit.provider.AuditProviderFactory: ==> 
JVMShutdownHook.run() {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to