[ https://issues.apache.org/jira/browse/HDFS-7414?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14232582#comment-14232582 ]

Brahma Reddy Battula commented on HDFS-7414:
--------------------------------------------

 Following is my analysis. Two things might cause edits to go missing: 1) snapshot operations, and 2) deleting a directory while tailing of the in-progress edits is underway.
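To make the second suspicion concrete, here is a minimal toy sketch (illustrative classes only, not HDFS code; the op names and namespace model are made up): if the batch of tailed edits removes a directory before the CloseOp of a file underneath it is applied (or the corresponding create edits were never recorded), the CLOSE lookup fails with the same FileNotFoundException seen in the stack trace below, and the tailer treats it as fatal. In the real loader the lookup goes through INodeFile.valueOf(), which is exactly where the trace throws.

{code:java}
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Toy model of standby edit replay: if a recursive DELETE of a parent
 * directory is applied before a CLOSE op that still references a file
 * under it, the CLOSE lookup fails just like INodeFile.valueOf() does
 * in the stack trace. All ops and classes here are illustrative only.
 */
public class EditReplaySketch {

    /** Simplified "namespace": just the set of existing paths. */
    private final Set<String> namespace = new HashSet<>();

    void apply(String op, String path) throws FileNotFoundException {
        switch (op) {
            case "CREATE":
                namespace.add(path);
                break;
            case "DELETE_RECURSIVE":
                // Deleting a directory removes everything underneath it.
                namespace.removeIf(p -> p.equals(path) || p.startsWith(path + "/"));
                break;
            case "CLOSE":
                // Mirrors the valueOf() check: the inode must still exist.
                if (!namespace.contains(path)) {
                    throw new FileNotFoundException("File does not exist: " + path);
                }
                break;
            default:
                throw new IllegalArgumentException("unknown op " + op);
        }
    }

    public static void main(String[] args) throws Exception {
        EditReplaySketch standby = new EditReplaySketch();
        String file = "/outDir/_temporary/1/_temporary/attempt_x/part-m-00003";

        // Edits as the standby happens to see them in one tailing batch.
        List<String[]> edits = Arrays.asList(
                new String[] {"CREATE", file},
                new String[] {"DELETE_RECURSIVE", "/outDir/_temporary"},
                new String[] {"CLOSE", file});

        for (String[] e : edits) {
            standby.apply(e[0], e[1]); // CLOSE throws FileNotFoundException,
                                       // which the real tailer treats as fatal
        }
    }
}
{code}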

 *{color:blue}Standby NameNode{color}* 

{noformat}
2014-11-27 23:57:05,835 | INFO  | Edit log tailer | Triggering log roll on 
remote NameNode linux157/*.*.*.157:25000 | 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer.triggerActiveLogRoll(EditLogTailer.java:269)
2014-11-27 23:57:06,129 | INFO  | Edit log tailer | Reading 
org.apache.hadoop.hdfs.server.namenode.RedundantEditLogInputStream@419cf8c9 
expecting start txid #1034147 | 
org.apache.hadoop.hdfs.server.namenode.FSImage.loadEdits(FSImage.java:802)
2014-11-27 23:57:06,130 | INFO  | Edit log tailer | Start loading edits file 
https://linux158:8481/getJournal?jid=hacluster&segmentTxId=1034147&storageInfo=-56%3A295084204%3A0%3Amyhacluster,
 
https://linux156:8481/getJournal?jid=hacluster&segmentTxId=1034147&storageInfo=-56%3A295084204%3A0%3Amyhacluster
 | 
org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadFSEdits(FSEditLogLoader.java:132)
2014-11-27 23:57:06,130 | INFO  | Edit log tailer | Fast-forwarding stream 
'https://linux158:8481/getJournal?jid=hacluster&segmentTxId=1034147&storageInfo=-56%3A295084204%3A0%3Amyhacluster,
 
https://linux156:8481/getJournal?jid=hacluster&segmentTxId=1034147&storageInfo=-56%3A295084204%3A0%3Amyhacluster'
 to transaction ID 1034147 | 
org.apache.hadoop.hdfs.server.namenode.RedundantEditLogInputStream.nextOp(RedundantEditLogInputStream.java:176)
2014-11-27 23:57:06,130 | INFO  | Edit log tailer | Fast-forwarding stream 
'https://linux158:8481/getJournal?jid=hacluster&segmentTxId=1034147&storageInfo=-56%3A295084204%3A0%3Amyhacluster'
 to transaction ID 1034147 | 
org.apache.hadoop.hdfs.server.namenode.RedundantEditLogInputStream.nextOp(RedundantEditLogInputStream.java:176)
2014-11-27 23:57:06,170 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994325_253503{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,170 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994326_253504{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,171 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994327_253505{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,171 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994328_253506{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,171 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994329_253507{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,173 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994330_253508{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,173 | INFO  | Edit log tailer | BLOCK* addStoredBlock: 
blockMap updated: *.*.*.157:25009 is added to 
blk_1073994331_253509{blockUCState=UNDER_CONSTRUCTION, primaryNodeIndex=-1, 
replicas=[ReplicaUnderConstruction[[DISK]DS-70872ee7-101c-4c50-a7d2-8876bf23271b:NORMAL|RBW]]}
 size 0 | 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.logAddStoredBlock(BlockManager.java:2393)
2014-11-27 23:57:06,175 | ERROR | Edit log tailer | Encountered exception on 
operation CloseOp [length=0, inodeId=0, 
path=/outDir/_temporary/1/_temporary/attempt_1417011927625_0166_m_000003_0/part-m-00003,
 replication=3, mtime=1417103710975, atime=1417103621195, blockSize=67108864, 
blocks=[blk_1073994159_253337, blk_1073994183_253361, blk_1073994200_253378, 
blk_1073994218_253396, blk_1073994232_253410, blk_1073994250_253428, 
blk_1073994262_253440, blk_1073994276_253454, blk_1073994283_253461, 
blk_1073994287_253465, blk_1073994297_253475, blk_1073994306_253484, 
blk_1073994317_253495], permissions=hdfs:supergroup:rw-r--r--, aclEntries=null, 
clientName=, clientMachine=, opCode=OP_CLOSE, txid=1034186] | 
org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadEditRecords(FSEditLogLoader.java:232)
java.io.FileNotFoundException: File does not exist: 
/outDir/_temporary/1/_temporary/attempt_1417011927625_0166_m_000003_0/part-m-00003
        at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:65)
        at 
org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:55)
        at 
org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.applyEditLogOp(FSEditLogLoader.java:409)
        at 
org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadEditRecords(FSEditLogLoader.java:224)
        at 
org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadFSEdits(FSEditLogLoader.java:133)
        at 
org.apache.hadoop.hdfs.server.namenode.FSImage.loadEdits(FSImage.java:805)
        at 
org.apache.hadoop.hdfs.server.namenode.FSImage.loadEdits(FSImage.java:786)
        at 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer.doTailEdits(EditLogTailer.java:230)
        at 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer$EditLogTailerThread.doWork(EditLogTailer.java:324)
        at 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer$EditLogTailerThread.access$200(EditLogTailer.java:282)
        at 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer$EditLogTailerThread$1.run(EditLogTailer.java:299)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:356)
        at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1592)
        at 
org.apache.hadoop.security.SecurityUtil.doAsLoginUserOrFatal(SecurityUtil.java:413)
        at 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer$EditLogTailerThread.run(EditLogTailer.java:295)
2014-11-27 23:57:06,180 | FATAL | Edit log tailer | Unknown error encountered 
while tailing edits. Shutting down standby NN. | 
org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer$EditLogTailerThread.doWork(EditLogTailer.java:331)
java.io.FileNotFoundException: File does not exist: 
/outDir/_temporary/1/_temporary/attempt_1417011927625_0166_m_000003_0/part-m-00003
{noformat}

 *{color:blue}Active NameNode{color}* 
{noformat}
2014-11-27 23:57:05,845 | INFO  | Socket Reader #1 for port 25000 | Auth 
successful for hdfs/[email protected] (auth:KERBEROS) | 
org.apache.hadoop.ipc.Server$Connection.saslProcess(Server.java:1294)
2014-11-27 23:57:05,847 | INFO  | Socket Reader #1 for port 25000 | 
Authorization successful for hdfs/[email protected] (auth:KERBEROS) for 
protocol=interface org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol | 
org.apache.hadoop.security.authorize.ServiceAuthorizationManager.authorize(ServiceAuthorizationManager.java:114)
2014-11-27 23:57:05,848 | INFO  | IPC Server handler 25 on 25000 | Roll Edit 
Log from *.*.*.158 | 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.rollEditLog(FSNamesystem.java:5478)
2014-11-27 23:57:05,848 | INFO  | IPC Server handler 25 on 25000 | Rolling edit 
logs | 
org.apache.hadoop.hdfs.server.namenode.FSEditLog.rollEditLog(FSEditLog.java:1096)
2014-11-27 23:57:05,848 | INFO  | IPC Server handler 25 on 25000 | Ending log 
segment 1034147 | 
org.apache.hadoop.hdfs.server.namenode.FSEditLog.endCurrentLogSegment(FSEditLog.java:1153)
2014-11-27 23:57:05,915 | INFO  | IPC Server handler 25 on 25000 | Number of 
transactions: 82 Total time for transactions(ms): 28 Number of transactions 
batched in Syncs: 9 Number of syncs: 47 SyncTimes(ms): 846 874  | 
org.apache.hadoop.hdfs.server.namenode.FSEditLog.printStatistics(FSEditLog.java:673)
2014-11-27 23:57:05,927 | INFO  | IPC Server handler 25 on 25000 | Finalizing 
edits file /srv/BigData/namenode/current/edits_inprogress_0000000000001034147 
-> /srv/BigData/namenode/current/edits_0000000000001034147-0000000000001034228 
| 
org.apache.hadoop.hdfs.server.namenode.FileJournalManager.finalizeLogSegment(FileJournalManager.java:132)
2014-11-27 23:57:05,927 | INFO  | IPC Server handler 25 on 25000 | Starting log 
segment at 1034229 | 
org.apache.hadoop.hdfs.server.namenode.FSEditLog.startLogSegment(FSEditLog.java:1112)
{noformat}
 *Audit log* 
{noformat}
2014-11-27 23:56:48,044 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=renameSnapshot      
src=/.snapshot/snap_17  dst=/.snapshot/snapnew_17       perm=null
2014-11-27 23:56:50,921 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/outwritwe  dst=null        perm=null
2014-11-27 23:56:50,939 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/.snapshot/snapnew_17/outwritwe     dst=null        perm=null
2014-11-27 23:56:50,941 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo src=/   
dst=null        perm=null
2014-11-27 23:57:20,454 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/outDir     dst=null        perm=null
2014-11-27 23:57:20,473 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/tmp/hadoop-yarn/staging/hdfs/.staging      dst=null        perm=null
2014-11-27 23:57:20,476 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/tmp/hadoop-yarn/staging/hdfs/.staging      dst=null        perm=null
2014-11-27 23:57:20,517 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/tmp/hadoop-yarn/staging/hdfs/.staging/job_1417011927625_0167       
dst=null        perm=null
2014-11-27 23:57:20,557 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=mkdirs      
2014-11-27 23:57:20,600 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=getfileinfo 
src=/tmp/hadoop-yarn/staging/hdfs/.staging/job_1417011927625_0167/job.jar       
dst=null        perm=null
2014-11-27 23:57:20,643 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=create      
src=/tmp/hadoop-yarn/staging/hdfs/.staging/job_1417011927625_0167/job.jar       
dst=null        perm=hdfs:hadoop:rw-r--r--
2014-11-27 23:57:21,040 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=setReplication      
src=/tmp/hadoop-yarn/staging/hdfs/.staging/job_1417011927625_0167/job.jar       
dst=null        perm=null
2014-11-27 23:57:21,085 INFO FSNamesystem.audit: allowed=true   
[email protected] (auth:KERBEROS)     ip=/*.*.*.156   cmd=setPermission       
src=/tmp/hadoop-yarn/staging/hdfs/.staging/job_1417011927625_0167/job.jar       
dst=null        perm=hdfs:hadoop:rw-r--r--
{noformat}
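
Note that the audit log shows a renameSnapshot of /.snapshot/snap_17 at 23:56:48, about 18 seconds before the standby died at 23:57:06 replaying the CloseOp under /outDir/_temporary. For anyone who wants to try reproducing, below is a rough sketch of the suspected sequence using the public client API. The paths, snapshot names and the open-file handling are my assumptions, it needs a test HA cluster with fs.defaultFS pointing at it, and it only exercises the sequence; it is not a confirmed repro.

{code:java}
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

/**
 * Reproduction attempt for the suspected sequence: snapshot rename on a
 * snapshottable root plus a recursive delete of a directory that still
 * has a file open under it. Run against a disposable test HA cluster.
 */
public class Hdfs7414Repro {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(conf);

        Path root = new Path("/");
        Path tmpDir = new Path("/outDir/_temporary/1/_temporary/attempt_test");
        Path openFile = new Path(tmpDir, "part-m-00000");

        fs.allowSnapshot(root);                       // make "/" snapshottable
        FSDataOutputStream out = fs.create(openFile); // file left under construction
        out.write(new byte[] {1, 2, 3});
        out.hflush();                                 // data visible, file not closed

        fs.createSnapshot(root, "snap_17");           // snapshot while file is open
        fs.renameSnapshot(root, "snap_17", "snapnew_17");

        // Delete the parent dir while the file is still open, then try to close
        // it; whether and how a CloseOp reaches the edit log at this point is
        // exactly the open question.
        fs.delete(new Path("/outDir/_temporary"), true);
        try {
            out.close();
        } catch (Exception expected) {
            System.out.println("close after delete failed: " + expected);
        }
    }
}
{code}

After running this, trigger (or wait for) a log roll and check whether the standby survives tailing that segment.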

> Namenode got shutdown and can't recover where edit update might be missed
> -------------------------------------------------------------------------
>
>                 Key: HDFS-7414
>                 URL: https://issues.apache.org/jira/browse/HDFS-7414
>             Project: Hadoop HDFS
>          Issue Type: Bug
>    Affects Versions: 2.4.1, 2.5.1
>            Reporter: Brahma Reddy Battula
>            Assignee: Brahma Reddy Battula
>            Priority: Blocker
>
> Scenario:
> ========
> A MapReduce job was running.
> CPU usage on the DataNode machine crossed 190% and the machine became slow,
> and the following exception was seen.
>  *I did not find the exact root cause; with the CPU that heavily loaded, an edit log 
> update might have been missed. This needs more digging. Does anyone have any thoughts?* 
> {noformat}
> 2014-11-20 05:01:18,430 | ERROR | main | Encountered exception on operation 
> CloseOp [length=0, inodeId=0, 
> path=/outDir2/_temporary/1/_temporary/attempt_1416390004064_0002_m_000025_1/part-m-00025,
>  replication=2, mtime=1416409309023, atime=1416409290816, blockSize=67108864, 
> blocks=[blk_1073766144_25321, blk_1073766154_25331, blk_1073766160_25337], 
> permissions=mapred:supergroup:rw-r--r--, aclEntries=null, clientName=, 
> clientMachine=, opCode=OP_CLOSE, txid=162982] | 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadEditRecords(FSEditLogLoader.java:232)
> java.io.FileNotFoundException: File does not exist: 
> /outDir2/_temporary/1/_temporary/attempt_1416390004064_0002_m_000025_1/part-m-00025
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:65)
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:55)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.applyEditLogOp(FSEditLogLoader.java:409)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadEditRecords(FSEditLogLoader.java:224)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadFSEdits(FSEditLogLoader.java:133)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.loadEdits(FSImage.java:805)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.loadFSImage(FSImage.java:665)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:272)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:893)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:640)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:519)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:575)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:741)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:724)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1387)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1459)
> 2014-11-20 05:01:18,654 | WARN  | main | Encountered exception loading 
> fsimage | 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:642)
> java.io.FileNotFoundException: File does not exist: 
> /outDir2/_temporary/1/_temporary/attempt_1416390004064_0002_m_000025_1/part-m-00025
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:65)
>         at 
> org.apache.hadoop.hdfs.server.namenode.INodeFile.valueOf(INodeFile.java:55)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.applyEditLogOp(FSEditLogLoader.java:409)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadEditRecords(FSEditLogLoader.java:224)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.loadFSEdits(FSEditLogLoader.java:133)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.loadEdits(FSImage.java:805)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.loadFSImage(FSImage.java:665)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:272)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:893)
>         at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:640)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:519)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:575)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:741)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:724)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1387)
>         at 
> org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1459)
> {noformat}



