[ 
https://issues.apache.org/jira/browse/EAGLE-568?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Hao Chen updated EAGLE-568:
---------------------------
    Description: 
h1. Problem and Exception

{code}
2016-09-23 16:20:03 Thread-2-mrHistoryJobExecutor 
org.apache.eagle.jpm.mr.history.storm.JobHistorySpout [ERROR] fail crawling job 
history file and continue ...
java.lang.RuntimeException: org.apache.hadoop.hdfs.BlockMissingException: Could 
not obtain block: 
BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
        at 
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2398) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2261) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2168) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.iterator(Configuration.java:2210) 
~[stormjar.jar:na]
        at java.lang.Iterable.forEach(Iterable.java:74) ~[na:1.8.0_91]
        at org.apache.eagle.jpm.util.Utils.fetchJobType(Utils.java:112) 
~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFEventReaderBase.<init>(JHFEventReaderBase.java:121)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFMRVer2EventReader.<init>(JHFMRVer2EventReader.java:47)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFParserFactory.getParser(JHFParserFactory.java:49)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.DefaultJHFInputStreamCallback.onInputStream(DefaultJHFInputStreamCallback.java:55)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.AbstractJobHistoryDAO.readFileContent(AbstractJobHistoryDAO.java:163)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.JHFCrawlerDriverImpl.crawl(JHFCrawlerDriverImpl.java:173)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.storm.JobHistorySpout.nextTuple(JobHistorySpout.java:166)
 ~[stormjar.jar:na]
        at 
backtype.storm.daemon.executor$fn__5629$fn__5644$fn__5673.invoke(executor.clj:585)
 [storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
        at backtype.storm.util$async_loop$fn__452.invoke(util.clj:465) 
[storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
        at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
        at java.lang.Thread.run(Thread.java:745) [na:1.8.0_91]
Caused by: org.apache.hadoop.hdfs.BlockMissingException: Could not obtain 
block: BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
        at 
org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:880) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:560) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:790) 
~[stormjar.jar:na]
        at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:837) 
~[stormjar.jar:na]
        at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:645) 
~[stormjar.jar:na]
        at java.io.FilterInputStream.read(FilterInputStream.java:83) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLEntityManager$RewindableInputStream.read(XMLEntityManager.java:2899)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:674)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:812)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:243) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:339)
 ~[na:1.8.0_91]
        at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:121) 
~[na:1.8.0_91]
        at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2239) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2312) 
~[stormjar.jar:na]
        ... 16 common frames omitted
{code}

h1. Proposed Solution
Set a maximum number of retries for HDFS problems; once that limit is 
exceeded, skip the failing file instead of blocking the whole topology.

  was:
{code}
2016-09-23 16:20:03 Thread-2-mrHistoryJobExecutor 
org.apache.eagle.jpm.mr.history.storm.JobHistorySpout [ERROR] fail crawling job 
history file and continue ...
java.lang.RuntimeException: org.apache.hadoop.hdfs.BlockMissingException: Could 
not obtain block: 
BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
        at 
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2398) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2261) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2168) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.iterator(Configuration.java:2210) 
~[stormjar.jar:na]
        at java.lang.Iterable.forEach(Iterable.java:74) ~[na:1.8.0_91]
        at org.apache.eagle.jpm.util.Utils.fetchJobType(Utils.java:112) 
~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFEventReaderBase.<init>(JHFEventReaderBase.java:121)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFMRVer2EventReader.<init>(JHFMRVer2EventReader.java:47)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.parser.JHFParserFactory.getParser(JHFParserFactory.java:49)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.DefaultJHFInputStreamCallback.onInputStream(DefaultJHFInputStreamCallback.java:55)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.AbstractJobHistoryDAO.readFileContent(AbstractJobHistoryDAO.java:163)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.crawler.JHFCrawlerDriverImpl.crawl(JHFCrawlerDriverImpl.java:173)
 ~[stormjar.jar:na]
        at 
org.apache.eagle.jpm.mr.history.storm.JobHistorySpout.nextTuple(JobHistorySpout.java:166)
 ~[stormjar.jar:na]
        at 
backtype.storm.daemon.executor$fn__5629$fn__5644$fn__5673.invoke(executor.clj:585)
 [storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
        at backtype.storm.util$async_loop$fn__452.invoke(util.clj:465) 
[storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
        at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
        at java.lang.Thread.run(Thread.java:745) [na:1.8.0_91]
Caused by: org.apache.hadoop.hdfs.BlockMissingException: Could not obtain 
block: BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
        at 
org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:880) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:560) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:790) 
~[stormjar.jar:na]
        at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:837) 
~[stormjar.jar:na]
        at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:645) 
~[stormjar.jar:na]
        at java.io.FilterInputStream.read(FilterInputStream.java:83) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLEntityManager$RewindableInputStream.read(XMLEntityManager.java:2899)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:674)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:812)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
 ~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:243) 
~[na:1.8.0_91]
        at 
com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:339)
 ~[na:1.8.0_91]
        at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:121) 
~[na:1.8.0_91]
        at org.apache.hadoop.conf.Configuration.parse(Configuration.java:2239) 
~[stormjar.jar:na]
        at 
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2312) 
~[stormjar.jar:na]
        ... 16 common frames omitted
{code}


> Set max retry times for HDFS problems, otherwise skip instead of blocking 
> whole topology
> ----------------------------------------------------------------------------------------
>
>                 Key: EAGLE-568
>                 URL: https://issues.apache.org/jira/browse/EAGLE-568
>             Project: Eagle
>          Issue Type: Bug
>    Affects Versions: v0.5.0
>            Reporter: Hao Chen
>            Assignee: wujinhu
>              Labels: eagle-jpm, eagle-jpm-mr-history
>             Fix For: v0.5.0
>
>
> h1. Problem and Exception
> {code}
> 2016-09-23 16:20:03 Thread-2-mrHistoryJobExecutor 
> org.apache.eagle.jpm.mr.history.storm.JobHistorySpout [ERROR] fail crawling 
> job history file and continue ...
> java.lang.RuntimeException: org.apache.hadoop.hdfs.BlockMissingException: 
> Could not obtain block: 
> BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
> file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
>         at 
> org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2398) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:2261) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2168) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.conf.Configuration.iterator(Configuration.java:2210) 
> ~[stormjar.jar:na]
>         at java.lang.Iterable.forEach(Iterable.java:74) ~[na:1.8.0_91]
>         at org.apache.eagle.jpm.util.Utils.fetchJobType(Utils.java:112) 
> ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.parser.JHFEventReaderBase.<init>(JHFEventReaderBase.java:121)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.parser.JHFMRVer2EventReader.<init>(JHFMRVer2EventReader.java:47)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.parser.JHFParserFactory.getParser(JHFParserFactory.java:49)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.crawler.DefaultJHFInputStreamCallback.onInputStream(DefaultJHFInputStreamCallback.java:55)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.crawler.AbstractJobHistoryDAO.readFileContent(AbstractJobHistoryDAO.java:163)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.crawler.JHFCrawlerDriverImpl.crawl(JHFCrawlerDriverImpl.java:173)
>  ~[stormjar.jar:na]
>         at 
> org.apache.eagle.jpm.mr.history.storm.JobHistorySpout.nextTuple(JobHistorySpout.java:166)
>  ~[stormjar.jar:na]
>         at 
> backtype.storm.daemon.executor$fn__5629$fn__5644$fn__5673.invoke(executor.clj:585)
>  [storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
>         at backtype.storm.util$async_loop$fn__452.invoke(util.clj:465) 
> [storm-core-0.9.3.2.2.0.0-2041.jar:0.9.3.2.2.0.0-2041]
>         at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
>         at java.lang.Thread.run(Thread.java:745) [na:1.8.0_91]
> Caused by: org.apache.hadoop.hdfs.BlockMissingException: Could not obtain 
> block: BP-1687617170-10.8.211.11-1404135347814:blk_6965456919_1107923643088 
> file=/mapred/history/done/2016/09/23/000716/job_1470077190071_716284_conf.xml
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:880) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:560) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:790)
>  ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:837) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:645) 
> ~[stormjar.jar:na]
>         at java.io.FilterInputStream.read(FilterInputStream.java:83) 
> ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.impl.XMLEntityManager$RewindableInputStream.read(XMLEntityManager.java:2899)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(XMLEntityManager.java:674)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(XMLVersionDetector.java:189)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:812)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:777)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.parsers.DOMParser.parse(DOMParser.java:243)
>  ~[na:1.8.0_91]
>         at 
> com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderImpl.parse(DocumentBuilderImpl.java:339)
>  ~[na:1.8.0_91]
>         at javax.xml.parsers.DocumentBuilder.parse(DocumentBuilder.java:121) 
> ~[na:1.8.0_91]
>         at 
> org.apache.hadoop.conf.Configuration.parse(Configuration.java:2239) 
> ~[stormjar.jar:na]
>         at 
> org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:2312) 
> ~[stormjar.jar:na]
>         ... 16 common frames omitted
> {code}
> h1. Proposed Solution
> Set a maximum number of retries for HDFS problems; once that limit is 
> exceeded, skip the failing file instead of blocking the whole topology.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to