This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new f8967c4 NUTCH-2923: Added JobId in Job Failure logs (#721) f8967c4 is described below commit f8967c431a680eb8802fb5d9c396abcb69d11d29 Author: Prakhar Chaube <52599016+prakharcha...@users.noreply.github.com> AuthorDate: Thu Jan 27 21:33:51 2022 +0530 NUTCH-2923: Added JobId in Job Failure logs (#721) * added JobId in Job Failure logs * moved job failure log message logic to NutchJob.java * added description for throws in JavaDoc * logging only state from Job Status and Simplified Job name for SitemapProcessor --- src/java/org/apache/nutch/crawl/CrawlDb.java | 5 ++--- src/java/org/apache/nutch/crawl/CrawlDbMerger.java | 4 +--- src/java/org/apache/nutch/crawl/CrawlDbReader.java | 16 ++++----------- .../org/apache/nutch/crawl/DeduplicationJob.java | 8 ++------ src/java/org/apache/nutch/crawl/Generator.java | 12 +++-------- src/java/org/apache/nutch/crawl/Injector.java | 4 +--- src/java/org/apache/nutch/crawl/LinkDb.java | 8 ++------ src/java/org/apache/nutch/crawl/LinkDbMerger.java | 4 +--- src/java/org/apache/nutch/crawl/LinkDbReader.java | 4 +--- src/java/org/apache/nutch/fetcher/Fetcher.java | 4 +--- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 5 ++--- src/java/org/apache/nutch/hostdb/UpdateHostDb.java | 4 +--- src/java/org/apache/nutch/indexer/CleaningJob.java | 4 +--- src/java/org/apache/nutch/indexer/IndexingJob.java | 4 +--- src/java/org/apache/nutch/parse/ParseSegment.java | 4 +--- .../apache/nutch/scoring/webgraph/LinkDumper.java | 10 ++++----- .../apache/nutch/scoring/webgraph/LinkRank.java | 18 ++++++---------- .../apache/nutch/scoring/webgraph/NodeDumper.java | 4 +--- .../nutch/scoring/webgraph/ScoreUpdater.java | 5 ++--- .../apache/nutch/scoring/webgraph/WebGraph.java | 14 +++++-------- .../org/apache/nutch/segment/SegmentMerger.java | 4 +--- .../org/apache/nutch/segment/SegmentReader.java | 4 +--- src/java/org/apache/nutch/tools/FreeGenerator.java | 4 +--- .../apache/nutch/tools/arc/ArcSegmentCreator.java | 5 ++--- .../org/apache/nutch/tools/warc/WARCExporter.java | 4 +--- .../apache/nutch/util/CrawlCompletionStats.java | 4 +--- src/java/org/apache/nutch/util/NutchJob.java | 24 ++++++++++++++++++++++ .../nutch/util/ProtocolStatusStatistics.java | 4 +--- .../org/apache/nutch/util/SitemapProcessor.java | 5 ++--- .../apache/nutch/util/domain/DomainStatistics.java | 5 ++--- 30 files changed, 78 insertions(+), 126 deletions(-) diff --git a/src/java/org/apache/nutch/crawl/CrawlDb.java b/src/java/org/apache/nutch/crawl/CrawlDb.java index 5d91b0a..3819bb3 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDb.java +++ b/src/java/org/apache/nutch/crawl/CrawlDb.java @@ -129,9 +129,8 @@ public class CrawlDb extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDb update job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDb update", + job); LOG.error(message); NutchJob.cleanupAfterFailure(outPath, lock, fs); throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java index 6fef03a..70c6513 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDbMerger.java +++ b/src/java/org/apache/nutch/crawl/CrawlDbMerger.java @@ -144,9 +144,7 @@ public class CrawlDbMerger extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDbMerger job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDbMerger", job); LOG.error(message); NutchJob.cleanupAfterFailure(outPath, lock, fs); throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/crawl/CrawlDbReader.java b/src/java/org/apache/nutch/crawl/CrawlDbReader.java index f31210a..f5c782a 100644 --- a/src/java/org/apache/nutch/crawl/CrawlDbReader.java +++ b/src/java/org/apache/nutch/crawl/CrawlDbReader.java @@ -556,9 +556,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDbReader job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job); LOG.error(message); fileSystem.delete(tmpFolder, true); throw new RuntimeException(message); @@ -847,9 +845,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDbReader job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job); LOG.error(message); throw new RuntimeException(message); } @@ -959,9 +955,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDbReader job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job); LOG.error(message); fs.delete(tempDir, true); throw new RuntimeException(message); @@ -993,9 +987,7 @@ public class CrawlDbReader extends AbstractChecker implements Closeable { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "CrawlDbReader job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job); LOG.error(message); fs.delete(tempDir, true); throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/crawl/DeduplicationJob.java b/src/java/org/apache/nutch/crawl/DeduplicationJob.java index 5f1172d..ae5ac37 100644 --- a/src/java/org/apache/nutch/crawl/DeduplicationJob.java +++ b/src/java/org/apache/nutch/crawl/DeduplicationJob.java @@ -331,9 +331,7 @@ public class DeduplicationJob extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Crawl job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Crawl", job); LOG.error(message); fs.delete(tempDir, true); throw new RuntimeException(message); @@ -365,9 +363,7 @@ public class DeduplicationJob extends NutchTool implements Tool { try { boolean success = mergeJob.waitForCompletion(true); if (!success) { - String message = "Crawl job did not succeed, job status:" - + mergeJob.getStatus().getState() + ", reason: " - + mergeJob.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Crawl", mergeJob); LOG.error(message); fs.delete(tempDir, true); NutchJob.cleanupAfterFailure(outPath, lock, fs); diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java index 9fec0ec..7eee5e7 100644 --- a/src/java/org/apache/nutch/crawl/Generator.java +++ b/src/java/org/apache/nutch/crawl/Generator.java @@ -891,9 +891,7 @@ public class Generator extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Generator job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Generator", job); LOG.error(message); NutchJob.cleanupAfterFailure(tempDir, lock, fs); throw new RuntimeException(message); @@ -969,9 +967,7 @@ public class Generator extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Generator job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Generator", job); LOG.error(message); NutchJob.cleanupAfterFailure(tempDir, lock, fs); NutchJob.cleanupAfterFailure(tempDir2, lock, fs); @@ -1035,9 +1031,7 @@ public class Generator extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Generator job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Generator", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/crawl/Injector.java b/src/java/org/apache/nutch/crawl/Injector.java index 84dc812..a3512f6 100644 --- a/src/java/org/apache/nutch/crawl/Injector.java +++ b/src/java/org/apache/nutch/crawl/Injector.java @@ -435,9 +435,7 @@ public class Injector extends NutchTool implements Tool { // run the job boolean success = job.waitForCompletion(true); if (!success) { - String message = "Injector job did not succeed, job status: " - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Injector", job); LOG.error(message); NutchJob.cleanupAfterFailure(tempCrawlDb, lock, fs); // throw exception so that calling routine can exit with error diff --git a/src/java/org/apache/nutch/crawl/LinkDb.java b/src/java/org/apache/nutch/crawl/LinkDb.java index e53411f..2b3d2ed 100644 --- a/src/java/org/apache/nutch/crawl/LinkDb.java +++ b/src/java/org/apache/nutch/crawl/LinkDb.java @@ -224,9 +224,7 @@ public class LinkDb extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "LinkDb job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDb", job); LOG.error(message); LockUtil.removeLockFile(fs, lock); throw new RuntimeException(message); @@ -248,9 +246,7 @@ public class LinkDb extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "LinkDb job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDb", job); LOG.error(message); NutchJob.cleanupAfterFailure(newLinkDb, lock, fs); throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/crawl/LinkDbMerger.java b/src/java/org/apache/nutch/crawl/LinkDbMerger.java index 6b93329..f696c59 100644 --- a/src/java/org/apache/nutch/crawl/LinkDbMerger.java +++ b/src/java/org/apache/nutch/crawl/LinkDbMerger.java @@ -124,9 +124,7 @@ public class LinkDbMerger extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "LinkDbMerge job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDbMerge", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/crawl/LinkDbReader.java b/src/java/org/apache/nutch/crawl/LinkDbReader.java index 4cacd81..2bcceee 100644 --- a/src/java/org/apache/nutch/crawl/LinkDbReader.java +++ b/src/java/org/apache/nutch/crawl/LinkDbReader.java @@ -183,9 +183,7 @@ public class LinkDbReader extends AbstractChecker implements Closeable { try{ boolean success = job.waitForCompletion(true); if (!success) { - String message = "LinkDbRead job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDbRead", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/fetcher/Fetcher.java b/src/java/org/apache/nutch/fetcher/Fetcher.java index 9456c58..7cc87f4 100644 --- a/src/java/org/apache/nutch/fetcher/Fetcher.java +++ b/src/java/org/apache/nutch/fetcher/Fetcher.java @@ -521,9 +521,7 @@ public class Fetcher extends NutchTool implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Fetcher job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Fetcher", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java index f4bd742..930e62c 100644 --- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java +++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java @@ -41,6 +41,7 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.nutch.util.NutchConfiguration; +import org.apache.nutch.util.NutchJob; import org.apache.nutch.util.TimingUtil; import org.apache.nutch.util.SegmentReaderUtil; @@ -197,9 +198,7 @@ public class ReadHostDb extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "ReadHostDb job did not succeed, job status: " - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("ReadHostDb", job); LOG.error(message); // throw exception so that calling routine can exit with error throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/hostdb/UpdateHostDb.java b/src/java/org/apache/nutch/hostdb/UpdateHostDb.java index c903822..ffa68d0 100644 --- a/src/java/org/apache/nutch/hostdb/UpdateHostDb.java +++ b/src/java/org/apache/nutch/hostdb/UpdateHostDb.java @@ -132,9 +132,7 @@ public class UpdateHostDb extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "UpdateHostDb job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("UpdateHostDb", job); LOG.error(message); NutchJob.cleanupAfterFailure(tempHostDb, lock, fs); throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/indexer/CleaningJob.java b/src/java/org/apache/nutch/indexer/CleaningJob.java index ca1198e..dc3ed69 100644 --- a/src/java/org/apache/nutch/indexer/CleaningJob.java +++ b/src/java/org/apache/nutch/indexer/CleaningJob.java @@ -164,9 +164,7 @@ public class CleaningJob implements Tool { try{ boolean success = job.waitForCompletion(true); if (!success) { - String message = "CleaningJob did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("CleaningJob", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/indexer/IndexingJob.java b/src/java/org/apache/nutch/indexer/IndexingJob.java index 0fe29a7..ff46bc0 100644 --- a/src/java/org/apache/nutch/indexer/IndexingJob.java +++ b/src/java/org/apache/nutch/indexer/IndexingJob.java @@ -145,9 +145,7 @@ public class IndexingJob extends NutchTool implements Tool { try{ boolean success = job.waitForCompletion(true); if (!success) { - String message = "Indexing job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Indexing", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/parse/ParseSegment.java b/src/java/org/apache/nutch/parse/ParseSegment.java index f7c5797..7e4707d 100644 --- a/src/java/org/apache/nutch/parse/ParseSegment.java +++ b/src/java/org/apache/nutch/parse/ParseSegment.java @@ -254,9 +254,7 @@ public class ParseSegment extends NutchTool implements Tool { try{ boolean success = job.waitForCompletion(true); if (!success) { - String message = "Parse job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Parse", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java index 733edbc..6e2c629 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java @@ -355,9 +355,8 @@ public class LinkDumper extends Configured implements Tool { LOG.info("LinkDumper: running inverter"); boolean success = inverter.waitForCompletion(true); if (!success) { - String message = "LinkDumper inverter job did not succeed, job status:" - + inverter.getStatus().getState() + ", reason: " - + inverter.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDumper inverter", + inverter); LOG.error(message); throw new RuntimeException(message); } @@ -385,9 +384,8 @@ public class LinkDumper extends Configured implements Tool { LOG.info("LinkDumper: running merger"); boolean success = merger.waitForCompletion(true); if (!success) { - String message = "LinkDumper merger job did not succeed, job status:" - + merger.getStatus().getState() + ", reason: " - + merger.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("LinkDumper merger", + merger); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java index 39a9c63..739fe6c 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java +++ b/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java @@ -117,9 +117,8 @@ public class LinkRank extends Configured implements Tool { try { boolean success = counter.waitForCompletion(true); if (!success) { - String message = "Link counter job did not succeed, job status:" - + counter.getStatus().getState() + ", reason: " - + counter.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Link counter", + counter); LOG.error(message); throw new RuntimeException(message); } @@ -216,9 +215,8 @@ public class LinkRank extends Configured implements Tool { try { boolean success = initializer.waitForCompletion(true); if (!success) { - String message = "Initialization job did not succeed, job status:" - + initializer.getStatus().getState() + ", reason: " - + initializer.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Initialization", + initializer); LOG.error(message); throw new RuntimeException(message); } @@ -270,9 +268,7 @@ public class LinkRank extends Configured implements Tool { try { boolean success = inverter.waitForCompletion(true); if (!success) { - String message = "Inverter job did not succeed, job status:" - + inverter.getStatus().getState() + ", reason: " - + inverter.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Inverter", inverter); LOG.error(message); throw new RuntimeException(message); } @@ -334,9 +330,7 @@ public class LinkRank extends Configured implements Tool { try { boolean success = analyzer.waitForCompletion(true); if (!success) { - String message = "Analysis job did not succeed, job status:" - + analyzer.getStatus().getState() + ", reason: " - + analyzer.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Analysis", analyzer); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java index fc2875e..ede9fa1 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java +++ b/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java @@ -349,9 +349,7 @@ public class NodeDumper extends Configured implements Tool { LOG.info("NodeDumper: running"); boolean success = dumper.waitForCompletion(true); if (!success) { - String message = "NodeDumper job did not succeed, job status:" - + dumper.getStatus().getState() + ", reason: " - + dumper.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("NodeDumper", dumper); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java index 6cc604f..130e1b2 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java +++ b/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java @@ -188,9 +188,8 @@ public class ScoreUpdater extends Configured implements Tool{ try { boolean success = updater.waitForCompletion(true); if (!success) { - String message = "Update CrawlDb from WebGraph job did not succeed, job status:" - + updater.getStatus().getState() + ", reason: " - + updater.getStatus().getFailureInfo(); + String message = NutchJob + .getJobFailureLogMessage("Update CrawlDb from WebGraph", updater); LOG.error(message); // remove the temp crawldb on error FileSystem fs = newCrawlDb.getFileSystem(conf); diff --git a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java index 5b7a3fd..63d0ead 100644 --- a/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java +++ b/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java @@ -603,9 +603,8 @@ public class WebGraph extends Configured implements Tool { LOG.info("OutlinkDb: running"); boolean success = outlinkJob.waitForCompletion(true); if (!success) { - String message = "OutlinkDb job did not succeed, job status:" - + outlinkJob.getStatus().getState() + ", reason: " - + outlinkJob.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("OutlinkDb", + outlinkJob); LOG.error(message); NutchJob.cleanupAfterFailure(tempOutlinkDb, lock, fs); throw new RuntimeException(message); @@ -651,9 +650,8 @@ public class WebGraph extends Configured implements Tool { LOG.info("InlinkDb: running"); boolean success = inlinkJob.waitForCompletion(true); if (!success) { - String message = "InlinkDb job did not succeed, job status:" - + inlinkJob.getStatus().getState() + ", reason: " - + inlinkJob.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("InlinkDb", + inlinkJob); LOG.error(message); NutchJob.cleanupAfterFailure(tempInlinkDb, lock, fs); throw new RuntimeException(message); @@ -698,9 +696,7 @@ public class WebGraph extends Configured implements Tool { LOG.info("NodeDb: running"); boolean success = nodeJob.waitForCompletion(true); if (!success) { - String message = "NodeDb job did not succeed, job status:" - + nodeJob.getStatus().getState() + ", reason: " - + nodeJob.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("NodeDb", nodeJob); LOG.error(message); // remove lock file and and temporary directory if an error occurs NutchJob.cleanupAfterFailure(tempNodeDb, lock, fs); diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java index 2270647..056df3c 100644 --- a/src/java/org/apache/nutch/segment/SegmentMerger.java +++ b/src/java/org/apache/nutch/segment/SegmentMerger.java @@ -732,9 +732,7 @@ public class SegmentMerger extends Configured implements Tool{ try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "SegmentMerger job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("SegmentMerger", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java index 2f2fefd..877aeb6 100644 --- a/src/java/org/apache/nutch/segment/SegmentReader.java +++ b/src/java/org/apache/nutch/segment/SegmentReader.java @@ -235,9 +235,7 @@ public class SegmentReader extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "SegmentReader job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("SegmentReader", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/tools/FreeGenerator.java b/src/java/org/apache/nutch/tools/FreeGenerator.java index 57344bb..039bcca 100644 --- a/src/java/org/apache/nutch/tools/FreeGenerator.java +++ b/src/java/org/apache/nutch/tools/FreeGenerator.java @@ -218,9 +218,7 @@ public class FreeGenerator extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "FreeGenerator job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("FreeGenerator", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java index c0ebb2d..d95ba61 100644 --- a/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java +++ b/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java @@ -390,9 +390,8 @@ public class ArcSegmentCreator extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "ArcSegmentCreator job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("ArcSegmentCreator", + job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/tools/warc/WARCExporter.java b/src/java/org/apache/nutch/tools/warc/WARCExporter.java index 6943008..cf000ba 100644 --- a/src/java/org/apache/nutch/tools/warc/WARCExporter.java +++ b/src/java/org/apache/nutch/tools/warc/WARCExporter.java @@ -474,9 +474,7 @@ public class WARCExporter extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "WARCExporter job did not succeed, job status:" - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("WARCExporter", job); LOG.error(message); throw new RuntimeException(message); } diff --git a/src/java/org/apache/nutch/util/CrawlCompletionStats.java b/src/java/org/apache/nutch/util/CrawlCompletionStats.java index 621484c..7210ee8 100644 --- a/src/java/org/apache/nutch/util/CrawlCompletionStats.java +++ b/src/java/org/apache/nutch/util/CrawlCompletionStats.java @@ -170,9 +170,7 @@ public class CrawlCompletionStats extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = jobName + " job did not succeed, job status: " - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage(jobName, job); LOG.error(message); // throw exception so that calling routine can exit with error throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/util/NutchJob.java b/src/java/org/apache/nutch/util/NutchJob.java index 3e852eb..478b24f 100644 --- a/src/java/org/apache/nutch/util/NutchJob.java +++ b/src/java/org/apache/nutch/util/NutchJob.java @@ -33,6 +33,8 @@ public class NutchJob extends Job { private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); + private static final String JOB_FAILURE_LOG_FORMAT = "%s job did not succeed, job id: %s, job status: %s, reason: %s"; + @SuppressWarnings("deprecation") public NutchJob(Configuration conf, String jobName) throws IOException { super(conf, jobName); @@ -87,4 +89,26 @@ public class NutchJob extends Job { } } + /** + * Method to return job failure log message. To be used across all Jobs + * + * @param name + * Name/Type of the job + * @param job + * Job Object for Job details + * @return job failure log message + * @throws IOException + * Can occur during fetching job status + * @throws InterruptedException + * Can occur during fetching job status + */ + public static String getJobFailureLogMessage(String name, Job job) + throws IOException, InterruptedException { + if (job != null) { + return String.format(JOB_FAILURE_LOG_FORMAT, name, job.getJobID(), + job.getStatus().getState(), job.getStatus().getFailureInfo()); + } + return ""; + } + } diff --git a/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java b/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java index 213c1c2..2499da0 100644 --- a/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java +++ b/src/java/org/apache/nutch/util/ProtocolStatusStatistics.java @@ -120,9 +120,7 @@ public class ProtocolStatusStatistics extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = jobName + " job did not succeed, job status: " - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage(jobName, job); LOG.error(message); // throw exception so that calling routine can exit with error throw new RuntimeException(message); diff --git a/src/java/org/apache/nutch/util/SitemapProcessor.java b/src/java/org/apache/nutch/util/SitemapProcessor.java index 1a1955e..98f7df8 100644 --- a/src/java/org/apache/nutch/util/SitemapProcessor.java +++ b/src/java/org/apache/nutch/util/SitemapProcessor.java @@ -411,9 +411,8 @@ public class SitemapProcessor extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "SitemapProcessor_" + crawldb.toString() - + " job did not succeed, job status: " + job.getStatus().getState() - + ", reason: " + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("SitemapProcessor", + job); LOG.error(message); NutchJob.cleanupAfterFailure(tempCrawlDb, lock, fs); // throw exception so that calling routine can exit with error diff --git a/src/java/org/apache/nutch/util/domain/DomainStatistics.java b/src/java/org/apache/nutch/util/domain/DomainStatistics.java index 0d789ed..638b6c9 100644 --- a/src/java/org/apache/nutch/util/domain/DomainStatistics.java +++ b/src/java/org/apache/nutch/util/domain/DomainStatistics.java @@ -38,6 +38,7 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.util.NutchConfiguration; +import org.apache.nutch.util.NutchJob; import org.apache.nutch.util.TimingUtil; import org.apache.nutch.util.URLUtil; import org.slf4j.Logger; @@ -140,9 +141,7 @@ public class DomainStatistics extends Configured implements Tool { try { boolean success = job.waitForCompletion(true); if (!success) { - String message = "Injector job did not succeed, job status: " - + job.getStatus().getState() + ", reason: " - + job.getStatus().getFailureInfo(); + String message = NutchJob.getJobFailureLogMessage("Injector", job); LOG.error(message); // throw exception so that calling routine can exit with error throw new RuntimeException(message);