Author: dogacan
Date: Mon Sep 10 12:40:20 2007
New Revision: 574344

URL: http://svn.apache.org/viewvc?rev=574344&view=rev
Log:
NUTCH-550 - Parse fails if db.max.outlinks.per.page is -1.

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=574344&r1=574343&r2=574344&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Mon Sep 10 12:40:20 2007 @@ -129,6 +129,8 @@ 43. NUTCH-532 - CrawlDbMerger: wrong computation of last fetch time. (Emmanuel Joke via dogacan) +44. NUTCH-550 - Parse fails if db.max.outlinks.per.page is -1. (dogacan) + Release 0.9 - 2007-04-02 1. Changed log4j confiquration to log to stdout on commandline Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=574344&r1=574343&r2=574344&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Mon Sep 10 12:40:20 2007 @@ -84,7 +84,9 @@ final UrlValidator validator = UrlValidator.get(); final int interval = job.getInt("db.fetch.interval.default", 2592000); final boolean ignoreExternalLinks = job.getBoolean("db.ignore.external.links", false); - final int maxOutlinks = job.getInt("db.max.outlinks.per.page", 100); + int maxOutlinksPerPage = job.getInt("db.max.outlinks.per.page", 100); + final int maxOutlinks = (maxOutlinksPerPage < 0) ? Integer.MAX_VALUE + : maxOutlinksPerPage; final CompressionType compType = SequenceFile.getCompressionType(job); Path text =