Author: ab
Date: Sat Nov 28 22:41:06 2009
New Revision: 885159

URL: http://svn.apache.org/viewvc?rev=885159&view=rev
Log:
NUTCH-712 ParseOutputFormat should catch java.net.MalformedURLException coming from normalizers.
Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=885159&r1=885158&r2=885159&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Sat Nov 28 22:41:06 2009
@@ -2,6 +2,9 @@
 
 Unreleased Changes
 
+* NUTCH-712 ParseOutputFormat should catch java.net.MalformedURLException
+  coming from normalizers (Julien Nioche via ab)
+
 * NUTCH-741 Job file includes multiple copies of nutch config files
   (Kirby Bohling via ab)
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java?rev=885159&r1=885158&r2=885159&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java Sat Nov 28 22:41:06 2009
@@ -140,9 +140,13 @@
           pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
         String newUrl = pstatus.getMessage();
         int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
-        newUrl = normalizers.normalize(newUrl,
-            URLNormalizers.SCOPE_FETCHER);
-        newUrl = filters.filter(newUrl);
+        try {
+          newUrl = normalizers.normalize(newUrl,
+              URLNormalizers.SCOPE_FETCHER);
+        } catch (MalformedURLException mfue) {
+          newUrl = null;
+        }
+        if (newUrl != null) newUrl = filters.filter(newUrl);
         String url = key.toString();
         if (newUrl != null && !newUrl.equals(url)) {
           String reprUrl =