Author: ab Date: Mon Sep 25 11:14:31 2006 New Revision: 449765 URL: http://svn.apache.org/viewvc?view=rev&rev=449765 Log: Catch exception on invalid URLs and continue collecting valid ones.
Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java?view=diff&rev=449765&r1=449764&r2=449765 ============================================================================== --- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original) +++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java Mon Sep 25 11:14:31 2006 @@ -16,6 +16,7 @@ package org.apache.nutch.parse; +import java.net.MalformedURLException; import java.util.ArrayList; import java.util.List; @@ -108,7 +109,13 @@ } result = matcher.getMatch(); url = result.group(0); - outlinks.add(new Outlink(url, anchor, conf)); + url = result.group(0); + try { + Outlink outlink = new Outlink(url, anchor, conf); + outlinks.add(new Outlink(url, anchor, conf)); + } catch (MalformedURLException mue) { + LOG.warn("Invalid url: '" + url + "', skipping."); + } } } catch (Exception ex) { // if the matcher fails (perhaps a malformed URL) we just log it and move on ------------------------------------------------------------------------- Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys -- and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs