Author: lewismc
Date: Tue Jun 12 00:12:41 2012
New Revision: 1349076
URL: http://svn.apache.org/viewvc?rev=1349076&view=rev
Log:
commit to address NUTCH-1364 and update to CHANGES.txt
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 00:12:41 2012
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk) Current Development:
+* NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
+
* NUTCH-1360 Support the storing of IP address connected to when web crawling
(lewismc)
* NUTCH-1262 Map `duplicating` content-types to a single type (markus)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Tue Jun 12
00:12:41 2012
@@ -23,7 +23,7 @@ import java.util.List;
import java.util.Map.Entry;
import java.io.IOException;
-// Commons Logging imports
+// Logging imports
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Jun 12
00:12:41 2012
@@ -22,7 +22,7 @@ import java.net.*;
import java.util.*;
import java.text.*;
-// Commons Logging imports
+// Logging imports
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -267,6 +267,7 @@ public class Generator extends Configure
} catch (Exception e) {
LOG.warn("Malformed URL: '" + urlString + "', skipping ("
+ StringUtils.stringifyException(e) + ")");
+ reporter.getCounter("Generator", "MALFORMED_URL").increment(1);
continue;
}