Author: lewismc
Date: Tue Jun 12 00:12:41 2012
New Revision: 1349076

URL: http://svn.apache.org/viewvc?rev=1349076&view=rev
Log:
commit to address NUTCH-1364 and update to CHANGES.txt

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
    nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 12 00:12:41 2012
@@ -2,6 +2,8 @@ Nutch Change Log
 
 (trunk) Current Development:
 
+* NUTCH-1364 Add a counter in Generator for malformed urls (lewismc)
+
 * NUTCH-1360 Support the storing of IP address connected to when web crawling (lewismc)
 
 * NUTCH-1262 Map `duplicating` content-types to a single type (markus)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Tue Jun 12 00:12:41 2012
@@ -23,7 +23,7 @@ import java.util.List;
 import java.util.Map.Entry;
 import java.io.IOException;
 
-// Commons Logging imports
+// Logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1349076&r1=1349075&r2=1349076&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Jun 12 00:12:41 2012
@@ -22,7 +22,7 @@ import java.net.*;
 import java.util.*;
 import java.text.*;
 
-// Commons Logging imports
+// Logging imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -267,6 +267,7 @@ public class Generator extends Configure
         } catch (Exception e) {
           LOG.warn("Malformed URL: '" + urlString + "', skipping ("
               + StringUtils.stringifyException(e) + ")");
+          reporter.getCounter("Generator", "MALFORMED_URL").increment(1);
           continue;
         }
 


Reply via email to