Author: snagel Date: Mon Mar 17 21:56:32 2014 New Revision: 1578620 URL: http://svn.apache.org/r1578620 Log: NUTCH-1671 indexchecker to add digest field
Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1578620&r1=1578619&r2=1578620&view=diff ============================================================================== --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Mon Mar 17 21:56:32 2014 @@ -2,6 +2,8 @@ Nutch Change Log Current Development +* NUTCH-1671 indexchecker to add digest field (snagel, lufeng) + * NUTCH-1645 Junit Test Case for Adaptive Fetch Schedule class (Yasin Kılınç, lufeng, Sertac TURKEL via snagel) * NUTCH-1478 Parse-metatags and index-metadata plugin for Nutch 2.x series (kiran, Nguyen Manh Tien, Talat UYARER, Vangelis Karvounis via lewismc) Modified: nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1578620&r1=1578619&r2=1578620&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Mon Mar 17 21:56:32 2014 @@ -37,6 +37,7 @@ import org.apache.nutch.protocol.Protoco import org.apache.nutch.protocol.ProtocolStatusUtils; import org.apache.nutch.storage.WebPage; import org.apache.nutch.util.NutchConfiguration; +import org.apache.nutch.util.StringUtil; import org.apache.nutch.util.URLUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,6 +123,7 @@ public class IndexingFiltersChecker exte } NutchDocument doc = new NutchDocument(); + doc.add("digest", StringUtil.toHexString(page.getSignature())); try { doc = indexers.filter(doc, url, page);