Author: markus
Date: Wed Oct 16 14:56:37 2013
New Revision: 1532785
URL: http://svn.apache.org/r1532785
Log:
NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Oct 16 14:56:37 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Development Trunk
+* NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified (markus)
+
* NUTCH-1606 Check that Factory classes use the cache in a thread safe way
(jnioche)
* NUTCH-1653 AbstractScoringFilter (jnioche)
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Oct 16 14:56:37 2013
@@ -243,12 +243,15 @@ public class CrawlDbReducer implements R
else result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
}
result.setSignature(signature);
- if (metaFromParse != null) {
- for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
- result.getMetaData().put(e.getKey(), e.getValue());
- }
- }
}
+
+ // https://issues.apache.org/jira/browse/NUTCH-1656
+ if (metaFromParse != null) {
+ for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
+ result.getMetaData().put(e.getKey(), e.getValue());
+ }
+ }
+
// if fetchInterval is larger than the system-wide maximum, trigger
// an unconditional recrawl. This prevents the page to be stuck at
// NOTMODIFIED state, when the old fetched copy was already removed with