Author: lewismc
Date: Tue Jun 18 18:21:51 2013
New Revision: 1494234
URL: http://svn.apache.org/r1494234
Log:
NUTCH-1475 Index-More Plugin -- A better fall back value for date field
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1494234&r1=1494233&r2=1494234&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 18 18:21:51 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1475 Index-More Plugin -- A better fall back value for date field (James Sullivan, snagel via lewismc)
+
* NUTCH-1560 index-metadata to add all values of multivalued metadata (snagel)
* NUTCH-1467 Not able to parse mutliValued metatags (kiran via snagel)
Modified:
nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=1494234&r1=1494233&r2=1494234&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Tue Jun 18 18:21:51 2013
@@ -102,8 +102,11 @@ public class MoreIndexingFilter implemen
doc.add("lastModified", new Date(time));
}
- if (time == -1) { // if no last-modified
- time = datum.getFetchTime(); // use fetch time
+ if (time == -1) { // if no last-modified specified in HTTP header
+ time = datum.getModifiedTime(); // use value in CrawlDatum
+ if (time <= 0) { // if also unset
+ time = new Date().getTime(); // use current time
+ }
}
// un-stored, indexed and un-tokenized