Author: markus
Date: Fri Jul  5 08:52:51 2013
New Revision: 1499948

URL: http://svn.apache.org/r1499948
Log:
NUTCH-1520 SegmentMerger looses records

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1499948&r1=1499947&r2=1499948&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jul  5 08:52:51 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1520 SegmentMerger looses records (markus)
+
 * NUTCH-1602 improve the readability of metadata in readdb dump normal (lufeng)
 
 * NUTCH-1596 HeadingsParseFilter not thread safe (snagel via markus)

Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=1499948&r1=1499947&r2=1499948&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Fri Jul  5 08:52:51 2013
@@ -412,10 +412,14 @@ public class SegmentMerger extends Confi
             lastF = val;
             lastFname = sp.segmentName;
           } else {
-            // take newer
-            if (lastFname.compareTo(sp.segmentName) < 0) {
-              lastF = val;
-              lastFname = sp.segmentName;
+            // only consider fetch status
+            // https://issues.apache.org/jira/browse/NUTCH-1520
+            if (CrawlDatum.hasFetchStatus(val)) {
+              // take newer
+              if (lastFname.compareTo(sp.segmentName) < 0) {
+                lastF = val;
+                lastFname = sp.segmentName;
+              }
             }
           }
         } else if (sp.partName.equals(CrawlDatum.PARSE_DIR_NAME)) {
@@ -480,7 +484,7 @@ public class SegmentMerger extends Confi
                                                   linked.isEmpty() ? null : linked.lastEntry().getValue())){
       return;
     }
-       
+
     curCount++;
     String sliceName = null;
     MetaWrapper wrapper = new MetaWrapper();


Reply via email to