Author: lewismc
Date: Mon Apr  8 00:33:23 2013
New Revision: 1465521

URL: http://svn.apache.org/r1465521
Log:
NUTCH-1551 Improve WebTableReader field order and display batchId

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1465521&r1=1465520&r2=1465521&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Apr  8 00:33:23 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* NUTCH-1551 Improve WebTableReader field order and display batchId (lewismc)
+
 * NUTCH-1552 possibility of a NPE in index-more plugin (kaveh minooie via lewismc)
 
 * NUTCH-1547 BasicIndexingFilter - Problem to index full title (Feng)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=1465521&r1=1465520&r2=1465521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java Mon Apr  8 00:33:23 2013
@@ -341,24 +341,34 @@ public class WebTableReader extends Nutc
     sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
     sb.append("status:\t").append(page.getStatus()).append(" (").append(
         CrawlStatus.getName((byte) page.getStatus())).append(")\n");
-    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
     sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
     sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
-    sb.append("retries:\t" + page.getRetriesSinceFetch()).append("\n");
+    sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n"); 
+    sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch()).append("\n");
     sb.append("modifiedTime:\t" + page.getModifiedTime()).append("\n");
+    sb.append("prevModifiedTime:\t" + page.getPrevModifiedTime()).append("\n");
     sb.append("protocolStatus:\t" +
         ProtocolStatusUtils.toString(page.getProtocolStatus())).append("\n");
-    sb.append("parseStatus:\t" +
-        ParseStatusUtils.toString(page.getParseStatus())).append("\n");
-    sb.append("title:\t" + page.getTitle()).append("\n");
-    sb.append("score:\t" + page.getScore()).append("\n");
+    ByteBuffer prevSig = page.getPrevSignature();
+        if (prevSig != null) {
+      sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig.array())).append("\n");
+    }
     ByteBuffer sig = page.getSignature();
     if (sig != null) {
       sb.append("signature:\t" + StringUtil.toHexString(sig.array())).append("\n");
     }
+    sb.append("parseStatus:\t" +
+        ParseStatusUtils.toString(page.getParseStatus())).append("\n");
+    sb.append("title:\t" + page.getTitle()).append("\n");
+    sb.append("score:\t" + page.getScore()).append("\n");
+
     Map<Utf8, Utf8> markers = page.getMarkers();
     sb.append("markers:\t" + markers).append("\n");
-
+    sb.append("reprUrl:\t" + page.getReprUrl()).append("\n");
+    Utf8 batchId = page.getBatchId();
+    if (batchId != null) {
+      sb.append("batchId:\t" + batchId.toString()).append("\n");
+    }
     Map<Utf8, ByteBuffer> metadata = page.getMetadata();
     if (metadata != null) {
       Iterator<Entry<Utf8, ByteBuffer>> iterator = metadata.entrySet()


Reply via email to