Author: lewismc
Date: Mon Apr 8 00:33:23 2013
New Revision: 1465521
URL: http://svn.apache.org/r1465521
Log:
NUTCH-1551 Improve WebTableReader field order and display batchId
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1465521&r1=1465520&r2=1465521&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Mon Apr 8 00:33:23 2013
@@ -2,6 +2,8 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1551 Improve WebTableReader field order and display batchId (lewismc)
+
* NUTCH-1552 possibility of a NPE in index-more plugin (kaveh minooie via
lewismc)
* NUTCH-1547 BasicIndexingFilter - Problem to index full title (Feng)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java?rev=1465521&r1=1465520&r2=1465521&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/WebTableReader.java Mon Apr 8 00:33:23 2013
@@ -341,24 +341,34 @@ public class WebTableReader extends Nutc
sb.append("baseUrl:\t" + page.getBaseUrl()).append("\n");
sb.append("status:\t").append(page.getStatus()).append(" (").append(
CrawlStatus.getName((byte) page.getStatus())).append(")\n");
- sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
sb.append("fetchTime:\t" + page.getFetchTime()).append("\n");
sb.append("prevFetchTime:\t" + page.getPrevFetchTime()).append("\n");
- sb.append("retries:\t" + page.getRetriesSinceFetch()).append("\n");
+ sb.append("fetchInterval:\t" + page.getFetchInterval()).append("\n");
+ sb.append("retriesSinceFetch:\t" + page.getRetriesSinceFetch()).append("\n");
sb.append("modifiedTime:\t" + page.getModifiedTime()).append("\n");
+ sb.append("prevModifiedTime:\t" + page.getPrevModifiedTime()).append("\n");
sb.append("protocolStatus:\t" +
ProtocolStatusUtils.toString(page.getProtocolStatus())).append("\n");
- sb.append("parseStatus:\t" +
- ParseStatusUtils.toString(page.getParseStatus())).append("\n");
- sb.append("title:\t" + page.getTitle()).append("\n");
- sb.append("score:\t" + page.getScore()).append("\n");
+ ByteBuffer prevSig = page.getPrevSignature();
+ if (prevSig != null) {
+ sb.append("prevSignature:\t" + StringUtil.toHexString(prevSig.array())).append("\n");
+ }
ByteBuffer sig = page.getSignature();
if (sig != null) {
sb.append("signature:\t" +
StringUtil.toHexString(sig.array())).append("\n");
}
+ sb.append("parseStatus:\t" +
+ ParseStatusUtils.toString(page.getParseStatus())).append("\n");
+ sb.append("title:\t" + page.getTitle()).append("\n");
+ sb.append("score:\t" + page.getScore()).append("\n");
+
Map<Utf8, Utf8> markers = page.getMarkers();
sb.append("markers:\t" + markers).append("\n");
-
+ sb.append("reprUrl:\t" + page.getReprUrl()).append("\n");
+ Utf8 batchId = page.getBatchId();
+ if (batchId != null) {
+ sb.append("batchId:\t" + batchId.toString()).append("\n");
+ }
Map<Utf8, ByteBuffer> metadata = page.getMetadata();
if (metadata != null) {
Iterator<Entry<Utf8, ByteBuffer>> iterator = metadata.entrySet()