Author: lewismc
Date: Wed Sep 23 19:58:57 2015
New Revision: 1704928
URL: http://svn.apache.org/viewvc?rev=1704928&view=rev
Log:
NUTCH-2115 - Add total counts to mimetype stats this closes #65
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1704928&r1=1704927&r2=1704928&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Sep 23 19:58:57 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2115 - Add total counts to mimetype stats (Jimmy Joyce via lewismc)
+
* NUTCH-2111 Delete temporary files location for selenium tmp files after
driver quits (Kim Whitehall via lewismc)
* NUTCH-2095 WARC exporter for the CommonCrawlDataDumper (jorgelbg)
Modified: nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java?rev=1704928&r1=1704927&r2=1704928&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java Wed Sep 23 19:58:57 2015
@@ -111,15 +111,19 @@ public class DumpFileUtil {
// print total stats
builder.append("\nTOTAL Stats:\n");
builder.append("[\n");
+ int mimetypeCount = 0;
for (String mimeType : typeCounts.keySet()) {
builder.append(" {\"mimeType\":\"");
builder.append(mimeType);
builder.append("\",\"count\":\"");
builder.append(typeCounts.get(mimeType));
builder.append("\"}\n");
+ mimetypeCount += typeCounts.get(mimeType);
}
builder.append("]\n");
+ builder.append("Total count: " + mimetypeCount + "\n");
// filtered types stats
+ mimetypeCount = 0;
if (!filteredCounts.isEmpty()) {
builder.append("\nFILTERED Stats:\n");
builder.append("[\n");
@@ -129,8 +133,10 @@ public class DumpFileUtil {
builder.append("\",\"count\":\"");
builder.append(filteredCounts.get(mimeType));
builder.append("\"}\n");
+ mimetypeCount += filteredCounts.get(mimeType);
}
builder.append("]\n");
+ builder.append("Total filtered count: " + mimetypeCount + "\n");
}
return builder.toString();
}