Author: mattmann
Date: Fri Apr 10 23:30:09 2015
New Revision: 1672796

URL: http://svn.apache.org/r1672796
Log:
- fix for NUTCH-1983 CommonCrawlDumper and FileDumper don't dump correct JSON 

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
    nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
    nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1672796&r1=1672795&r2=1672796&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Apr 10 23:30:09 2015
@@ -2,6 +2,8 @@ Nutch Change Log
  
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1983 CommonCrawlDumper and FileDumper don't dump correct JSON (mattmann)
+
 * NUTCH-1972 Dockerfile for Nutch 1.x (Michael Joyce via mattmann)
 
 * NUTCH-1771 Indexer fails if a segment is corrupted or incomplete (Diaa, Chong Li via snagel) 

Modified: nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java?rev=1672796&r1=1672795&r2=1672796&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java Fri Apr 10 23:30:09 2015
@@ -488,7 +488,7 @@ public class CommonCrawlDataDumper {
                }
                
                if (!typeCounts.isEmpty()) {
-                       LOG.info("CommonsCrawlDataDumper File Stats: " + displayFileTypes(typeCounts, filteredCounts));
+                       LOG.info("CommonsCrawlDataDumper File Stats: " + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
                }
        }
        
@@ -547,35 +547,6 @@ public class CommonCrawlDataDumper {
        private void collectStats(Map<String, Integer> typeCounts, String mimeType) {
                typeCounts.put(mimeType, typeCounts.containsKey(mimeType) ? typeCounts.get(mimeType) + 1 : 1);
        }
-
-       private String displayFileTypes(Map<String, Integer> typeCounts, Map<String, Integer> filteredCounts) {
-               StringBuilder builder = new StringBuilder();
-               // print total stats
-               builder.append("\nTOTAL Stats:\n");
-               builder.append("{\n");
-               for (String mimeType : typeCounts.keySet()) {
-                       builder.append("    {\"mimeType\":\"");
-                       builder.append(mimeType);
-                       builder.append("\",\"count\":");
-                       builder.append(typeCounts.get(mimeType));
-                       builder.append("\"}\n");
-               }
-               builder.append("}\n");
-               // filtered types stats
-               if (!filteredCounts.isEmpty()) {
-                       builder.append("\nFILTERED Stats:\n");
-                       builder.append("{\n");
-                       for (String mimeType : filteredCounts.keySet()) {
-                               builder.append("    {\"mimeType\":\"");
-                               builder.append(mimeType);
-                               builder.append("\",\"count\":");
-                               builder.append(filteredCounts.get(mimeType));
-                               builder.append("\"}\n");
-                       }
-                       builder.append("}\n");
-               }
-               return builder.toString();
-       }
        
        /**
         * Gets the current date if the given timestamp is empty or null.

Modified: nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java?rev=1672796&r1=1672795&r2=1672796&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java Fri Apr 10 23:30:09 2015
@@ -80,26 +80,29 @@ import org.slf4j.LoggerFactory;
  * <pre>
  * {@code
  * INFO: File Types: 
- *   TOTAL Stats:    {
- *     {"mimeType":"application/xml","count":19"}
- *     {"mimeType":"image/png","count":47"}
- *     {"mimeType":"image/jpeg","count":141"}
- *     {"mimeType":"image/vnd.microsoft.icon","count":4"}
- *     {"mimeType":"text/plain","count":89"}
- *     {"mimeType":"video/quicktime","count":2"}
- *     {"mimeType":"image/gif","count":63"}
- *     {"mimeType":"application/xhtml+xml","count":1670"}
- *     {"mimeType":"application/octet-stream","count":40"}
- *     {"mimeType":"text/html","count":1863"}
- *   }
- *   FILTER Stats:    {
- *     {"mimeType":"image/png","count":47"}
- *     {"mimeType":"image/jpeg","count":141"}
- *     {"mimeType":"image/vnd.microsoft.icon","count":4"}
- *     {"mimeType":"video/quicktime","count":2"}
- *     {"mimeType":"image/gif","count":63"}
- *   }
- * }
+ *   TOTAL Stats:    
+ *    [
+ *     {"mimeType":"application/xml","count":"19"}
+ *     {"mimeType":"image/png","count":"47"}
+ *     {"mimeType":"image/jpeg","count":"141"}
+ *     {"mimeType":"image/vnd.microsoft.icon","count":"4"}
+ *     {"mimeType":"text/plain","count":"89"}
+ *     {"mimeType":"video/quicktime","count":"2"}
+ *     {"mimeType":"image/gif","count":"63"}
+ *     {"mimeType":"application/xhtml+xml","count":"1670"}
+ *     {"mimeType":"application/octet-stream","count":"40"}
+ *     {"mimeType":"text/html","count":"1863"}
+ *   ]
+ *   
+ *   FILTER Stats: 
+ *   [
+ *     {"mimeType":"image/png","count":"47"}
+ *     {"mimeType":"image/jpeg","count":"141"}
+ *     {"mimeType":"image/vnd.microsoft.icon","count":"4"}
+ *     {"mimeType":"video/quicktime","count":"2"}
+ *     {"mimeType":"image/gif","count":"63"}
+ *   ]
+ *  
  * </pre>
  * <p>
  * In the case above, the tool would have been run with the <b>-mimeType
@@ -237,7 +240,7 @@ public class FileDumper {
       }
     }
     LOG.info("Dumper File Stats: "
-        + displayFileTypes(typeCounts, filteredCounts));
+        + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
 
   }
 
@@ -314,34 +317,4 @@ public class FileDumper {
         typeCounts.containsKey(mimeType) ? typeCounts.get(mimeType) + 1 : 1);
   }
 
-  private String displayFileTypes(Map<String, Integer> typeCounts,
-      Map<String, Integer> filteredCounts) {
-    StringBuilder builder = new StringBuilder();
-    // print total stats
-    builder.append("\n  TOTAL Stats:\n");
-    builder.append("                {\n");
-    for (String mimeType : typeCounts.keySet()) {
-      builder.append("    {\"mimeType\":\"");
-      builder.append(mimeType);
-      builder.append("\",\"count\":");
-      builder.append(typeCounts.get(mimeType));
-      builder.append("\"}\n");
-    }
-    builder.append("}\n");
-    if (!filteredCounts.isEmpty()) {
-      // print dumper stats
-      builder.append("\n  FILTERED Stats:\n");
-      builder.append("                {\n");
-      for (String mimeType : filteredCounts.keySet()) {
-        builder.append("    {\"mimeType\":\"");
-        builder.append(mimeType);
-        builder.append("\",\"count\":");
-        builder.append(filteredCounts.get(mimeType));
-        builder.append("\"}\n");
-      }
-      builder.append("}\n");
-    }
-    return builder.toString();
-  }
-
 }

Modified: nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java?rev=1672796&r1=1672795&r2=1672796&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/DumpFileUtil.java Fri Apr 10 23:30:09 2015
@@ -26,6 +26,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Map;
 
 public class DumpFileUtil {
     private static final Logger LOG = LoggerFactory.getLogger(DumpFileUtil.class
@@ -104,4 +105,33 @@ public class DumpFileUtil {
                
                return outputFullPath;
     }
+    
+       public static String displayFileTypes(Map<String, Integer> typeCounts, Map<String, Integer> filteredCounts) {
+               StringBuilder builder = new StringBuilder();
+               // print total stats
+               builder.append("\nTOTAL Stats:\n");
+               builder.append("[\n");
+               for (String mimeType : typeCounts.keySet()) {
+                       builder.append("    {\"mimeType\":\"");
+                       builder.append(mimeType);
+                       builder.append("\",\"count\":\"");
+                       builder.append(typeCounts.get(mimeType));
+                       builder.append("\"}\n");
+               }
+               builder.append("]\n");
+               // filtered types stats
+               if (!filteredCounts.isEmpty()) {
+                       builder.append("\nFILTERED Stats:\n");
+                       builder.append("[\n");
+                       for (String mimeType : filteredCounts.keySet()) {
+                               builder.append("    {\"mimeType\":\"");
+                               builder.append(mimeType);
+                               builder.append("\",\"count\":\"");
+                               builder.append(filteredCounts.get(mimeType));
+                               builder.append("\"}\n");
+                       }
+                       builder.append("]\n");
+               }
+               return builder.toString();
+       }  
 }


Reply via email to