Author: joyce
Date: Wed Nov 11 16:54:15 2015
New Revision: 1713890

URL: http://svn.apache.org/viewvc?rev=1713890&view=rev
Log:
NUTCH-1911 - Recommit help fixes and remove the requirement to include the 'current' folder in the input paths (it is now appended automatically)

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1713890&r1=1713889&r2=1713890&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Nov 11 16:54:15 2015
@@ -3,6 +3,8 @@ Nutch Change Log
 Nutch 1.11 Release 25/10/2015 (dd/mm/yyyy)
 Release Report: http://s.apache.org/nutch11
 
+* NUTCH-1911 Improve DomainStatistics tool command line parsing (joyce)
+
 * NUTCH-2064 URLNormalizer basic to encode reserved chars and decode non-reserved chars (markus, snagel)
 
 * NUTCH-2159 Ensure that all WebApp files are copied into generated artifacts for 1.X Webapp (lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java?rev=1713890&r1=1713889&r2=1713890&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/domain/DomainStatistics.java Wed Nov 11 16:54:15 2015
@@ -17,6 +17,7 @@
 
 package org.apache.nutch.util.domain;
 
+import java.io.File;
 import java.io.IOException;
 import java.net.URL;
 import java.text.SimpleDateFormat;
@@ -65,8 +66,21 @@ public class DomainStatistics extends Co
 
   public int run(String[] args) throws Exception {
     if (args.length < 3) {
-      System.out
-          .println("usage: DomainStatistics inputDirs outDir host|domain|suffix|tld [numOfReducer]");
+      System.err.println("Usage: DomainStatistics inputDirs outDir mode [numOfReducer]");
+
+      System.err.println("\tinputDirs\tComma separated list of crawldb input directories");
+      System.err.println("\t\t\tE.g.: crawl/crawldb/");
+
+      System.err.println("\toutDir\t\tOutput directory where results should be dumped");
+
+      System.err.println("\tmode\t\tSet statistics gathering mode");
+      System.err.println("\t\t\t\thost\tGather statistics by host");
+      System.err.println("\t\t\t\tdomain\tGather statistics by domain");
+      System.err.println("\t\t\t\tsuffix\tGather statistics by suffix");
+      System.err.println("\t\t\t\ttld\tGather statistics by top level directory");
+
+      System.err.println("\t[numOfReducers]\tOptional number of reduce jobs to use. Defaults to 1.");
+      
       return 1;
     }
     String inputDir = args[0];
@@ -106,7 +120,8 @@ public class DomainStatistics extends Co
 
     String[] inputDirsSpecs = inputDir.split(",");
     for (int i = 0; i < inputDirsSpecs.length; i++) {
-      FileInputFormat.addInputPath(job, new Path(inputDirsSpecs[i]));
+      File completeInputPath = new File(new File(inputDirsSpecs[i]), "current");
+      FileInputFormat.addInputPath(job, new Path(completeInputPath.toString()));
     }
 
     job.setInputFormatClass(SequenceFileInputFormat.class);


Reply via email to