Author: mattmann
Date: Fri Apr 17 20:58:08 2015
New Revision: 1674401

URL: http://svn.apache.org/r1674401
Log:
Fix for NUTCH-1988 (Make nested output directory dump optional), contributed by
Michael Joyce <[email protected]>. This closes #19.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1674401&r1=1674400&r2=1674401&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Apr 17 20:58:08 2015
@@ -2,6 +2,8 @@ Nutch Change Log
  
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1988 Make nested output directory dump optional (Michael Joyce via mattmann)
+
 * NUTCH-1927 Create a whitelist of IPs/hostnames to allow skipping of RobotRules parsing (mattmann, snagel)
 
 * NUTCH-1986 Clarify Elastic Search Indexer Plugin Settings (Michael Joyce via mattmann)

Modified: nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java?rev=1674401&r1=1674400&r2=1674401&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java Fri Apr 17 20:58:08 2015
@@ -128,9 +128,13 @@ public class FileDumper {
    * @param mimeTypes
    *          an array of mime types we have to dump, all others will be
    *          filtered out.
+   * @param flatDir
+   *          a boolean flag specifying whether the output directory should contain
+   *          only files instead of using nested directories to prevent naming
+   *          conflicts.
    * @throws Exception
    */
-  public void dump(File outputDir, File segmentRootDir, String[] mimeTypes)
+  public void dump(File outputDir, File segmentRootDir, String[] mimeTypes, 
boolean flatDir)
       throws Exception {
     if (mimeTypes == null)
       LOG.info("Accepting all mimetypes.");
@@ -209,7 +213,11 @@ public class FileDumper {
 
           if (filter) {
             String md5Ofurl = DumpFileUtil.getUrlMD5(url);
-            String fullDir = 
DumpFileUtil.createTwoLevelsDirectory(outputDir.getAbsolutePath(), md5Ofurl);
+
+            String fullDir = outputDir.getAbsolutePath();
+            if (!flatDir) {
+                fullDir = DumpFileUtil.createTwoLevelsDirectory(fullDir, 
md5Ofurl);
+            }
 
             if (!Strings.isNullOrEmpty(fullDir)) {
               String outputFullPath = String.format("%s/%s", fullDir, 
DumpFileUtil.createFileName(md5Ofurl, baseName, extension));
@@ -273,6 +281,12 @@ public class FileDumper {
         .withDescription(
             "an optional list of mimetypes to dump, excluding all others. 
Defaults to all.")
         .create("mimetype");
+    @SuppressWarnings("static-access")
+    Option dirStructureOpt = OptionBuilder
+        .withArgName("flatdir")
+        .withDescription(
+            "optionally specify that the output directory should only contain 
files.")
+        .create("flatdir");
 
     // create the options
     Options options = new Options();
@@ -280,6 +294,7 @@ public class FileDumper {
     options.addOption(outputOpt);
     options.addOption(segOpt);
     options.addOption(mimeOpt);
+    options.addOption(dirStructureOpt);
 
     CommandLineParser parser = new GnuParser();
     try {
@@ -294,6 +309,7 @@ public class FileDumper {
       File outputDir = new File(line.getOptionValue("outputDir"));
       File segmentRootDir = new File(line.getOptionValue("segment"));
       String[] mimeTypes = line.getOptionValues("mimetype");
+      boolean flatDir = line.hasOption("flatdir");
 
       if (!outputDir.exists()) {
         LOG.warn("Output directory: [" + outputDir.getAbsolutePath()
@@ -304,7 +320,7 @@ public class FileDumper {
       }
 
       FileDumper dumper = new FileDumper();
-      dumper.dump(outputDir, segmentRootDir, mimeTypes);
+      dumper.dump(outputDir, segmentRootDir, mimeTypes, flatDir);
     } catch (Exception e) {
       LOG.error("FileDumper: " + StringUtils.stringifyException(e));
       e.printStackTrace();


Reply via email to