Author: mattmann
Date: Wed Mar  4 02:20:40 2015
New Revision: 1663847

URL: http://svn.apache.org/r1663847
Log:
Fix for NUTCH-1950 File name too long contributed by xzjh <[email protected]> 
and Chong Li. This closes #9.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1663847&r1=1663846&r2=1663847&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Mar  4 02:20:40 2015
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1950 File name too long (Jiaheng Zhang, Chong Li via mattmann)
+
 * NUTCH-1921 Optionally disable HTTP if-modified-since header (markus)
 
 * NUTCH-1933 nutch-selenium plugin (Mo Omer, Mohammad Al-Moshin, lewismc)

Modified: nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java?rev=1663847&r1=1663846&r2=1663847&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java Wed Mar  4 02:20:40 2015
@@ -26,6 +26,7 @@ import java.io.ByteArrayInputStream;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
+import java.security.MessageDigest;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
@@ -206,9 +207,32 @@ public class FileDumper {
             File outputFile = new File(outputFullPath);
             if (!outputFile.exists()) {
               LOG.info("Writing: [" + outputFullPath + "]");
-              FileOutputStream output = new FileOutputStream(outputFile);
-              IOUtils.write(content.getContent(), output);
-              fileCount++;
+              try {
+                FileOutputStream output = new FileOutputStream(outputFile);
+                IOUtils.write(content.getContent(), output);
+                fileCount++;
+                  
+              } catch (Exception e) {
+                // if the file name is too long, we get the first 32 chars of the original name and append its MD5
+                // after the first 32 chars as the new file name
+                MessageDigest md = MessageDigest.getInstance("MD5");
+                md.update(outputFullPath.getBytes());
+                byte[] digest = md.digest();
+                StringBuffer sb = new StringBuffer();
+                for (byte b : digest) {
+                  sb.append(String.format("%02x", b & 0xff));
+                }
+                outputFullPath = outputFullPath.substring(0, 32) + "_" + sb.toString();
+
+                File newOutPutFile = new File(outputFullPath);
+                FileOutputStream output = new FileOutputStream(newOutPutFile);
+                IOUtils.write(content.getContent(), output);
+                fileCount++;
+                LOG.info("File name is too long. Truncated and MD5 appended.");
+                
+                //e.printStackTrace();
+              }
+              
             } else {
               LOG.info("Skipping writing: [" + outputFullPath
                   + "]: file already exists");


Reply via email to