Author: mattmann
Date: Wed Mar 4 02:20:40 2015
New Revision: 1663847
URL: http://svn.apache.org/r1663847
Log:
Fix for NUTCH-1950 File name too long contributed by xzjh <[email protected]>
and Chong Li. This closes #9.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1663847&r1=1663846&r2=1663847&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Mar 4 02:20:40 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1950 File name too long (Jiaheng Zhang, Chong Li via mattmann)
+
* NUTCH-1921 Optionally disable HTTP if-modified-since header (markus)
* NUTCH-1933 nutch-selenium plugin (Mo Omer, Mohammad Al-Moshin, lewismc)
Modified: nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java?rev=1663847&r1=1663846&r2=1663847&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java Wed Mar 4 02:20:40 2015
@@ -26,6 +26,7 @@ import java.io.ByteArrayInputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
+import java.security.MessageDigest;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -206,9 +207,32 @@ public class FileDumper {
File outputFile = new File(outputFullPath);
if (!outputFile.exists()) {
LOG.info("Writing: [" + outputFullPath + "]");
- FileOutputStream output = new FileOutputStream(outputFile);
- IOUtils.write(content.getContent(), output);
- fileCount++;
+ try {
+ FileOutputStream output = new FileOutputStream(outputFile);
+ IOUtils.write(content.getContent(), output);
+ fileCount++;
+
+ } catch (Exception e) {
+ // if the file name is too long, we get the first 32 chars of the original name and append its MD5
+ // after the first 32 chars as the new file name
+ MessageDigest md = MessageDigest.getInstance("MD5");
+ md.update(outputFullPath.getBytes());
+ byte[] digest = md.digest();
+ StringBuffer sb = new StringBuffer();
+ for (byte b : digest) {
+ sb.append(String.format("%02x", b & 0xff));
+ }
+ outputFullPath = outputFullPath.substring(0, 32) + "_" + sb.toString();
+
+ File newOutPutFile = new File(outputFullPath);
+ FileOutputStream output = new FileOutputStream(newOutPutFile);
+ IOUtils.write(content.getContent(), output);
+ fileCount++;
+ LOG.info("File name is too long. Truncated and MD5 appended.");
+
+ //e.printStackTrace();
+ }
+
} else {
LOG.info("Skipping writing: [" + outputFullPath
+ "]: file already exists");