Author: mattmann
Date: Sat Mar 7 04:50:14 2015
New Revision: 1664792
URL: http://svn.apache.org/r1664792
Log:
Fix for NUTCH-1954: FilenameTooLong error appears in CommonCrawlDumper.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1664792&r1=1664791&r2=1664792&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Mar 7 04:50:14 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1954 FilenameTooLong error appears in CommonCrawlDumper (mattmann)
+
* NUTCH-1949 Dump out the Nutch data into the Common Crawl format (Giuseppe
Totaro via lewismc)
* NUTCH-1950 File name too long (Jiaheng Zhang, Chong Li via mattmann)
Modified: nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java?rev=1664792&r1=1664791&r2=1664792&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java Sat Mar 7 04:50:14 2015
@@ -30,6 +30,7 @@ import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
+import java.security.MessageDigest;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
@@ -369,7 +370,22 @@ public class CommonCrawlDataDumper {
}
else {
LOG.info("Writing: [" + outputFullPath + "]");
-
IOUtils.copy(new ByteArrayInputStream(byteData), new
FileOutputStream(outputFile));
+ try{
+
IOUtils.copy(new ByteArrayInputStream(byteData), new
FileOutputStream(outputFile));
+ }
+ catch
(Exception e){
+
MessageDigest md = MessageDigest.getInstance("MD5");
+
md.update(outputFullPath.getBytes());
+ byte[]
digest = md.digest();
+
StringBuffer sb = new StringBuffer();
+ for (byte b
: digest) {
+
sb.append(String.format("%02x", b & 0xff));
+ }
+
outputFullPath = outputFullPath.substring(0, 32) + "_" + sb.toString();
+ File
newOutPutFile = new File(outputFullPath);
+
IOUtils.copy(new ByteArrayInputStream(byteData), new
FileOutputStream(newOutPutFile));
+
LOG.info("File name is too long. Truncated and MD5 appended.");
+ }
}
}
else {