Author: mattmann
Date: Sat Mar  7 04:50:14 2015
New Revision: 1664792

URL: http://svn.apache.org/r1664792
Log:
Fix for NUTCH-1954: FilenameTooLong error appears in CommonCrawlDumper.

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1664792&r1=1664791&r2=1664792&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Mar  7 04:50:14 2015
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development 1.10-SNAPSHOT
 
+* NUTCH-1954 FilenameTooLong error appears in CommonCrawlDumper (mattmann)
+
 * NUTCH-1949 Dump out the Nutch data into the Common Crawl format (Giuseppe Totaro via lewismc)
 
 * NUTCH-1950 File name too long (Jiaheng Zhang, Chong Li via mattmann)

Modified: nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java?rev=1664792&r1=1664791&r2=1664792&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/CommonCrawlDataDumper.java Sat Mar  7 04:50:14 2015
@@ -30,6 +30,7 @@ import java.util.Arrays;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.Map;
+import java.security.MessageDigest;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.CommandLineParser;
@@ -369,7 +370,22 @@ public class CommonCrawlDataDumper {
                                                        }
                                                        else {
                                                                
LOG.info("Writing: [" + outputFullPath + "]");
-                                                               
IOUtils.copy(new ByteArrayInputStream(byteData), new 
FileOutputStream(outputFile));
+                                                               try{
+                                                                   
IOUtils.copy(new ByteArrayInputStream(byteData), new 
FileOutputStream(outputFile));
+                                                               }
+                                                               catch 
(Exception e){
+                                                                   
MessageDigest md = MessageDigest.getInstance("MD5");
+                                                                   
md.update(outputFullPath.getBytes());
+                                                                   byte[] 
digest = md.digest();
+                                                                   
StringBuffer sb = new StringBuffer();
+                                                                   for (byte b 
: digest) {
+                                                                          
sb.append(String.format("%02x", b & 0xff));
+                                                                   }
+                                                                   
outputFullPath = outputFullPath.substring(0, 32) + "_" + sb.toString();
+                                                                   File 
newOutPutFile = new File(outputFullPath);
+                                                                   
IOUtils.copy(new ByteArrayInputStream(byteData), new 
FileOutputStream(newOutPutFile));
+                                                                   
LOG.info("File name is too long. Truncated and MD5 appended.");
+                                                               }
                                                        }
                                                }
                                                else {


Reply via email to