This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit 34236ffecf478a1776559b0ed8c1ad929483d752
Author: Madhav Sharan <[email protected]>
AuthorDate: Wed Mar 29 18:07:07 2017 -0400

    fix for NUTCH-2370 contributed by [email protected]
---
 src/java/org/apache/nutch/tools/FileDumper.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/java/org/apache/nutch/tools/FileDumper.java b/src/java/org/apache/nutch/tools/FileDumper.java
index 53e6be4..51cc124 100644
--- a/src/java/org/apache/nutch/tools/FileDumper.java
+++ b/src/java/org/apache/nutch/tools/FileDumper.java
@@ -57,6 +57,7 @@ import org.apache.tika.Tika;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.codehaus.jackson.map.ObjectMapper;
 /**
  * The file dumper tool enables one to reverse generate the raw content from
  * Nutch segment data directories.
@@ -154,6 +155,7 @@ public class FileDumper {
     for (File segment : segmentDirs) {
       LOG.info("Processing segment: [" + segment.getAbsolutePath() + "]");
       DataOutputStream doutputStream = null;
+      Map<String, String> filenameToUrl = new HashMap<String, String>();
 
       File segmentDir = new File(segment.getAbsolutePath(), Content.DIR_NAME);
       File[] partDirs = segmentDir.listFiles(file -> file.canRead() && file.isDirectory());
@@ -242,7 +244,7 @@ public class FileDumper {
                   } else {
                     outputFullPath = String.format("%s/%s", fullDir, DumpFileUtil.createFileName(md5Ofurl, baseName, extension));
                   }
-
+                  filenameToUrl.put(outputFullPath, url);
                   File outputFile = new File(outputFullPath);
 
                   if (!outputFile.exists()) {
@@ -284,6 +286,10 @@ public class FileDumper {
           }
         }
       }
+      //save filenameToUrl in a json file for each segment there is one mapping file
+      String filenameToUrlFilePath = String.format("%s/%s_filenameToUrl.json", outputDir.getAbsolutePath(), segment.getName() );
+      new ObjectMapper().writeValue(new File(filenameToUrlFilePath), filenameToUrl);
+      
     }
     LOG.info("Dumper File Stats: "
         + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));

-- 
To stop receiving notification emails like this one, please contact
"[email protected]" <[email protected]>.

Reply via email to