Repository: nutch
Updated Branches:
  refs/heads/master 24cc2aa9c -> 6e051f2cc


NUTCH-2336 SegmentReader to implement Tool (contributed by Vincent Slot), closes #159


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6e051f2c
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6e051f2c
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6e051f2c

Branch: refs/heads/master
Commit: 6e051f2ccadba6c6bac60ee8708ced958a30cc8b
Parents: 24cc2aa
Author: Sebastian Nagel <sna...@apache.org>
Authored: Wed Nov 30 17:05:15 2016 +0100
Committer: Sebastian Nagel <sna...@apache.org>
Committed: Thu Dec 1 11:57:32 2016 +0100

----------------------------------------------------------------------
 .../org/apache/nutch/segment/SegmentReader.java | 45 ++++++++++++++------
 1 file changed, 33 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/6e051f2c/src/java/org/apache/nutch/segment/SegmentReader.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java
index d00d1e2..9ea20be 100644
--- a/src/java/org/apache/nutch/segment/SegmentReader.java
+++ b/src/java/org/apache/nutch/segment/SegmentReader.java
@@ -59,6 +59,8 @@ import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.util.Progressable;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.NutchWritable;
 import org.apache.nutch.parse.ParseData;
@@ -69,7 +71,7 @@ import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 
 /** Dump the content of a segment. */
-public class SegmentReader extends Configured implements
+public class SegmentReader extends Configured implements Tool,
     Reducer<Text, NutchWritable, Text, Text> {
 
   public static final Logger LOG = LoggerFactory.getLogger(SegmentReader.class);
@@ -574,10 +576,10 @@ public class SegmentReader extends Configured implements
 
   private static final int MODE_GET = 2;
 
-  public static void main(String[] args) throws Exception {
+  public int run(String[] args) throws Exception {
     if (args.length < 2) {
       usage();
-      return;
+      return -1;
     }
     int mode = -1;
     if (args[0].equals("-dump"))
@@ -622,20 +624,33 @@ public class SegmentReader extends Configured implements
     // collect required args
     switch (mode) {
     case MODE_DUMP:
+
+      this.co = co;
+      this.fe = fe;
+      this.ge = ge;
+      this.pa = pa;
+      this.pd = pd;
+      this.pt = pt;
+      try {
+        this.fs = FileSystem.get(getConf());
+      } catch (IOException e) {
+        LOG.error("IOException:", e);
+      }
+
       String input = args[1];
       if (input == null) {
         System.err.println("Missing required argument: <segment_dir>");
         usage();
-        return;
+        return -1;
       }
       String output = args.length > 2 ? args[2] : null;
       if (output == null) {
         System.err.println("Missing required argument: <output>");
         usage();
-        return;
+        return -1;
       }
-      segmentReader.dump(new Path(input), new Path(output));
-      return;
+      dump(new Path(input), new Path(output));
+      return 0;
     case MODE_LIST:
       ArrayList<Path> dirs = new ArrayList<Path>();
       for (int i = 1; i < args.length; i++) {
@@ -653,27 +668,27 @@ public class SegmentReader extends Configured implements
           dirs.add(new Path(args[i]));
       }
       segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8"));
-      return;
+      return 0;
     case MODE_GET:
       input = args[1];
       if (input == null) {
         System.err.println("Missing required argument: <segment_dir>");
         usage();
-        return;
+        return -1;
       }
       String key = args.length > 2 ? args[2] : null;
       if (key == null) {
         System.err.println("Missing required argument: <keyValue>");
         usage();
-        return;
+        return -1;
       }
       segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter(
           System.out, "UTF-8"), new HashMap<String, List<Writable>>());
-      return;
+      return 0;
     default:
       System.err.println("Invalid operation: " + args[0]);
       usage();
-      return;
+      return -1;
     }
   }
 
@@ -716,4 +731,10 @@ public class SegmentReader extends Configured implements
     System.err
         .println("\t\tNote: put double-quotes around strings with spaces.");
   }
+
+  public static void main(String[] args) throws Exception {
+    int result = ToolRunner.run(NutchConfiguration.create(),
+        new SegmentReader(), args);
+    System.exit(result);
+  }
 }

Reply via email to