Repository: nutch Updated Branches: refs/heads/master 24cc2aa9c -> 6e051f2cc
NUTCH-2336 SegmentReader to implement Tool (contributed by Vincent Slot), closes #159 Project: http://git-wip-us.apache.org/repos/asf/nutch/repo Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/6e051f2c Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/6e051f2c Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/6e051f2c Branch: refs/heads/master Commit: 6e051f2ccadba6c6bac60ee8708ced958a30cc8b Parents: 24cc2aa Author: Sebastian Nagel <sna...@apache.org> Authored: Wed Nov 30 17:05:15 2016 +0100 Committer: Sebastian Nagel <sna...@apache.org> Committed: Thu Dec 1 11:57:32 2016 +0100 ---------------------------------------------------------------------- .../org/apache/nutch/segment/SegmentReader.java | 45 ++++++++++++++------ 1 file changed, 33 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/nutch/blob/6e051f2c/src/java/org/apache/nutch/segment/SegmentReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/nutch/segment/SegmentReader.java b/src/java/org/apache/nutch/segment/SegmentReader.java index d00d1e2..9ea20be 100644 --- a/src/java/org/apache/nutch/segment/SegmentReader.java +++ b/src/java/org/apache/nutch/segment/SegmentReader.java @@ -59,6 +59,8 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.NutchWritable; import org.apache.nutch.parse.ParseData; @@ -69,7 +71,7 @@ import org.apache.nutch.util.NutchConfiguration; import org.apache.nutch.util.NutchJob; /** Dump the content of a segment. */ -public class SegmentReader extends Configured implements +public class SegmentReader extends Configured implements Tool, Reducer<Text, NutchWritable, Text, Text> { public static final Logger LOG = LoggerFactory.getLogger(SegmentReader.class); @@ -574,10 +576,10 @@ public class SegmentReader extends Configured implements private static final int MODE_GET = 2; - public static void main(String[] args) throws Exception { + public int run(String[] args) throws Exception { if (args.length < 2) { usage(); - return; + return -1; } int mode = -1; if (args[0].equals("-dump")) @@ -622,20 +624,33 @@ public class SegmentReader extends Configured implements // collect required args switch (mode) { case MODE_DUMP: + + this.co = co; + this.fe = fe; + this.ge = ge; + this.pa = pa; + this.pd = pd; + this.pt = pt; + try { + this.fs = FileSystem.get(getConf()); + } catch (IOException e) { + LOG.error("IOException:", e); + } + String input = args[1]; if (input == null) { System.err.println("Missing required argument: <segment_dir>"); usage(); - return; + return -1; } String output = args.length > 2 ? args[2] : null; if (output == null) { System.err.println("Missing required argument: <output>"); usage(); - return; + return -1; } - segmentReader.dump(new Path(input), new Path(output)); - return; + dump(new Path(input), new Path(output)); + return 0; case MODE_LIST: ArrayList<Path> dirs = new ArrayList<Path>(); for (int i = 1; i < args.length; i++) { @@ -653,27 +668,27 @@ public class SegmentReader extends Configured implements dirs.add(new Path(args[i])); } segmentReader.list(dirs, new OutputStreamWriter(System.out, "UTF-8")); - return; + return 0; case MODE_GET: input = args[1]; if (input == null) { System.err.println("Missing required argument: <segment_dir>"); usage(); - return; + return -1; } String key = args.length > 2 ? args[2] : null; if (key == null) { System.err.println("Missing required argument: <keyValue>"); usage(); - return; + return -1; } segmentReader.get(new Path(input), new Text(key), new OutputStreamWriter( System.out, "UTF-8"), new HashMap<String, List<Writable>>()); - return; + return 0; default: System.err.println("Invalid operation: " + args[0]); usage(); - return; + return -1; } } @@ -716,4 +731,10 @@ public class SegmentReader extends Configured implements System.err .println("\t\tNote: put double-quotes around strings with spaces."); } + + public static void main(String[] args) throws Exception { + int result = ToolRunner.run(NutchConfiguration.create(), + new SegmentReader(), args); + System.exit(result); + } }