Author: ab
Date: Fri Oct 9 13:54:27 2009
New Revision: 823553

URL: http://svn.apache.org/viewvc?rev=823553&view=rev
Log:
NUTCH-754 Use GenericOptionsParser instead of FileSystem.parseArgs().
Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823553&r1=823552&r2=823553&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Fri Oct 9 13:54:27 2009 @@ -20,6 +20,9 @@ * NUTCH-757 - RequestUtils getBooleanParameter() always returns false (Niall Pemberton via ab) +* NUTCH-754 - Use GenericOptionsParser instead of FileSystem.parseArgs() (Julien + Nioche via ab) + Release 1.0 - 2009-03-23 1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab) Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=823553&r1=823552&r2=823553&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Fri Oct 9 13:54:27 2009 @@ -20,9 +20,12 @@ import java.io.*; import java.util.*; +import org.apache.commons.cli.Options; import org.apache.hadoop.io.*; +import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; +import org.apache.hadoop.fs.FileSystem; import org.apache.nutch.metadata.Metadata; import org.apache.nutch.util.NutchConfiguration; @@ -205,11 +208,18 @@ return; } + Options opts = new Options(); Configuration conf = NutchConfiguration.create(); - FileSystem fs = FileSystem.parseArgs(argv, 0, conf); + + 
GenericOptionsParser parser = + new GenericOptionsParser(conf, opts, argv); + + String[] remainingArgs = parser.getRemainingArgs(); + FileSystem fs = FileSystem.get(conf); + try { - int recno = Integer.parseInt(argv[0]); - String segment = argv[1]; + int recno = Integer.parseInt(remainingArgs[0]); + String segment = remainingArgs[1]; Path file = new Path(segment, DIR_NAME); System.out.println("Reading from file: " + file); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java?rev=823553&r1=823552&r2=823553&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java Fri Oct 9 13:54:27 2009 @@ -19,8 +19,10 @@ import java.io.*; import org.apache.hadoop.io.*; +import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.*; +import org.apache.commons.cli.Options; import org.apache.nutch.util.NutchConfiguration; /* The text conversion of page's content, stored using gzip compression. 
@@ -86,12 +88,18 @@ System.out.println("usage:" + usage); return; } - + Options opts = new Options(); Configuration conf = NutchConfiguration.create(); - FileSystem fs = FileSystem.parseArgs(argv, 0, conf); + + GenericOptionsParser parser = + new GenericOptionsParser(conf, opts, argv); + + String[] remainingArgs = parser.getRemainingArgs(); + + FileSystem fs = FileSystem.get(conf); try { - int recno = Integer.parseInt(argv[0]); - String segment = argv[1]; + int recno = Integer.parseInt(remainingArgs[0]); + String segment = remainingArgs[1]; String filename = new Path(segment, ParseText.DIR_NAME).toString(); ParseText parseText = new ParseText(); Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=823553&r1=823552&r2=823553&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Fri Oct 9 13:54:27 2009 @@ -27,6 +27,7 @@ import java.util.zip.InflaterInputStream; //Hadoop imports +import org.apache.commons.cli.Options; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -35,6 +36,7 @@ import org.apache.hadoop.io.UTF8; import org.apache.hadoop.io.VersionMismatchException; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.GenericOptionsParser; //Nutch imports import org.apache.nutch.metadata.Metadata; @@ -255,11 +257,18 @@ System.out.println("usage:" + usage); return; } + Options opts = new Options(); Configuration conf = NutchConfiguration.create(); - FileSystem fs = FileSystem.parseArgs(argv, 0, conf); + + GenericOptionsParser parser = + new GenericOptionsParser(conf, opts, argv); + + String[] remainingArgs = parser.getRemainingArgs(); + FileSystem fs = 
FileSystem.get(conf); + try { - int recno = Integer.parseInt(argv[0]); - String segment = argv[1]; + int recno = Integer.parseInt(remainingArgs[0]); + String segment = remainingArgs[1]; Path file = new Path(segment, DIR_NAME); System.out.println("Reading from file: " + file); Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java?rev=823553&r1=823552&r2=823553&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java Fri Oct 9 13:54:27 2009 @@ -58,7 +58,10 @@ public void testSkipChildren() { DOMParser parser= new DOMParser(); + try { + parser.setFeature("http://xml.org/sax/features/validation", false); + parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes()))); } catch (Exception e) { e.printStackTrace();