Author: jerome
Date: Thu Mar 23 15:21:03 2006
New Revision: 388293

URL: http://svn.apache.org/viewcvs?rev=388293&view=rev
Log:
Set the configuration of the parser used in the main method to fix NPEs
Modified:
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java

Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=388293&r1=388292&r2=388293&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Thu Mar 23 15:21:03 2006
@@ -269,9 +269,11 @@
     byte[] bytes = new byte[(int)file.length()];
     DataInputStream in = new DataInputStream(new FileInputStream(file));
     in.readFully(bytes);
-    Parse parse = new HtmlParser().getParse(
-      new Content(url, url, bytes, "text/html", new Metadata(),
-                  NutchConfiguration.create()));
+    Configuration conf = NutchConfiguration.create();
+    HtmlParser parser = new HtmlParser();
+    parser.setConf(conf);
+    Parse parse = parser.getParse(
+      new Content(url, url, bytes, "text/html", new Metadata(), conf));
     System.out.println("data: "+parse.getData());
     System.out.println("text: "+parse.getText());