Author: jerome
Date: Thu Mar 23 15:21:03 2006
New Revision: 388293
URL: http://svn.apache.org/viewcvs?rev=388293&view=rev
Log:
Set the configuration on the parser used in the main method (via setConf) to fix NullPointerExceptions (NPEs)
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
Modified:
lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=388293&r1=388292&r2=388293&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Thu Mar 23 15:21:03 2006
@@ -269,9 +269,11 @@
byte[] bytes = new byte[(int)file.length()];
DataInputStream in = new DataInputStream(new FileInputStream(file));
in.readFully(bytes);
- Parse parse = new HtmlParser().getParse(
- new Content(url, url, bytes, "text/html", new Metadata(),
- NutchConfiguration.create()));
+ Configuration conf = NutchConfiguration.create();
+ HtmlParser parser = new HtmlParser();
+ parser.setConf(conf);
+ Parse parse = parser.getParse(
+ new Content(url, url, bytes, "text/html", new Metadata(), conf));
System.out.println("data: "+parse.getData());
System.out.println("text: "+parse.getText());