Author: jerome
Date: Sat Feb 11 02:56:14 2006
New Revision: 376966

URL: http://svn.apache.org/viewcvs?rev=376966&view=rev
Log:
Fix parse-msexcel unit tests

Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java?rev=376966&r1=376965&r2=376966&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java (original) +++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Sat Feb 11 02:56:14 2006 @@ -4,18 +4,25 @@ */ package org.apache.nutch.parse.msexcel; +// Nutch imports +import org.apache.nutch.crawl.CrawlDatum; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.protocol.ProtocolFactory; import org.apache.nutch.protocol.Protocol; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; - -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseException; +import org.apache.nutch.util.NutchConfiguration; +// JUnit imports import junit.framework.TestCase; +// Hadoop imports +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.UTF8; + + /** * Based on Unit tests for MSWordParser by John Xing * @@ -31,31 +38,32 @@ private String[] sampleFiles = {"test.xls"}; - private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! "; + private String expectedText = "BitStream test.xls 321654.0 Apache " + + "incubator 1234.0 Doug Cutting 89078.0 " + + "CS 599 Search Engines Spring 2005.0 SBC " + + "1234.0 764893.0 Java NUTCH!! 
"; public TestMSExcelParser(String name) { - super(name); + super(name); } - protected void setUp() {} - - protected void tearDown() {} - public void testIt() throws ProtocolException, ParseException { + String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; + Configuration conf = NutchConfiguration.create(); + ParseUtil parser = new ParseUtil(conf); + ProtocolFactory factory = new ProtocolFactory(conf); for (int i = 0; i < sampleFiles.length; i++) { urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i]; - protocol = ProtocolFactory.getProtocol(urlString); - content = protocol.getContent(urlString); - - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + protocol = factory.getProtocol(urlString); + content = protocol.getProtocolOutput(new UTF8(urlString), + new CrawlDatum()).getContent(); + parse = parser.parseByParserId("parse-msexcel", content); assertTrue(parse.getText().equals(expectedText)); }