Author: jerome
Date: Sat Feb 11 02:56:14 2006
New Revision: 376966

URL: http://svn.apache.org/viewcvs?rev=376966&view=rev
Log:
Fix parse-msexcel unit tests

Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java?rev=376966&r1=376965&r2=376966&view=diff ============================================================================== --- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java (original) +++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Sat Feb 11 02:56:14 2006 @@ -4,18 +4,25 @@ */ package org.apache.nutch.parse.msexcel; +// Nutch imports +import org.apache.nutch.crawl.CrawlDatum; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.protocol.ProtocolFactory; import org.apache.nutch.protocol.Protocol; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; - -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseException; +import org.apache.nutch.util.NutchConfiguration; +// JUnit imports import junit.framework.TestCase; +// Hadoop imports +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.UTF8; + + /** * Based on Unit tests for MSWordParser by John Xing * @@ -31,31 +38,32 @@ private String[] sampleFiles = {"test.xls"}; - private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! "; + private String expectedText = "BitStream test.xls 321654.0 Apache " + + "incubator 1234.0 Doug Cutting 89078.0 " + + "CS 599 Search Engines Spring 2005.0 SBC " + + "1234.0 764893.0 Java NUTCH!! 
"; public TestMSExcelParser(String name) { - super(name); + super(name); } - protected void setUp() {} - - protected void tearDown() {} - public void testIt() throws ProtocolException, ParseException { + String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; + Configuration conf = NutchConfiguration.create(); + ParseUtil parser = new ParseUtil(conf); + ProtocolFactory factory = new ProtocolFactory(conf); for (int i = 0; i < sampleFiles.length; i++) { urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i]; - protocol = ProtocolFactory.getProtocol(urlString); - content = protocol.getContent(urlString); - - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + protocol = factory.getProtocol(urlString); + content = protocol.getProtocolOutput(new UTF8(urlString), + new CrawlDatum()).getContent(); + parse = parser.parseByParserId("parse-msexcel", content); assertTrue(parse.getText().equals(expectedText)); } ------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Do you grep through log files for problems? Stop! Download the new AJAX search engine that makes searching your log files as easy as surfing the web. DOWNLOAD SPLUNK! http://sel.as-us.falkag.net/sel?cmd=lnk&kid=103432&bid=230486&dat=121642 _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs