Author: jerome
Date: Sat Feb 11 02:56:14 2006
New Revision: 376966

URL: http://svn.apache.org/viewcvs?rev=376966&view=rev
Log:
Fix parse-msexcel unit tests

Modified:
    
lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java

Modified: 
lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java?rev=376966&r1=376965&r2=376966&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Sat Feb 11 02:56:14 2006
@@ -4,18 +4,25 @@
  */
 package org.apache.nutch.parse.msexcel;
 
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
-
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.util.NutchConfiguration;
 
+// JUnit imports
 import junit.framework.TestCase;
 
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
+
+
 /** 
  * Based on Unit tests for MSWordParser by John Xing
  *
@@ -31,31 +38,32 @@
   
   private String[] sampleFiles = {"test.xls"};
 
-  private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! ";
+  private String expectedText = "BitStream test.xls 321654.0 Apache " +
+                                "incubator 1234.0 Doug Cutting 89078.0 " +
+                                "CS 599 Search Engines Spring 2005.0 SBC " +
+                                "1234.0 764893.0 Java NUTCH!! ";
 
   public TestMSExcelParser(String name) { 
-    super(name); 
+    super(name);
   }
 
-  protected void setUp() {}
-
-  protected void tearDown() {}
-
   public void testIt() throws ProtocolException, ParseException {
+
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
+    Configuration conf = NutchConfiguration.create();
+    ParseUtil parser = new ParseUtil(conf);
+    ProtocolFactory factory = new ProtocolFactory(conf);
     for (int i = 0; i < sampleFiles.length; i++) {
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
-      protocol = ProtocolFactory.getProtocol(urlString);
-      content = protocol.getContent(urlString);
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      protocol = factory.getProtocol(urlString);
+      content = protocol.getProtocolOutput(new UTF8(urlString),
+                                           new CrawlDatum()).getContent();
+      parse = parser.parseByParserId("parse-msexcel", content);
 
       assertTrue(parse.getText().equals(expectedText));
     }


Reply via email to