Author: jerome
Date: Sat Feb 11 02:56:14 2006
New Revision: 376966

URL: http://svn.apache.org/viewcvs?rev=376966&view=rev
Log:
Fix parse-msexcel unit tests

Modified:
    
lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java

Modified: 
lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java?rev=376966&r1=376965&r2=376966&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/src/test/org/apache/nutch/parse/msexcel/TestMSExcelParser.java Sat Feb 11 02:56:14 2006
@@ -4,18 +4,25 @@
  */
 package org.apache.nutch.parse.msexcel;
 
+// Nutch imports
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
-
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.util.NutchConfiguration;
 
+// JUnit imports
 import junit.framework.TestCase;
 
+// Hadoop imports
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.UTF8;
+
+
 /** 
  * Based on Unit tests for MSWordParser by John Xing
  *
@@ -31,31 +38,32 @@
   
   private String[] sampleFiles = {"test.xls"};
 
-  private String expectedText = "BitStream test.xls 321654.0 Apache incubator 1234.0 Doug Cutting 89078.0 CS 599 Search Engines Spring 2005.0 SBC 1234.0 764893.0 Java NUTCH!! ";
+  private String expectedText = "BitStream test.xls 321654.0 Apache " +
+                                "incubator 1234.0 Doug Cutting 89078.0 " +
+                                "CS 599 Search Engines Spring 2005.0 SBC " +
+                                "1234.0 764893.0 Java NUTCH!! ";
 
   public TestMSExcelParser(String name) { 
-    super(name); 
+    super(name);
   }
 
-  protected void setUp() {}
-
-  protected void tearDown() {}
-
   public void testIt() throws ProtocolException, ParseException {
+
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
+    Configuration conf = NutchConfiguration.create();
+    ParseUtil parser = new ParseUtil(conf);
+    ProtocolFactory factory = new ProtocolFactory(conf);
     for (int i = 0; i < sampleFiles.length; i++) {
       urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
 
-      protocol = ProtocolFactory.getProtocol(urlString);
-      content = protocol.getContent(urlString);
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      protocol = factory.getProtocol(urlString);
+      content = protocol.getProtocolOutput(new UTF8(urlString),
+                                           new CrawlDatum()).getContent();
+      parse = parser.parseByParserId("parse-msexcel", content);
 
       assertTrue(parse.getText().equals(expectedText));
     }




-------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc. Do you grep through log files
for problems?  Stop!  Download the new AJAX search engine that makes
searching your log files as easy as surfing the  web.  DOWNLOAD SPLUNK!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=103432&bid=230486&dat=121642
_______________________________________________
Nutch-cvs mailing list
Nutch-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-cvs

Reply via email to