Author: jerome
Date: Fri Sep 30 15:11:19 2005
New Revision: 292865

URL: http://svn.apache.org/viewcvs?rev=292865&view=rev
Log:
NUTCH-88, Second step implementation:
* Add a configuration property for the parse-plugins.xml file location
* ParserFactory now returns an ordered list of Parsers
* Improve logging
* Improve Parser selection policy
* Unit Tests added

Added:
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml   
(with props)
Modified:
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=292865&r1=292864&r2=292865&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Fri Sep 30 15:11:19 2005
@@ -612,6 +612,15 @@
   </description>
 </property>
 
+<!-- parser properties -->
+
+<property>
+  <name>parse.plugin.file</name>
+  <value>parse-plugins.xml</value>
+  <description>The name of the file that defines the associations between
+  content-types and parsers.</description>
+</property>
+
 <property>
   <name>parser.character.encoding.default</name>
   <value>windows-1252</value>

Modified: 
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java?rev=292865&r1=292864&r2=292865&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java 
Fri Sep 30 15:11:19 2005
@@ -22,6 +22,12 @@
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import java.io.InputStream;
+import java.io.IOException;
+
+import java.net.URL;
+import java.net.MalformedURLException;
+
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 
@@ -76,8 +82,35 @@
     Document document = null;
     InputSource inputSource = null;
     
-    inputSource = new InputSource(NutchConf.get()
-                          .getConfResourceAsInputStream(fParsePluginsFile));
+    //check to see if the Nutch conf property
+    //parse.plugin.file is defined
+    String parsePluginFileUrl = NutchConf.get().get("parse.plugin.file");
+    
+    InputStream ppInputStream = null;
+
+        if (parsePluginFileUrl != null) {
+            URL parsePluginUrl = null;
+
+            try {
+                parsePluginUrl = new URL(parsePluginFileUrl);
+                ppInputStream = parsePluginUrl.openStream();
+            } catch (MalformedURLException e) {
+                LOG.log(Level.SEVERE,
+                        "Unable to load parse plugins file from URL ["
+                                + parsePluginFileUrl + "]", e);
+                return null;
+            } catch (IOException e) {
+                LOG.log(Level.SEVERE,
+                        "Unable to load parse plugins file from URL ["
+                                + parsePluginFileUrl + "]", e);
+                return null;
+            }
+        } else {
+            ppInputStream = NutchConf.get().getConfResourceAsInputStream(
+                    fParsePluginsFile);
+        }
+    
+    inputSource = new InputSource(ppInputStream);
     
     try {
       factory = DocumentBuilderFactory.newInstance();
@@ -154,6 +187,12 @@
   public static void main(String[] args) throws Exception {
     String parsePluginFile = null;
     String usage = "ParsePluginsReader [--file <parse plugin file location>]";
+    
+    if (( args.length != 0 && args.length != 2 )
+        || (args.length == 2 && !"--file".equals(args[0]))) {
+      System.err.println(usage);
+      System.exit(1);
+    }
     
     for (int i = 0; i < args.length; i++) {
       if (args[i].equals("--file")) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=292865&r1=292864&r2=292865&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Fri 
Sep 30 15:11:19 2005
@@ -16,9 +16,11 @@
 package org.apache.nutch.parse;
 
 // JDK imports
+import java.util.Collections;
 import java.util.Hashtable;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Vector;
 import java.util.logging.Logger;
 
 // Nutch imports
@@ -35,14 +37,19 @@
   public static final Logger LOG =
           LogFormatter.getLogger(ParserFactory.class.getName());
   
+  /** Wildcard for default plugins. */
   public static final String DEFAULT_PLUGIN = "*";
 
+  /** Extension point. */
   private static final ExtensionPoint X_POINT =
           PluginRepository.getInstance().getExtensionPoint(Parser.X_POINT_ID);
   
+  /** List of parser plugins. */
   private static final ParsePluginList PARSE_PLUGIN_LIST =
           new ParsePluginsReader().parse();
   
+  /** Empty extension list for caching purposes. */
+  private static final List EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
   
   static {
     if (X_POINT == null) {
@@ -53,100 +60,299 @@
     }
   }
   
+  //cache mapping mimeType->List of Extensions
   private static final Hashtable CACHE = new Hashtable();
   
+  //cache mapping parser plugin id->Parser instance
+  private static final Hashtable PARSER_CACHE = new Hashtable();
   
   private ParserFactory() {}                      // no public ctor
   
+
   /**
   * Returns the appropriate {@link Parser} implementation given a content
    * type and url.
-   *
-   * <p>Parser extensions should define the attributes"contentType" and/or
-   * "pathSuffix".  Content type has priority: the first plugin found whose
+   * 
+   * @deprecated Since the addition of NUTCH-88, this method is replaced by 
+   * taking the highest priority {@link Parser} returned from
+   * {@link #getParsers(String, String)}.
+   * 
+   * Parser extensions should define the attributes "contentType" and/or
+   * "pathSuffix". Content type has priority: the first plugin found whose
    * "contentType" attribute matches the beginning of the content's type is
-   * used.  If none match, then the first whose "pathSuffix" attribute matches
+   * used. If none match, then the first whose "pathSuffix" attribute matches
    * the end of the url's path is used.  If neither of these match, then the
    * first plugin whose "pathSuffix" is the empty string is used.
    */
   public static Parser getParser(String contentType, String url)
   throws ParserNotFound {
     
-    try {
-      Extension extension = getExtension(contentType);
-      if (extension != null) {
-        return (Parser) extension.getExtensionInstance();
-      }
-      // TODO once the MimeTypes is available
-      // extension = getExtension(MimeUtils.map(contentType));
-      // if (extension != null) {
-      //   return (Parser) extension.getExtensionInstance();
-      // }
-      // Last Chance: Guess content-type from file url...
-      // extension = getExtension(MimeUtils.getMimeType(url));
-        throw new ParserNotFound(url, contentType);
-    } catch (PluginRuntimeException e) {
-      throw new ParserNotFound(url, contentType, e.toString());
+    Parser[] parsers = getParsers(contentType, url);
+    
+    if(parsers != null){
+      //give the user the highest priority parser available
+      for(int i = 0;  i < parsers.length; i++ ){
+        Parser p = parsers[i];
+        if(p != null){
+          return p;
+        }
+      }
+      
+      throw new ParserNotFound(url, contentType);
+      
+    } 
+    else{
+      throw new ParserNotFound(url, contentType);
     }
   }
+   
+  /**
+   * Function returns an array of {@link Parser}s for a given content type.
+   *
+   * The function consults the internal {@link ParsePluginList} for the
+   * ParserFactory to determine the list of pluginIds, then gets the
+   * appropriate extension points to instantiate as {Parser}s.
+   *
+   * @param contentType The contentType to return the <code>Array</code>
+   *                    of {Parser}s for.
+   * @param url The url for the content that may allow us to get the type from
+   *            the file suffix.
+   * @return An <code>Array</code> of {@link Parser}s for the given contentType.
+   *         If there were plugins mapped to a contentType via the
+   *         <code>parse-plugins.xml</code> file, but never enabled via
+   *         the <code>plugin.includes</code> Nutch conf, then those plugins
+   *         won't be part of this array, i.e., they will be skipped.
+   *         So, if the ordered list of parsing plugins for
+   *         <code>text/plain</code> was <code>[parse-text,parse-html,
+   *         parse-rtf]</code>, and only <code>parse-html</code> and
+   *         <code>parse-rtf</code> were enabled via
+   *         <code>plugin.includes</code>, then this ordered Array would
+   *         consist of two {@link Parser} interfaces,
+   *         <code>[parse-html, parse-rtf]</code>.
+   */
+  public static Parser[] getParsers(String contentType, String url)
+  throws ParserNotFound {
+    
+    List parsers = null;
+    List parserExts = null;
     
-  protected static Extension getExtension(String contentType)
-  throws PluginRuntimeException {
-    
-    Extension extension = (Extension) CACHE.get(contentType);
-    if (extension == null) {
-      extension = findExtension(contentType);
-      // TODO: For null extension, add a fake extension in the CACHE
-      //       in order to avoid trying to find each time
-      //       an unavailable extension
-      if (extension != null) {
-        CACHE.put(contentType, extension);
+    // TODO once the MimeTypes is available
+    // parsers = getExtensions(MimeUtils.map(contentType));
+    // if (parsers != null) {
+    //   return parsers;
+    // }
+    // Last Chance: Guess content-type from file url...
+    // parsers = getExtensions(MimeUtils.getMimeType(url));
+
+    parserExts = getExtensions(contentType);
+    if (parserExts == null) {
+      throw new ParserNotFound(url, contentType);
+    }
+
+    parsers = new Vector(parserExts.size());
+    for (Iterator i=parserExts.iterator(); i.hasNext(); ){
+      Extension ext = (Extension) i.next();
+      Parser p = null;
+      try {
+        //check to see if we've cached this parser instance yet
+        p = (Parser) PARSER_CACHE.get(ext.getDescriptor().getPluginId());
+        if (p == null) {
+          // go ahead and instantiate it and then cache it
+          p = (Parser) ext.getExtensionInstance();
+          PARSER_CACHE.put(ext.getDescriptor().getPluginId(),p);
+        }
+        parsers.add(p);
+      } catch (PluginRuntimeException e) {
+        LOG.warning("ParserFactory:PluginRuntimeException when "
+                  + "initializing parser plugin "
+                  + ext.getDescriptor().getPluginId()
+                  + " instance in getParsers "
+                  + "function: attempting to continue instantiating parsers");
       }
     }
-    return extension;
+    return (Parser[]) parsers.toArray(new Parser[]{});
   }
   
-  private static Extension findExtension(String contentType)
-  throws PluginRuntimeException{
+  /**
+   * finds the best-suited parse plugin for a given contentType.
+   *
+   * @param contentType Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType.
+   *                If none, returns null.
+   */
+  protected static List getExtensions(String contentType){
+    
+    List extensions = (List)CACHE.get(contentType);
+
+    // Just compare the reference:
+    // if this is the empty list, we know we will find no extension.
+    if (extensions == EMPTY_EXTENSION_LIST) {
+      return null;
+    }
+    
+    if (extensions == null) {
+      extensions = findExtensions(contentType);
+      if (extensions != null) {
+        CACHE.put(contentType, extensions);
+      } else {
+       // Put the empty extension list into cache
+       // to remember we don't know any related extension.
+       CACHE.put(contentType, EMPTY_EXTENSION_LIST);
+      }
+    }
+    return extensions;
+  }
+  
+  /**
+   * searches a list of suitable parse plugins for the given contentType.
+   * <p>It first looks for a preferred plugin defined in the parse-plugin
+   * file.  If none is found, it returns a list of default plugins.
+   * 
+   * @param contentType Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType.
+   *                If none, returns null.
+   */
+  private static List findExtensions(String contentType){
     
     Extension[] extensions = X_POINT.getExtensions();
     
     // Look for a preferred plugin.
     List parsePluginList = PARSE_PLUGIN_LIST.getPluginList(contentType);
-    Extension extension = matchExtension(parsePluginList, extensions, 
contentType);
-    if (extension != null) {
-      return extension;
+    List extensionList = matchExtensions(parsePluginList, extensions, 
contentType);
+    if (extensionList != null) {
+      return extensionList;
     }
     
     // If none found, look for a default plugin.
     parsePluginList = PARSE_PLUGIN_LIST.getPluginList(DEFAULT_PLUGIN);
-    return matchExtension(parsePluginList, extensions, DEFAULT_PLUGIN);
+    return matchExtensions(parsePluginList, extensions, DEFAULT_PLUGIN);
   }
   
-  private static Extension matchExtension(List plugins,
-                                          Extension[] extensions,
-                                          String contentType) {
-    
-    // Preliminary check
-    if (plugins == null) { return null; }
-    
-    Iterator iter = plugins.iterator();
-    while (iter.hasNext()) {
-      String pluginId = (String) iter.next();
-      if (pluginId != null) {
-        for (int i=0; i<extensions.length; i++) {
-          if (match(extensions[i], pluginId, contentType)) {
-            return extensions[i];
+  /**
+   * Tries to find a suitable parser for the given contentType.
+   * <ol>
+   * <li>It checks if a parser which accepts the contentType
+   * can be found in the <code>plugins</code> list;</li>
+   * <li>If this list is empty, it tries to find amongst the loaded
+   * extensions whether some of them might suit and warns the user.</li>
+   * </ol>
+   * @param plugins List of candidate plugins.
+   * @param extensions Array of loaded extensions.
+   * @param contentType Content-Type for which we seek a parse plugin.
+   * @return List - List of extensions to be used for this contentType.
+   *                If none, returns null.
+   */
+  private static List matchExtensions(List plugins,
+                                      Extension[] extensions,
+                                      String contentType) {
+    
+    List extList = null;
+    if (plugins != null) {
+      extList = new Vector(plugins.size());
+      
+      for (Iterator i = plugins.iterator(); i.hasNext();) {
+        String parsePluginId = (String) i.next();
+        
+        Extension ext = getExtensionByIdAndType(extensions,
+                                                parsePluginId,
+                                                contentType);
+        // the extension returned may be null
+        // that means that it was not enabled in the plugin.includes
+        // nutch conf property, but it was mapped in the
+        // parse-plugins.xml
+        // file. 
+        // OR it was enabled in plugin.includes, but the plugin's plugin.xml
+        // file does not claim that the plugin supports the specified mimeType
+        // in either case, LOG the appropriate error message to SEVERE level
+        
+        if (ext == null) {
+           //try to get it just by its pluginId
+            ext = getExtensionById(extensions, parsePluginId);
+          if (ext != null) {
+            // plugin was enabled via plugin.includes
+            // its plugin.xml just doesn't claim to support that
+            // particular mimeType
+            LOG.warning("ParserFactory:Plugin: " + parsePluginId +
+                        " mapped to contentType " + contentType +
+                        " via parse-plugins.xml, but " + "its plugin.xml " +
+                        "file does not claim to support contentType: " +
+                        contentType);
+            
+            //go ahead and load the extension anyways, though
+            extList.add(ext);
+          
+          } else{
+            // plugin wasn't enabled via plugin.includes
+            LOG.severe("ParserFactory: Plugin: " + parsePluginId + 
+                       " mapped to contentType " + contentType +
+                       " via parse-plugins.xml, but not enabled via " +
+                       "plugin.includes in nutch-default.xml");                
     
           }
+          
+        } else{
+          // add it to the list
+          extList.add(ext);
         }
       }
+      
+      return extList;
+    } else {
+      // okay, there were no list of plugins defined for
+      // this mimeType, however, there may be plugins registered
+      // via the plugin.includes nutch conf property that claim
+      // via their plugin.xml file to support this contentType
+      // so, iterate through the list of extensions and if you find
+      // any extensions where this is the case, throw a
+      // NotMappedParserException
+      
+      List unmappedPlugins = new Vector();
+      
+      for (int i = 0; i < extensions.length; i++) {
+        if (extensions[i].getAttribute("contentType") != null
+            && extensions[i].getAttribute("contentType").equals(
+                contentType)) {
+          unmappedPlugins.add(extensions[i].getDescriptor()
+              .getPluginId());
+        }
+      }
+      
+      if (unmappedPlugins.size() > 0) {
+        LOG.info("The parsing plugins: " + unmappedPlugins +
+                 " are enabled via the plugin.includes system " +
+                 "property, and all claim to support the content type " +
+                 contentType + ", but they are not mapped to it  in the " +
+                 "parse-plugins.xml file");
+      } else {
+        LOG.fine("ParserFactory:No parse plugins mapped or enabled for " +
+                 "contentType " + contentType);
+      }
+      return null;
     }
-    return null;
   }
 
   private static boolean match(Extension extension, String id, String type) {
     return (id.equals(extension.getDescriptor().getPluginId())) &&
-              (type.equals(extension.getAttribute("contentType")) ||
-              (type.equals(DEFAULT_PLUGIN))); 
+    (type.equals(extension.getAttribute("contentType")) ||
+        (type.equals(DEFAULT_PLUGIN))); 
+  }
+  
+  private static Extension getExtensionByIdAndType(Extension[] extList,
+                                                   String plugId,
+                                                   String contentType) {
+    for (int i = 0; i < extList.length; i++) {
+      if (match(extList[i], plugId, contentType)) {
+        return extList[i];
+      }
+    }
+    return null;
+  }
+  
+  private static Extension getExtensionById(Extension[] extList, String 
plugId) {
+    for(int i = 0; i < extList.length; i++){
+      if(plugId.equals(extList[i].getDescriptor().getPluginId())){
+        return extList[i];
+      }
+    }
+    return null;
   }
 }

Modified: 
lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java?rev=292865&r1=292864&r2=292865&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java 
Fri Sep 30 15:11:19 2005
@@ -21,6 +21,12 @@
 
 // Nutch imports
 import org.apache.nutch.plugin.Extension;
+import org.apache.nutch.util.NutchConf;
+
+// JDK imports
+import java.io.File;
+
+import java.net.MalformedURLException;
 
 
 /**
@@ -30,9 +36,32 @@
  * @version 1.0
  */
 public class TestParserFactory extends TestCase {
+       
+  private NutchConf conf = null;
   
   public TestParserFactory(String name) { super(name); }
   
+  private void initNutchConf(String testFile) {
+    // set the Nutch Conf property for parse.plugin.file.url
+    // to ${test.src.dir}/org/apache/nutch/parse/parse-plugin-test.xml
+    String testParsePluginFileUrl = null;
+    try{
+      testParsePluginFileUrl = new File(System.getProperty("test.src.dir")
+          +"/org/apache/nutch/parse/" + testFile).toURL().toString();
+      NutchConf.get().set("parse.plugin.file.url",testParsePluginFileUrl);
+      this.conf = NutchConf.get();
+    }
+    catch(MalformedURLException e){
+      throw new RuntimeException("Unable to load parse-plugins.xml file from 
URL: "+testParsePluginFileUrl);
+    }    
+  }
+  
+  /** Inits the Test Case: loads the Nutch Conf instance. */
+  protected void setUp() throws Exception {
+    if (conf == null) {
+      initNutchConf("parse-plugin-test.xml");
+    }
+  }
   
   /** Unit test for <code>getParser(String, String)</code> method. */
   public void testGetParser() throws Exception {
@@ -42,12 +71,46 @@
     assertNotNull(parser);
   }
   
-  /** Unit test for <code>getExtension(String)</code> method. */
-  public void testGetExtension() throws Exception {
-    Extension ext = ParserFactory.getExtension("text/html");
+  /** Unit test for <code>getExtensions(String)</code> method. */
+  public void testGetExtensions() throws Exception {
+    Extension ext = (Extension)ParserFactory.getExtensions("text/html").get(0);
     assertEquals("parse-html", ext.getDescriptor().getPluginId());
-    ext = ParserFactory.getExtension("foo/bar");
+    ext = (Extension)ParserFactory.getExtensions("foo/bar").get(0);
     assertEquals("parse-text", ext.getDescriptor().getPluginId());
   }
   
+  /** Unit test to check <code>getParsers</code> method */
+  public void testGetParsers() throws Exception {
+    Parser [] parsers = ParserFactory.getParsers("text/html", "http://foo.com");
+    assertNotNull(parsers);
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.html.HtmlParser",
+        parsers[0].getClass().getName());
+    
+    parsers = ParserFactory.getParsers("application/x-javascript",
+    "http://foo.com");
+    assertNotNull(parsers);
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.js.JSParseFilter",
+        parsers[0].getClass().getName());
+    
+    parsers = ParserFactory.getParsers("text/plain", "http://foo.com");
+    assertNotNull(parsers);
+    assertEquals(1, parsers.length);
+    assertEquals("org.apache.nutch.parse.text.TextParser",
+        parsers[0].getClass().getName());
+    
+    Parser parser1 = ParserFactory.getParsers("text/plain", "http://foo.com")[0];
+    Parser parser2 = ParserFactory.getParsers("*", "http://foo.com")[0];
+   
+    assertEquals("Different instances!", parser1.hashCode(), 
parser2.hashCode());
+    
+    //test and make sure that the rss parser is loaded even though its 
plugin.xml
+    //doesn't claim to support text/rss, only application/rss+xml
+    parsers = ParserFactory.getParsers("text/rss","http://foo.com");
+    assertNotNull(parsers);
+    assertEquals(1,parsers.length);
+    
assertEquals("org.apache.nutch.parse.rss.RSSParser",parsers[0].getClass().getName());
+  }
+ 
 }

Added: lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml?rev=292865&view=auto
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml 
(added)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml 
Fri Sep 30 15:11:19 2005
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+       Copyright 2005 The Apache Software Foundation
+       
+       Licensed under the Apache License, Version 2.0 (the "License");
+       you may not use this file except in compliance with the License.
+       You may obtain a copy of the License at
+       
+       http://www.apache.org/licenses/LICENSE-2.0
+       
+       Unless required by applicable law or agreed to in writing, software
+       distributed under the License is distributed on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+       See the License for the specific language governing permissions and
+       limitations under the License.
+       
+       Author     : mattmann 
+       Description: Test parse-plugins.xml file. 
+-->
+
+<parse-plugins>
+
+       <!--  by default if the mimeType is set to *, or 
+               can't be determined, use parse-text -->
+       <mimeType name="*">
+                <plugin id="parse-text" />
+       </mimeType>
+       
+    <!--  test these 4 plugins -->
+     <mimeType name="text/html">
+          <plugin id="parse-html"/>
+     </mimeType>
+       
+        <mimeType name="text/plain">
+          <plugin id="parse-text"/>
+        </mimeType>
+        
+        <mimeType name="application/x-javascript">
+             <plugin id="parse-js"/>
+        </mimeType>
+        
+        <mimeType name="text/rss">
+             <plugin id="parse-rss"/>
+        </mimeType>
+
+</parse-plugins>

Propchange: 
lucene/nutch/trunk/src/test/org/apache/nutch/parse/parse-plugin-test.xml
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to