Author: jukka
Date: Tue Oct 16 09:11:00 2007
New Revision: 585182

URL: http://svn.apache.org/viewvc?rev=585182&view=rev
Log:
TIKA-71 - Remove ParserConfig and ParserFactory

Removed:
    incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
    incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Tue Oct 16 09:11:00 2007
@@ -117,3 +117,5 @@
 52. TIKA-67 - Add an auto-detecting Parser implementation (jukka)
 
 53. TIKA-70 - Better MIME information for the Open Document formats (jukka)
+
+54. TIKA-71 - Remove ParserConfig and ParserFactory (jukka)

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Tue Oct 16 09:11:00 2007
@@ -27,6 +27,8 @@
 //TIKA imports
 import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeUtils;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserPostProcessor;
 import org.apache.tika.utils.Utils;
 
 //JDOM imports
@@ -40,12 +42,11 @@
  * Parse xml config file.
  */
 public class TikaConfig {
-    
+
     public static final String DEFAULT_CONFIG_LOCATION = 
         "/org/apache/tika/tika-config.xml";
 
-    private final Map<String, ParserConfig> configs =
-        new HashMap<String, ParserConfig>();
+    private final Map<String, Parser> parsers = new HashMap<String, Parser>();
     
     private static MimeUtils mimeTypeRepo;
 
@@ -74,16 +75,30 @@
         String mimeTypeRepoResource = mtr.getAttributeValue("resource");
         mimeTypeRepo = new MimeUtils(mimeTypeRepoResource);
 
-        for (Object parser : XPath.selectNodes(element, "//parser")) {
-            ParserConfig config = new ParserConfig((Element) parser);
-            for (Object child : ((Element) parser).getChildren("mime")) {
-                configs.put(((Element) child).getTextTrim(), config);
+        for (Object node : XPath.selectNodes(element, "//parser")) {
+            String className = ((Element) node).getAttributeValue("class");
+            try {
+                Parser parser = new ParserPostProcessor(
+                        (Parser) Class.forName(className).newInstance());
+                for (Object child : ((Element) node).getChildren("mime")) {
+                    parsers.put(((Element) child).getTextTrim(), parser);
+                }
+            } catch (Exception e) {
+                throw new JDOMException(
+                        "Invalid parser configuration: " + className, e);
             }
         }
     }
 
-    public ParserConfig getParserConfig(String mimeType) {
-        return configs.get(mimeType);
+    /**
+     * Returns the parser instance configured for the given MIME type.
+     * Returns <code>null</code> if the given MIME type is unknown.
+     *
+     * @param mimeType MIME type
+     * @return configured Parser instance, or <code>null</code>
+     */
+    public Parser getParser(String mimeType) {
+        return parsers.get(mimeType);
     }
     
     public MimeTypes getMimeRepository(){

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Tue Oct 16 09:11:00 2007
@@ -21,7 +21,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 
-import org.apache.tika.config.ParserConfig;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -76,17 +75,16 @@
         MimeType type = getMimeType(stream, metadata);
         metadata.set(Metadata.CONTENT_TYPE, type.getName());
 
-        // Get the parser configuration for the detected MIME type
-        ParserConfig pc = config.getParserConfig(type.getName());
-        if (pc == null) {
-            pc = config.getParserConfig(MimeTypes.DEFAULT);
+        // Get the parser configured for the detected MIME type
+        Parser parser = config.getParser(type.getName());
+        if (parser == null) {
+            parser = config.getParser(MimeTypes.DEFAULT);
         }
-        if (pc == null) {
-            throw new TikaException("No parsers available for this document");
+        if (parser == null) {
+            throw new TikaException("No parsers available: " + type.getName());
         }
 
-        // Instantiate the configured parser and use it to parse the document
-        Parser parser = ParserFactory.getParser(pc);
+        // Parse the document
         parser.parse(stream, handler, metadata);
     }
 

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java Tue Oct 16 09:11:00 2007
@@ -28,13 +28,11 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.tika.config.ParserConfig;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaMimeKeys;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserFactory;
 import org.apache.tika.parser.WriteOutContentHandler;
 import org.xml.sax.SAXException;
 
@@ -57,8 +55,7 @@
      */
     public static Parser getParser(String mimeType, TikaConfig config)
             throws TikaException {
-        ParserConfig pc = config.getParserConfig(mimeType);
-        return ParserFactory.getParser(pc);
+        return config.getParser(mimeType);
     }
 
     /**
@@ -171,8 +168,7 @@
             InputStream stream, TikaConfig config, String mimeType)
             throws TikaException, IOException {
         try {
-            ParserConfig pc = config.getParserConfig(mimeType);
-            Parser parser = ParserFactory.getParser(pc);
+            Parser parser = config.getParser(mimeType);
             StringWriter writer = new StringWriter();
             parser.parse(
                     stream, new WriteOutContentHandler(writer), new Metadata());

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Tue Oct 16 09:11:00 2007
@@ -24,11 +24,9 @@
 
 import junit.framework.TestCase;
 
-import org.apache.tika.config.ParserConfig;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserFactory;
 import org.apache.tika.utils.ParseUtils;
 import org.apache.tika.utils.Utils;
 import org.jdom.JDOMException;
@@ -103,9 +101,7 @@
         String s2 = ParseUtils.getStringContent(
                 file, tc, "application/vnd.ms-powerpoint");
         assertEquals(s1, s2);
-        ParserConfig config =
-            tc.getParserConfig("application/vnd.ms-powerpoint");
-        Parser parser = ParserFactory.getParser(config);
+        Parser parser = tc.getParser("application/vnd.ms-powerpoint");
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -121,8 +117,7 @@
         String s1 = ParseUtils.getStringContent(file, tc);
         String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
         assertEquals(s1, s2);
-        ParserConfig config = tc.getParserConfig("application/msword");
-        Parser parser = ParserFactory.getParser(config);
+        Parser parser = tc.getParser("application/msword");
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -146,8 +141,7 @@
         assertEquals(s1, s2);
         assertTrue("Text does not contain '" + expected + "'", s1
                 .contains(expected));
-        ParserConfig config = tc.getParserConfig("application/vnd.ms-excel");
-        Parser parser = ParserFactory.getParser(config);
+        Parser parser = tc.getParser("application/vnd.ms-excel");
         Metadata metadata = new Metadata();
         InputStream stream = new FileInputStream(file);
         try {
@@ -172,8 +166,7 @@
         String s2 = ParseUtils.getStringContent(file, tc, "text/html");
         assertEquals(s1, s2);
 
-        ParserConfig config = tc.getParserConfig("text/html");
-        Parser parser = ParserFactory.getParser(config);
+        Parser parser = tc.getParser("text/html");
         assertNotNull(parser);
     }
 


Reply via email to