Author: jukka
Date: Tue Oct 16 09:11:00 2007
New Revision: 585182
URL: http://svn.apache.org/viewvc?rev=585182&view=rev
Log:
TIKA-71 - Remove ParserConfig and ParserFactory
Removed:
incubator/tika/trunk/src/main/java/org/apache/tika/config/ParserConfig.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
Modified:
incubator/tika/trunk/CHANGES.txt
incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
Modified: incubator/tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Tue Oct 16 09:11:00 2007
@@ -117,3 +117,5 @@
52. TIKA-67 - Add an auto-detecting Parser implementation (jukka)
53. TIKA-70 - Better MIME information for the Open Document formats (jukka)
+
+54. TIKA-71 - Remove ParserConfig and ParserFactory (jukka)
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/TikaConfig.java Tue Oct 16 09:11:00 2007
@@ -27,6 +27,8 @@
//TIKA imports
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeUtils;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserPostProcessor;
import org.apache.tika.utils.Utils;
//JDOM imports
@@ -40,12 +42,11 @@
* Parse xml config file.
*/
public class TikaConfig {
-
+
public static final String DEFAULT_CONFIG_LOCATION =
"/org/apache/tika/tika-config.xml";
- private final Map<String, ParserConfig> configs =
- new HashMap<String, ParserConfig>();
+ private final Map<String, Parser> parsers = new HashMap<String, Parser>();
private static MimeUtils mimeTypeRepo;
@@ -74,16 +75,30 @@
String mimeTypeRepoResource = mtr.getAttributeValue("resource");
mimeTypeRepo = new MimeUtils(mimeTypeRepoResource);
- for (Object parser : XPath.selectNodes(element, "//parser")) {
- ParserConfig config = new ParserConfig((Element) parser);
- for (Object child : ((Element) parser).getChildren("mime")) {
- configs.put(((Element) child).getTextTrim(), config);
+ for (Object node : XPath.selectNodes(element, "//parser")) {
+ String className = ((Element) node).getAttributeValue("class");
+ try {
+ Parser parser = new ParserPostProcessor(
+ (Parser) Class.forName(className).newInstance());
+ for (Object child : ((Element) node).getChildren("mime")) {
+ parsers.put(((Element) child).getTextTrim(), parser);
+ }
+ } catch (Exception e) {
+ throw new JDOMException(
+ "Invalid parser configuration: " + className, e);
}
}
}
- public ParserConfig getParserConfig(String mimeType) {
- return configs.get(mimeType);
+ /**
+ * Returns the parser instance configured for the given MIME type.
+ * Returns <code>null</code> if the given MIME type is unknown.
+ *
+ * @param mimeType MIME type
+ * @return configured Parser instance, or <code>null</code>
+ */
+ public Parser getParser(String mimeType) {
+ return parsers.get(mimeType);
}
public MimeTypes getMimeRepository(){
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/AutoDetectParser.java Tue Oct 16 09:11:00 2007
@@ -21,7 +21,6 @@
import java.io.IOException;
import java.io.InputStream;
-import org.apache.tika.config.ParserConfig;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -76,17 +75,16 @@
MimeType type = getMimeType(stream, metadata);
metadata.set(Metadata.CONTENT_TYPE, type.getName());
- // Get the parser configuration for the detected MIME type
- ParserConfig pc = config.getParserConfig(type.getName());
- if (pc == null) {
- pc = config.getParserConfig(MimeTypes.DEFAULT);
+ // Get the parser configured for the detected MIME type
+ Parser parser = config.getParser(type.getName());
+ if (parser == null) {
+ parser = config.getParser(MimeTypes.DEFAULT);
}
- if (pc == null) {
- throw new TikaException("No parsers available for this document");
+ if (parser == null) {
+ throw new TikaException("No parsers available: " + type.getName());
}
- // Instantiate the configured parser and use it to parse the document
- Parser parser = ParserFactory.getParser(pc);
+ // Parse the document
parser.parse(stream, handler, metadata);
}
Modified:
incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java Tue Oct 16 09:11:00 2007
@@ -28,13 +28,11 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.tika.config.ParserConfig;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMimeKeys;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserFactory;
import org.apache.tika.parser.WriteOutContentHandler;
import org.xml.sax.SAXException;
@@ -57,8 +55,7 @@
*/
public static Parser getParser(String mimeType, TikaConfig config)
throws TikaException {
- ParserConfig pc = config.getParserConfig(mimeType);
- return ParserFactory.getParser(pc);
+ return config.getParser(mimeType);
}
/**
@@ -171,8 +168,7 @@
InputStream stream, TikaConfig config, String mimeType)
throws TikaException, IOException {
try {
- ParserConfig pc = config.getParserConfig(mimeType);
- Parser parser = ParserFactory.getParser(pc);
+ Parser parser = config.getParser(mimeType);
StringWriter writer = new StringWriter();
parser.parse(
stream, new WriteOutContentHandler(writer), new Metadata());
Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL:
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=585182&r1=585181&r2=585182&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
(original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Tue Oct 16 09:11:00 2007
@@ -24,11 +24,9 @@
import junit.framework.TestCase;
-import org.apache.tika.config.ParserConfig;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserFactory;
import org.apache.tika.utils.ParseUtils;
import org.apache.tika.utils.Utils;
import org.jdom.JDOMException;
@@ -103,9 +101,7 @@
String s2 = ParseUtils.getStringContent(
file, tc, "application/vnd.ms-powerpoint");
assertEquals(s1, s2);
- ParserConfig config =
- tc.getParserConfig("application/vnd.ms-powerpoint");
- Parser parser = ParserFactory.getParser(config);
+ Parser parser = tc.getParser("application/vnd.ms-powerpoint");
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -121,8 +117,7 @@
String s1 = ParseUtils.getStringContent(file, tc);
String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
assertEquals(s1, s2);
- ParserConfig config = tc.getParserConfig("application/msword");
- Parser parser = ParserFactory.getParser(config);
+ Parser parser = tc.getParser("application/msword");
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -146,8 +141,7 @@
assertEquals(s1, s2);
assertTrue("Text does not contain '" + expected + "'", s1
.contains(expected));
- ParserConfig config = tc.getParserConfig("application/vnd.ms-excel");
- Parser parser = ParserFactory.getParser(config);
+ Parser parser = tc.getParser("application/vnd.ms-excel");
Metadata metadata = new Metadata();
InputStream stream = new FileInputStream(file);
try {
@@ -172,8 +166,7 @@
String s2 = ParseUtils.getStringContent(file, tc, "text/html");
assertEquals(s1, s2);
- ParserConfig config = tc.getParserConfig("text/html");
- Parser parser = ParserFactory.getParser(config);
+ Parser parser = tc.getParser("text/html");
assertNotNull(parser);
}