Author: nick
Date: Sun Aug 16 18:35:26 2015
New Revision: 1696160
URL: http://svn.apache.org/r1696160
Log:
TIKA-1708 If the Tika Config detector entry calls for MimeTypes, use the
already created one rather than creating a new empty one
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1696160&r1=1696159&r2=1696160&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
Sun Aug 16 18:35:26 2015
@@ -434,6 +434,8 @@ public class TikaConfig {
abstract Class<? extends T> getLoaderClass(); // Generics workaround
abstract boolean isComposite(T loaded);
abstract boolean isComposite(Class<? extends T> loadedClass);
+ abstract T preLoadOne(Class<? extends T> loadedClass, String
classname,
+ MimeTypes mimeTypes) throws TikaException;
abstract CT createDefault(MimeTypes mimeTypes, ServiceLoader loader);
abstract CT createComposite(List<T> loaded, MimeTypes mimeTypes,
ServiceLoader loader);
abstract T createComposite(Class<? extends T> compositeClass,
@@ -479,15 +481,11 @@ public class TikaConfig {
try {
Class<? extends T> loadedClass =
loader.getServiceClass(getLoaderClass(), name);
-
- // Check for classes which can't be set in config
- if (AutoDetectParser.class.isAssignableFrom(loadedClass)) {
- // https://issues.apache.org/jira/browse/TIKA-866
- throw new TikaException(
- "AutoDetectParser not supported in a <parser>"
- + " configuration element: " + name);
- }
+ // Do pre-load checks and short-circuits
+ loaded = preLoadOne(loadedClass, name, mimeTypes);
+ if (loaded != null) return loaded;
+
// Is this a composite or decorated class? If so, support
recursion
if (isComposite(loadedClass)) {
// Get the child objects for it
@@ -562,6 +560,19 @@ public class TikaConfig {
return Parser.class;
}
@Override
+ Parser preLoadOne(Class<? extends Parser> loadedClass, String
classname,
+ MimeTypes mimeTypes) throws TikaException {
+ // Check for classes which can't be set in config
+ if (AutoDetectParser.class.isAssignableFrom(loadedClass)) {
+ // https://issues.apache.org/jira/browse/TIKA-866
+ throw new TikaException(
+ "AutoDetectParser not supported in a <parser>"
+ + " configuration element: " + classname);
+ }
+ // Continue with normal loading
+ return null;
+ }
+ @Override
boolean isComposite(Parser loaded) {
return loaded instanceof CompositeParser;
}
@@ -657,6 +668,17 @@ public class TikaConfig {
return Detector.class;
}
@Override
+ Detector preLoadOne(Class<? extends Detector> loadedClass, String
classname,
+ MimeTypes mimeTypes) throws TikaException {
+ // If they asked for the mime types as a detector, give
+ // them the one we've already created. TIKA-1708
+ if (MimeTypes.class.equals(loadedClass)) {
+ return mimeTypes;
+ }
+ // Continue with normal loading
+ return null;
+ }
+ @Override
boolean isComposite(Detector loaded) {
return loaded instanceof CompositeDetector;
}
@@ -728,6 +750,12 @@ public class TikaConfig {
return Translator.class;
}
@Override
+ Translator preLoadOne(Class<? extends Translator> loadedClass, String
classname,
+ MimeTypes mimeTypes) throws TikaException {
+ // Continue with normal loading
+ return null;
+ }
+ @Override
boolean isComposite(Translator loaded) { return false; }
@Override
boolean isComposite(Class<? extends Translator> loadedClass) { return
false; }
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java?rev=1696160&r1=1696159&r2=1696160&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaDetectorConfigTest.java
Sun Aug 16 18:35:26 2015
@@ -76,38 +76,44 @@ public class TikaDetectorConfigTest exte
* that detection of PST files still works
*/
@Test
- @Ignore // Currently broken as per bug report
public void testPSTDetectionWithoutZipDetector() throws Exception {
// Check the one with an exclude
- TikaConfig config = getConfig("TIKA-1708-detector-default.xml");
- assertNotNull(config.getParser());
- assertNotNull(config.getDetector());
- CompositeDetector detectorWX = (CompositeDetector)config.getDetector();
+ TikaConfig configWX = getConfig("TIKA-1708-detector-default.xml");
+ assertNotNull(configWX.getParser());
+ assertNotNull(configWX.getDetector());
+ CompositeDetector detectorWX =
(CompositeDetector)configWX.getDetector();
// Check it has the POIFS one, but not the zip one
assertDetectors(detectorWX, true, false);
// Check the one with an explicit list
- config = getConfig("TIKA-1708-detector-composite.xml");
- assertNotNull(config.getParser());
- assertNotNull(config.getDetector());
- CompositeDetector detectorCL = (CompositeDetector)config.getDetector();
+ TikaConfig configCL = getConfig("TIKA-1708-detector-composite.xml");
+ assertNotNull(configCL.getParser());
+ assertNotNull(configCL.getDetector());
+ CompositeDetector detectorCL =
(CompositeDetector)configCL.getDetector();
assertEquals(2, detectorCL.getDetectors().size());
// Check it also has the POIFS one, but not the zip one
assertDetectors(detectorCL, true, false);
+ // Check that both detectors have a mimetypes with entries
+ assertTrue("Not enough mime types: " +
configWX.getMediaTypeRegistry().getTypes().size(),
+ configWX.getMediaTypeRegistry().getTypes().size() > 100);
+ assertTrue("Not enough mime types: " +
configCL.getMediaTypeRegistry().getTypes().size(),
+ configCL.getMediaTypeRegistry().getTypes().size() > 100);
+
+
// Now check they detect PST files correctly
TikaInputStream stream = TikaInputStream.get(
getResourceAsFile("/test-documents/testPST.pst"));
assertEquals(
- OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE.toString(),
+ OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE,
detectorWX.detect(stream, new Metadata())
);
assertEquals(
- OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE.toString(),
+ OutlookPSTParser.MS_OUTLOOK_PST_MIMETYPE,
detectorCL.detect(stream, new Metadata())
);
}