Author: tpalsulich
Date: Tue Mar 31 21:37:31 2015
New Revision: 1670487
URL: http://svn.apache.org/r1670487
Log:
TIKA-1558. Refactor Parser blacklisting.
Added:
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
Removed:
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java
tika/trunk/tika-core/src/test/resources/META-INF/
tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2
tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
Modified: tika/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Tue Mar 31 21:37:31 2015
@@ -61,9 +61,6 @@ Release 1.8 - Current Development
* Detect Global Change Master Directory (GCMD) Directory
Interchange Format (DIF) files (TIKA-1561).
- * Parsers and other services can now be disabled with a
- blacklist META-INF file (TIKA-1558).
-
* Tika's JAX-RS server can now return stacktraces for
parse exceptions. (TIKA-1323)
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Tue Mar 31 21:37:31 2015
@@ -218,8 +218,7 @@ public class ServiceLoader {
}
/**
- * Returns all the available service providers of the given type
- * that aren't blacklisted.
+ * Returns all the available service providers of the given type.
*
* @param iface service provider interface
* @return available service providers
@@ -254,27 +253,13 @@ public class ServiceLoader {
providers.add((T) service.service);
}
}
- return removeBlacklisted(providers, iface);
+ return providers;
}
} else {
return new ArrayList<T>(0);
}
}
- private <T> List<T> removeBlacklisted(List<T> providers, Class<T> iface) {
- List<T> blacklist = loadStaticServiceProvidersBlacklist(iface);
- List<T> copy = new ArrayList<T>(providers);
-
- for (T provider : copy) {
- for (T blacklistedProvider : blacklist) {
- if (blacklistedProvider.getClass().isAssignableFrom(provider.getClass())){
- providers.remove(provider);
- }
- }
- }
- return providers;
- }
-
/**
* Returns the defined static service providers of the given type, without
* attempting to load them.
@@ -305,68 +290,6 @@ public class ServiceLoader {
}
/**
- * Returns the blacklisted static service providers of the given type, without
- * attempting to load them.
- * The providers are loaded using the service provider mechanism using
- * the configured class loader (if any).
- *
- * @since Apache Tika 1.8
- * @param iface service provider interface
- * @return static list of uninitialised blacklisted service providers.
- *
- */
- protected <T> List<String> identifyStaticServiceProvidersBlacklist(Class<T> iface) {
- List<String> names = new ArrayList<String>();
-
- if (loader != null) {
- String fileName = iface.getName() + ".blacklist";
- Enumeration<URL> resources =
- findServiceResources("META-INF/services/" + fileName);
- for (URL resource : Collections.list(resources)) {
- try {
- collectServiceClassNames(resource, names);
- } catch (IOException e) {
- handler.handleLoadError(fileName, e);
- }
- }
- }
-
- return names;
- }
-
- /**
- * Returns the available blacklisted static service providers of the given type.
- * The providers are loaded using the service provider mechanism using
- * the configured class loader (if any). The returned list is newly
- * allocated and may be freely modified by the caller.
- *
- * @since Apache Tika 1.8
- * @param iface service provider interface
- * @return blacklisted static service providers
- */
- @SuppressWarnings("unchecked")
- public <T> List<T> loadStaticServiceProvidersBlacklist(Class<T> iface) {
- List<T> providers = new ArrayList<T>();
-
- if (loader != null) {
- List<String> names = identifyStaticServiceProvidersBlacklist(iface);
-
- for (String name : names) {
- try {
- Class<?> klass = loader.loadClass(name);
- if (iface.isAssignableFrom(klass)) {
- providers.add((T) klass.newInstance());
- }
- } catch (Throwable t) {
- handler.handleLoadError(name, t);
- }
- }
- }
-
- return providers;
- }
-
- /**
* Returns the available static service providers of the given type.
* The providers are loaded using the service provider mechanism using
* the configured class loader (if any). The returned list is newly
@@ -394,7 +317,7 @@ public class ServiceLoader {
}
}
}
- return removeBlacklisted(providers, iface);
+ return providers;
}
private static final Pattern COMMENT = Pattern.compile("#.*");
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Tue Mar 31 21:37:31 2015
@@ -70,7 +70,7 @@ public class CompositeParser extends Abs
} else {
this.parsers = new ArrayList<Parser>();
for (Parser p : parsers) {
- if (! excludeParsers.contains(p.getClass())) {
+ if (!isExcluded(excludeParsers, p.getClass())) {
this.parsers.add(p);
}
}
@@ -99,6 +99,17 @@ public class CompositeParser extends Abs
return map;
}
+ private boolean isExcluded(Collection<Class<? extends Parser>> excludeParsers, Class<? extends Parser> p){
+ return excludeParsers.contains(p) || assignableFrom(excludeParsers, p);
+ }
+
+ private boolean assignableFrom(Collection<Class<? extends Parser>> excludeParsers, Class<? extends Parser> p) {
+ for (Class<? extends Parser> e : excludeParsers) {
+ if (e.isAssignableFrom(p)) return true;
+ }
+ return false;
+ }
+
/**
* Utility method that goes through all the component parsers and finds
* all media types for which more than one parser declares support. This
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java Tue Mar 31 21:37:31 2015
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTru
import static org.junit.Assert.fail;
import java.net.URL;
+import java.util.List;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
@@ -33,6 +34,7 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParserDecorator;
import org.apache.tika.parser.executable.ExecutableParser;
+import org.apache.tika.parser.xml.XMLParser;
import org.junit.After;
import org.junit.Test;
@@ -131,4 +133,33 @@ public class TikaParserConfigTest {
fail("Shouldn't have the Executable Parser from config");
}
}
+ /**
+ * TIKA-1558 It should be possible to exclude Parsers from being picked up by
+ * DefaultParser.
+ */
+ @Test
+ public void defaultParserBlacklist() throws Exception {
+ TikaConfig config = new TikaConfig();
+ CompositeParser cp = (CompositeParser) config.getParser();
+ List<Parser> parsers = cp.getAllComponentParsers();
+
+ boolean hasXML = false;
+ for (Parser p : parsers) {
+ if (p instanceof XMLParser) {
+ hasXML = true;
+ break;
+ }
+ }
+ assertTrue("Default config should include an XMLParser.", hasXML);
+
+ // This custom TikaConfig should exclude all AbstractParsers.
+ config = getConfig("TIKA-1558-blacklistsub.xml");
+ cp = (CompositeParser) config.getParser();
+ parsers = cp.getAllComponentParsers();
+
+ for (Parser p : parsers) {
+ if (p instanceof XMLParser)
+ fail("Custom config should not include an XMLParser.");
+ }
+ }
}
Added:
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml?rev=1670487&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml (added)
+++ tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml Tue Mar 31 21:37:31 2015
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <parsers>
+ <parser class="org.apache.tika.parser.DefaultParser">
+ <parser-exclude class="org.apache.tika.parser.xml.XMLParser"/>
+ </parser>
+ </parsers>
+</properties>