Author: tpalsulich
Date: Tue Mar 31 21:37:31 2015
New Revision: 1670487

URL: http://svn.apache.org/r1670487
Log:
TIKA-1558. Refactor Parser blacklisting.

Added:
    
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
Removed:
    
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParser.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserSubclass.java
    
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/BlacklistedParserTest.java
    tika/trunk/tika-core/src/test/resources/META-INF/
    
tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist2_file.blacklist2
    
tika/trunk/tika-core/src/test/resources/org/apache/tika/parser/blacklist_file.blacklist
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java

Modified: tika/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Tue Mar 31 21:37:31 2015
@@ -61,9 +61,6 @@ Release 1.8 - Current Development
   * Detect Global Change Master Directory (GCMD) Directory
     Interchange Format (DIF) files (TIKA-1561).
 
-  * Parsers and other services can now be disabled with a
-    blacklist META-INF file (TIKA-1558).
-
   * Tika's JAX-RS server can now return stacktraces for
     parse exceptions. (TIKA-1323)
 

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java 
Tue Mar 31 21:37:31 2015
@@ -218,8 +218,7 @@ public class ServiceLoader {
     }
 
     /**
-     * Returns all the available service providers of the given type
-     * that aren't blacklisted.
+     * Returns all the available service providers of the given type.
      *
      * @param iface service provider interface
      * @return available service providers
@@ -254,27 +253,13 @@ public class ServiceLoader {
                         providers.add((T) service.service);
                     }
                 }
-                return removeBlacklisted(providers, iface);
+                return providers;
             }
         } else {
             return new ArrayList<T>(0);
         }
     }
 
-    private <T> List<T> removeBlacklisted(List<T> providers, Class<T> iface) {
-        List<T> blacklist = loadStaticServiceProvidersBlacklist(iface);
-        List<T> copy = new ArrayList<T>(providers);
-
-        for (T provider : copy) {
-            for (T blacklistedProvider : blacklist) {
-                if 
(blacklistedProvider.getClass().isAssignableFrom(provider.getClass())){
-                    providers.remove(provider);
-                }
-            }
-        }
-        return providers;
-    }
-
     /**
      * Returns the defined static service providers of the given type, without
      * attempting to load them.
@@ -305,68 +290,6 @@ public class ServiceLoader {
     }
 
     /**
-     * Returns the blacklisted static service providers of the given type, without
-     * attempting to load them.
-     * The providers are loaded using the service provider mechanism using
-     * the configured class loader (if any).
-     *
-     * @since Apache Tika 1.8
-     * @param iface service provider interface
-     * @return static list of uninitialised blacklisted service providers.
-     *
-     */
-    protected <T> List<String> 
identifyStaticServiceProvidersBlacklist(Class<T> iface) {
-        List<String> names = new ArrayList<String>();
-
-        if (loader != null) {
-            String fileName = iface.getName() + ".blacklist";
-            Enumeration<URL> resources =
-                    findServiceResources("META-INF/services/" + fileName);
-            for (URL resource : Collections.list(resources)) {
-                try {
-                    collectServiceClassNames(resource, names);
-                } catch (IOException e) {
-                    handler.handleLoadError(fileName, e);
-                }
-            }
-        }
-
-        return names;
-    }
-
-    /**
-     * Returns the available blacklisted static service providers of the given type.
-     * The providers are loaded using the service provider mechanism using
-     * the configured class loader (if any). The returned list is newly
-     * allocated and may be freely modified by the caller.
-     *
-     * @since Apache Tika 1.8
-     * @param iface service provider interface
-     * @return blacklisted static service providers
-     */
-    @SuppressWarnings("unchecked")
-    public <T> List<T> loadStaticServiceProvidersBlacklist(Class<T> iface) {
-        List<T> providers = new ArrayList<T>();
-
-        if (loader != null) {
-            List<String> names = 
identifyStaticServiceProvidersBlacklist(iface);
-
-            for (String name : names) {
-                try {
-                    Class<?> klass = loader.loadClass(name);
-                    if (iface.isAssignableFrom(klass)) {
-                        providers.add((T) klass.newInstance());
-                    }
-                } catch (Throwable t) {
-                    handler.handleLoadError(name, t);
-                }
-            }
-        }
-
-        return providers;
-    }
-
-    /**
      * Returns the available static service providers of the given type.
      * The providers are loaded using the service provider mechanism using
      * the configured class loader (if any). The returned list is newly
@@ -394,7 +317,7 @@ public class ServiceLoader {
                 }
             }
         }
-        return removeBlacklisted(providers, iface);
+        return providers;
     }
 
     private static final Pattern COMMENT = Pattern.compile("#.*");

Modified: 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java 
(original)
+++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java 
Tue Mar 31 21:37:31 2015
@@ -70,7 +70,7 @@ public class CompositeParser extends Abs
         } else {
             this.parsers = new ArrayList<Parser>();
             for (Parser p : parsers) {
-                if (! excludeParsers.contains(p.getClass())) {
+                if (!isExcluded(excludeParsers, p.getClass())) {
                     this.parsers.add(p);
                 }
             }
@@ -99,6 +99,17 @@ public class CompositeParser extends Abs
         return map;
     }
 
+    private boolean isExcluded(Collection<Class<? extends Parser>> 
excludeParsers, Class<? extends Parser> p){
+        return excludeParsers.contains(p) || assignableFrom(excludeParsers, p);
+    }
+
+    private boolean assignableFrom(Collection<Class<? extends Parser>> 
excludeParsers, Class<? extends Parser> p) {
+        for (Class<? extends Parser> e : excludeParsers) {
+            if (e.isAssignableFrom(p)) return true;
+        }
+        return false;
+    }
+
     /**
      * Utility method that goes through all the component parsers and finds
      * all media types for which more than one parser declares support. This

Modified: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java?rev=1670487&r1=1670486&r2=1670487&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
 (original)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/config/TikaParserConfigTest.java
 Tue Mar 31 21:37:31 2015
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertTru
 import static org.junit.Assert.fail;
 
 import java.net.URL;
+import java.util.List;
 
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.CompositeParser;
@@ -33,6 +34,7 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserDecorator;
 import org.apache.tika.parser.executable.ExecutableParser;
+import org.apache.tika.parser.xml.XMLParser;
 import org.junit.After;
 import org.junit.Test;
 
@@ -131,4 +133,33 @@ public class TikaParserConfigTest {
                 fail("Shouldn't have the Executable Parser from config");
         }
     }
+    /**
+     * TIKA-1558 It should be possible to exclude Parsers from being picked up by
+     * DefaultParser.
+     */
+    @Test
+    public void defaultParserBlacklist() throws Exception {
+        TikaConfig config = new TikaConfig();
+        CompositeParser cp = (CompositeParser) config.getParser();
+        List<Parser> parsers = cp.getAllComponentParsers();
+
+        boolean hasXML = false;
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser) {
+                hasXML = true;
+                break;
+            }
+        }
+        assertTrue("Default config should include an XMLParser.", hasXML);
+
+        // This custom TikaConfig excludes XMLParser (and any subclass of it) from DefaultParser.
+        config = getConfig("TIKA-1558-blacklistsub.xml");
+        cp = (CompositeParser) config.getParser();
+        parsers = cp.getAllComponentParsers();
+
+        for (Parser p : parsers) {
+            if (p instanceof XMLParser)
+                fail("Custom config should not include an XMLParser.");
+        }
+    }
 }

Added: 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml?rev=1670487&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
 (added)
+++ 
tika/trunk/tika-parsers/src/test/resources/org/apache/tika/config/TIKA-1558-blacklistsub.xml
 Tue Mar 31 21:37:31 2015
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.DefaultParser">
+      <parser-exclude class="org.apache.tika.parser.xml.XMLParser"/>
+    </parser>
+  </parsers>
+</properties>


Reply via email to