Author: nick Date: Mon Jun 8 15:28:45 2015 New Revision: 1684206 URL: http://svn.apache.org/r1684206 Log: Allow the Tika Config XML to declare a ParserDecorator with child parsers, and document in the Javadocs how this works
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1684206&r1=1684205&r2=1684206&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original) +++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Mon Jun 8 15:28:45 2015 @@ -381,8 +381,10 @@ public class TikaConfig { + " configuration element: " + name); } - // Is this a composite parser? If so, support recursion - if (CompositeParser.class.isAssignableFrom(parserClass)) { + // Is this a composite or decorated parser? If so, support recursion + if (CompositeParser.class.isAssignableFrom(parserClass) || + ParserDecorator.class.isAssignableFrom(parserClass)) { + // Get the child parsers for it List<Parser> childParsers = new ArrayList<Parser>(); NodeList childParserNodes = parserNode.getElementsByTagName("parser"); @@ -407,20 +409,36 @@ public class TikaConfig { // Create the Composite Parser Constructor<? 
extends Parser> c = null; - if (c == null) { + MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry(); + if (parser == null) { try { c = parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class, Collection.class); - parser = c.newInstance(mimeTypes.getMediaTypeRegistry(), loader, excludeParsers); + parser = c.newInstance(registry, loader, excludeParsers); } catch (NoSuchMethodException me) {} } - if (c == null) { + if (parser == null) { try { c = parserClass.getConstructor(MediaTypeRegistry.class, List.class, Collection.class); - parser = c.newInstance(mimeTypes.getMediaTypeRegistry(), childParsers, excludeParsers); + parser = c.newInstance(registry, childParsers, excludeParsers); + } catch (NoSuchMethodException me) {} + } + // Create as a Parser Decorator + if (parser == null && ParserDecorator.class.isAssignableFrom(parserClass)) { + try { + CompositeParser cp = null; + if (childParsers.size() == 1 && excludeParsers.size() == 0 && + childParsers.get(0) instanceof CompositeParser) { + cp = (CompositeParser)childParsers.get(0); + } else { + cp = new CompositeParser(registry, childParsers, excludeParsers); + } + c = parserClass.getConstructor(Parser.class); + parser = c.newInstance(cp); } catch (NoSuchMethodException me) {} } - if (c == null) { + // Default constructor + if (parser == null) { parser = parserClass.newInstance(); } } else { Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java?rev=1684206&r1=1684205&r2=1684206&view=diff ============================================================================== --- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java (original) +++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java Mon Jun 8 15:28:45 2015 @@ -30,10 +30,12 @@ import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; 
/** - * Decorator base class for the {@link Parser} interface. This class - * simply delegates all parsing calls to an underlying decorated parser - * instance. Subclasses can provide extra decoration by overriding the + * Decorator base class for the {@link Parser} interface. + * <p>This class simply delegates all parsing calls to an underlying decorated + * parser instance. Subclasses can provide extra decoration by overriding the * parse method. + * <p>To decorate several different parsers at the same time, wrap them in + * a {@link CompositeParser} instance first. */ public class ParserDecorator extends AbstractParser {