Author: nick
Date: Mon Jun 8 15:28:45 2015
New Revision: 1684206
URL: http://svn.apache.org/r1684206
Log:
Allow the Tika Config XML to have a ParserDecorator with child parsers, and
add a note to the javadocs explaining how this works
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=1684206&r1=1684205&r2=1684206&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
(original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
Mon Jun 8 15:28:45 2015
@@ -381,8 +381,10 @@ public class TikaConfig {
+ " configuration element: " + name);
}
- // Is this a composite parser? If so, support recursion
- if (CompositeParser.class.isAssignableFrom(parserClass)) {
+ // Is this a composite or decorated parser? If so, support recursion
+ if (CompositeParser.class.isAssignableFrom(parserClass) ||
+ ParserDecorator.class.isAssignableFrom(parserClass)) {
+
// Get the child parsers for it
List<Parser> childParsers = new ArrayList<Parser>();
NodeList childParserNodes =
parserNode.getElementsByTagName("parser");
@@ -407,20 +409,36 @@ public class TikaConfig {
// Create the Composite Parser
Constructor<? extends Parser> c = null;
- if (c == null) {
+ MediaTypeRegistry registry = mimeTypes.getMediaTypeRegistry();
+ if (parser == null) {
try {
c =
parserClass.getConstructor(MediaTypeRegistry.class, ServiceLoader.class,
Collection.class);
- parser =
c.newInstance(mimeTypes.getMediaTypeRegistry(), loader, excludeParsers);
+ parser = c.newInstance(registry, loader,
excludeParsers);
}
catch (NoSuchMethodException me) {}
}
- if (c == null) {
+ if (parser == null) {
try {
c =
parserClass.getConstructor(MediaTypeRegistry.class, List.class,
Collection.class);
- parser =
c.newInstance(mimeTypes.getMediaTypeRegistry(), childParsers, excludeParsers);
+ parser = c.newInstance(registry, childParsers,
excludeParsers);
+ } catch (NoSuchMethodException me) {}
+ }
+ // Create as a Parser Decorator
+ if (parser == null &&
ParserDecorator.class.isAssignableFrom(parserClass)) {
+ try {
+ CompositeParser cp = null;
+ if (childParsers.size() == 1 && excludeParsers.size()
== 0 &&
+ childParsers.get(0) instanceof
CompositeParser) {
+ cp = (CompositeParser)childParsers.get(0);
+ } else {
+ cp = new CompositeParser(registry, childParsers,
excludeParsers);
+ }
+ c = parserClass.getConstructor(Parser.class);
+ parser = c.newInstance(cp);
} catch (NoSuchMethodException me) {}
}
- if (c == null) {
+ // Default constructor
+ if (parser == null) {
parser = parserClass.newInstance();
}
} else {
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java?rev=1684206&r1=1684205&r2=1684206&view=diff
==============================================================================
---
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
(original)
+++
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParserDecorator.java
Mon Jun 8 15:28:45 2015
@@ -30,10 +30,12 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
- * Decorator base class for the {@link Parser} interface. This class
- * simply delegates all parsing calls to an underlying decorated parser
- * instance. Subclasses can provide extra decoration by overriding the
+ * Decorator base class for the {@link Parser} interface.
+ * <p>This class simply delegates all parsing calls to an underlying decorated
+ * parser instance. Subclasses can provide extra decoration by overriding the
* parse method.
+ * <p>To decorate several different parsers at the same time, wrap them in
+ * a {@link CompositeParser} instance first.
*/
public class ParserDecorator extends AbstractParser {