Author: nick
Date: Thu Sep 24 22:38:38 2015
New Revision: 1705181

URL: http://svn.apache.org/viewvc?rev=1705181&view=rev
Log:
Expand the Tika Config dumping support for parsers

Modified:
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1705181&r1=1705180&r2=1705181&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java Thu Sep 24 22:38:38 2015
@@ -24,6 +24,7 @@ import java.io.OutputStreamWriter;
 import java.io.StringWriter;
 import java.io.Writer;
 import java.nio.charset.Charset;
+import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
@@ -97,8 +98,8 @@ public class DumpTikaConfigExample {
     }
 
     private void addTranslator(Mode mode, Element rootElement, Document doc, TikaConfig config) {
-        // TikaConfig only reads the first translator from the list,
-        //  but it looks like it expects a list
+        // Unlike the other entries, TikaConfig only wants one of
+        //  these, and no outer <translators> list
         Translator translator = config.getTranslator();
         if (mode == Mode.MINIMAL && translator instanceof DefaultTranslator) {
             Node mimeComment = doc.createComment(
@@ -160,54 +161,65 @@ public class DumpTikaConfigExample {
         } else if (mode == Mode.MINIMAL) {
             mode = Mode.CURRENT;
         }
-        addParsers(mode, rootElement, doc, parser);
+
+        Element parsersElement = doc.createElement("parsers");
+        rootElement.appendChild(parsersElement);
+        
+        addParser(mode, parsersElement, doc, parser);
     }
-    private void addParsers(Mode mode, Element rootElement, Document doc, Parser parser) throws Exception {
-        Parser realParser = parser;
+    private void addParser(Mode mode, Element rootElement, Document doc, Parser parser) throws Exception {
+        // If the parser is decorated, is it a kind where we output the parser inside?
+        ParserDecorator decoration = null;
         if (parser instanceof ParserDecorator) {
-            realParser = ((ParserDecorator)parser).getWrappedParser();
+            if (parser.getClass().getName().startsWith(ParserDecorator.class.getName()+"$")) {
+                decoration = ((ParserDecorator)parser);
+                parser = decoration.getWrappedParser();
+            }
         }
         
-        List<Parser> children = null;
-        if (mode == Mode.CURRENT && realParser instanceof DefaultParser) {
-            // Don't output any children
-            // TODO List excluded children
-        } else if (realParser instanceof CompositeParser) {
-            children = ((CompositeParser)realParser).getAllComponentParsers();
-            if (realParser instanceof DefaultParser || parser == realParser) {
-                realParser = null;
+        boolean outputParser = true;
+        List<Parser> children = Collections.emptyList();
+        if (mode == Mode.CURRENT && parser instanceof DefaultParser) {
+            // Only output the parser, not the children
+        } else if (parser instanceof CompositeParser) {
+            children = ((CompositeParser)parser).getAllComponentParsers();
+            // Special case for a naked composite
+            if (parser.getClass().equals(CompositeParser.class)) {
+                outputParser = false;
+            }
+            // Special case for making Default to static
+            if (mode == Mode.STATIC && parser instanceof DefaultParser) {
+                outputParser = false;
             }
         }
         
-        Element parsersElement = doc.createElement("parsers");
-        rootElement.appendChild(parsersElement);
-        Element addParserTo = parsersElement;
-        
-        if (realParser != null) {
-            addParserTo = addParser(addParserTo, doc, parser, realParser);
+        if (outputParser) {
+            rootElement = addParser(rootElement, doc, parser, decoration);
         }
-        if (children != null && !children.isEmpty()) {
-            for (Parser p : children) {
-                addParser(addParserTo, doc, p, p);
-            }
+        for (Parser childParser : children) {
+            addParser(mode, rootElement, doc, childParser);
         }
+        // TODO Parser Exclusions
     }
-    private Element addParser(Element rootElement, Document doc, Parser parser, Parser realParser) throws Exception {
+    private Element addParser(Element rootElement, Document doc, Parser parser, ParserDecorator decorator) throws Exception {
         ParseContext context = new ParseContext();
         
-        Set<MediaType> types = new TreeSet<>();
         Set<MediaType> addedTypes = new TreeSet<>();
         Set<MediaType> excludedTypes = new TreeSet<>();
-        types.addAll(parser.getSupportedTypes(context));
-        
-        for (MediaType type : realParser.getSupportedTypes(context)) {
-            if (! types.contains(type)) {
-                excludedTypes.add(type);
+        if (decorator != null) {
+            Set<MediaType> types = new TreeSet<>();
+            types.addAll(decorator.getSupportedTypes(context));
+            addedTypes.addAll(types);
+            
+            for (MediaType type : parser.getSupportedTypes(context)) {
+                if (! types.contains(type)) {
+                    excludedTypes.add(type);
+                }
+                addedTypes.remove(type);
             }
-            addedTypes.remove(type);
         }
         
-        String className = realParser.getClass().getCanonicalName();
+        String className = parser.getClass().getCanonicalName();
         Element parserElement = doc.createElement("parser");
         parserElement.setAttribute("class", className);
         rootElement.appendChild(parserElement);
@@ -223,10 +235,6 @@ public class DumpTikaConfigExample {
             parserElement.appendChild(mimeElement);
         }
         
-        if (realParser instanceof CompositeParser) {
-            // TODO Recurse
-        }
-        
         return parserElement;
     }
     


Reply via email to