This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit b928453caf6bb557748168418e49cb8a112d996f
Author: tballison <talli...@mitre.org>
AuthorDate: Fri Apr 6 11:17:37 2018 -0400

    TIKA-2625
---
 .../tika/extractor/TestEmbeddedDocumentUtil.java   | 58 ++++++++++++++++++++++
 .../tika/extractor/EmbeddedDocumentUtil.java       | 15 +++++-
 .../apache/tika/parser/RecursiveParserWrapper.java |  3 +-
 3 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/tika-app/src/test/java/org/apache/tika/extractor/TestEmbeddedDocumentUtil.java b/tika-app/src/test/java/org/apache/tika/extractor/TestEmbeddedDocumentUtil.java
new file mode 100644
index 0000000..2262998
--- /dev/null
+++ b/tika-app/src/test/java/org/apache/tika/extractor/TestEmbeddedDocumentUtil.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.extractor;
+
+import org.apache.tika.batch.DigestingAutoDetectParserFactory;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+public class TestEmbeddedDocumentUtil {
+    //TODO -- figure out how to mock this into tika-core
+
+    @Test
+    public void testSimple() {
+        Parser p = new AutoDetectParser();
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Parser.class, p);
+        Parser txtParser = 
EmbeddedDocumentUtil.tryToFindExistingLeafParser(org.apache.tika.parser.txt.TXTParser.class,
 parseContext);
+        assertNotNull(txtParser);
+        assertEquals(org.apache.tika.parser.txt.TXTParser.class, 
txtParser.getClass());
+
+    }
+
+    @Test
+    public void testDoublyDecorated() {
+        Parser d = new 
DigestingAutoDetectParserFactory().getParser(TikaConfig.getDefaultConfig());
+        RecursiveParserWrapper wrapper = new RecursiveParserWrapper(d,
+                new 
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Parser.class, wrapper);
+        Parser txtParser = 
EmbeddedDocumentUtil.tryToFindExistingLeafParser(org.apache.tika.parser.txt.TXTParser.class,
 parseContext);
+        assertNotNull(txtParser);
+        assertEquals(org.apache.tika.parser.txt.TXTParser.class, 
txtParser.getClass());
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
index c03a871..1fc7308 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/EmbeddedDocumentUtil.java
@@ -241,7 +241,7 @@ public class EmbeddedDocumentUtil implements Serializable {
         Parser returnParser = null;
         if (p != null) {
             if (p instanceof ParserDecorator) {
-                p = ((ParserDecorator)p).getWrappedParser();
+                p = findInDecorated((ParserDecorator)p, clazz);
             }
             if (equals(p, clazz)) {
                 return p;
@@ -257,6 +257,17 @@ public class EmbeddedDocumentUtil implements Serializable {
         return null;
     }
 
+    private static Parser findInDecorated(ParserDecorator p, Class clazz) {
+        Parser candidate = p.getWrappedParser();
+        if (equals(candidate, clazz)) {
+            return candidate;
+        }
+        if (candidate instanceof ParserDecorator) {
+            candidate = findInDecorated((ParserDecorator)candidate, clazz);
+        }
+        return candidate;
+    }
+
     private static Parser findInComposite(CompositeParser p, Class clazz, 
ParseContext context) {
         Map<MediaType, Parser> map = p.getParsers(context);
         for (Map.Entry<MediaType, Parser> e : map.entrySet()) {
@@ -265,7 +276,7 @@ public class EmbeddedDocumentUtil implements Serializable {
                 return candidate;
             }
             if (candidate instanceof ParserDecorator) {
-                candidate = ((ParserDecorator)candidate).getWrappedParser();
+                candidate = findInDecorated((ParserDecorator)candidate, clazz);
             }
             if (equals(candidate, clazz)) {
                 return candidate;
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 739a9b3..9484d4c 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -70,7 +70,7 @@ import org.xml.sax.helpers.DefaultHandler;
  * The unit tests for this class are in the tika-parsers module.
  * </p>
  */
-public class RecursiveParserWrapper implements Parser {
+public class RecursiveParserWrapper extends ParserDecorator {
     
     /**
      * Generated serial version
@@ -126,6 +126,7 @@ public class RecursiveParserWrapper implements Parser {
      */
     public RecursiveParserWrapper(Parser wrappedParser,
                                   ContentHandlerFactory contentHandlerFactory, 
boolean catchEmbeddedExceptions) {
+        super(wrappedParser);
         this.wrappedParser = wrappedParser;
         this.contentHandlerFactory = contentHandlerFactory;
         this.catchEmbeddedExceptions = catchEmbeddedExceptions;

-- 
To stop receiving notification emails like this one, please contact
talli...@apache.org.

Reply via email to