Author: tallison
Date: Mon Mar 30 13:57:06 2015
New Revision: 1670095

URL: http://svn.apache.org/r1670095
Log:
TIKA-1584: fixed regression in Tika 1.7 that prevents processing of embedded 
docs with /tika service

Modified:
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
    
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
    
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java

Modified: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java?rev=1670095&r1=1670094&r2=1670095&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
 (original)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/MetadataResource.java
 Mon Mar 30 13:57:06 2015
@@ -128,7 +128,8 @@ public class MetadataResource {
         final ParseContext context = new ParseContext();
         AutoDetectParser parser = TikaResource.createParser(tikaConfig);
         TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
-        TikaResource.fillParseContext(context, httpHeaders);
+        //no need to pass parser for embedded document parsing
+        TikaResource.fillParseContext(context, httpHeaders, null);
         TikaResource.logRequest(logger, info, metadata);
         TikaResource.parse(parser, logger, info.getPath(), is, new DefaultHandler(), metadata, context);
         return metadata;

Modified: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java?rev=1670095&r1=1670094&r2=1670095&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
 (original)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/RecursiveMetadataResource.java
 Mon Mar 30 13:57:06 2015
@@ -78,7 +78,8 @@ public class RecursiveMetadataResource {
         RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser,
                 new BasicContentHandlerFactory(type, -1));
         TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
-        TikaResource.fillParseContext(context, httpHeaders);
+        //no need to add parser to parse recursively
+        TikaResource.fillParseContext(context, httpHeaders, null);
         TikaResource.logRequest(logger, info, metadata);
         TikaResource.parse(wrapper, logger, info.getPath(), is, new DefaultHandler(), metadata, context);
         return new MetadataList(wrapper.getMetadata());

Modified: 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java?rev=1670095&r1=1670094&r2=1670095&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
 (original)
+++ 
tika/trunk/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
 Mon Mar 30 13:57:06 2015
@@ -138,7 +138,8 @@ public class TikaResource {
         return httpHeaders.getFirst("File-Name");
     }
 
-    public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders) {
+    public static void fillParseContext(ParseContext parseContext, MultivaluedMap<String, String> httpHeaders,
+                                        Parser embeddedParser) {
         TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
         PDFParserConfig pdfParserConfig = new PDFParserConfig();
         for (String key : httpHeaders.keySet()) {
@@ -150,6 +151,9 @@ public class TikaResource {
         }
         parseContext.set(TesseractOCRConfig.class, ocrConfig);
         parseContext.set(PDFParserConfig.class, pdfParserConfig);
+        if (embeddedParser != null) {
+            parseContext.set(Parser.class, embeddedParser);
+        }
     }
 
     /**
@@ -295,7 +299,7 @@ public class TikaResource {
         final ParseContext context = new ParseContext();
 
         fillMetadata(parser, metadata, context, httpHeaders);
-        fillParseContext(context, httpHeaders);
+        fillParseContext(context, httpHeaders, parser);
 
         logRequest(logger, info, metadata);
 
@@ -353,7 +357,7 @@ public class TikaResource {
         final ParseContext context = new ParseContext();
 
         fillMetadata(parser, metadata, context, httpHeaders);
-        fillParseContext(context, httpHeaders);
+        fillParseContext(context, httpHeaders, parser);
 
 
         logRequest(logger, info, metadata);

Modified: 
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java?rev=1670095&r1=1670094&r2=1670095&view=diff
==============================================================================
--- 
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
 (original)
+++ 
tika/trunk/tika-server/src/test/java/org/apache/tika/server/TikaResourceTest.java
 Mon Mar 30 13:57:06 2015
@@ -37,6 +37,8 @@ public class TikaResourceTest extends CX
     public static final String TEST_DOC = "test.doc";
     public static final String TEST_XLSX = "16637.xlsx";
     public static final String TEST_PASSWORD_PROTECTED = "password.xls";
+    private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
+
     private static final String TIKA_PATH = "/tika";
     private static final int UNPROCESSEABLE = 422;
 
@@ -149,4 +151,23 @@ public class TikaResourceTest extends CX
         assertTrue(responseMsg.contains("test"));
     }
 
+    @Test
+    public void testEmbedded() throws Exception {
+        //first try text
+        Response response = WebClient.create(endPoint + TIKA_PATH)
+                .accept("text/plain")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        String responseMsg = getStringFromInputStream((InputStream) response
+                .getEntity());
+        assertTrue(responseMsg.contains("Course of human events"));
+
+        //now go for xml -- different call than text
+        response = WebClient.create(endPoint + TIKA_PATH)
+                .accept("text/xml")
+                .put(ClassLoader.getSystemResourceAsStream(TEST_RECURSIVE_DOC));
+        responseMsg = getStringFromInputStream((InputStream) response
+                .getEntity());
+        assertTrue(responseMsg.contains("Course of human events"));
+    }
+
 }


Reply via email to