This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_2x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_2x by this push:
     new 5789b57  microsoft package unit tests work
5789b57 is described below

commit 5789b5761caa39c06ee4d444e4e7d1c49326b333
Author: tallison <[email protected]>
AuthorDate: Tue Aug 18 07:34:09 2020 -0400

    microsoft package unit tests work
---
 tika-parser-integration-tests/pom.xml              |  13 ++
 .../parser/tests}/microsoft/EMFParserTest.java     |  40 +---
 .../parser/tests/microsoft/ExcelParserTest.java    |  16 ++
 .../microsoft/POIContainerExtractionTest.java      | 170 +---------------
 .../tests/microsoft/PowerPointParserTest.java      |  22 +++
 .../parser/tests/microsoft/XML2003ParserTest.java  |  66 +++++++
 .../tests/microsoft/ooxml/OOXMLParserTest.java     |  49 +++++
 .../tests/microsoft/ooxml/TruncatedOOXMLTest.java  |  33 ++++
 .../parser/tests/microsoft/rtf/RTFParserTest.java  | 111 +++++++++++
 tika-parser-modules/pom.xml                        |   5 +-
 .../tika-parser-microsoft-module/pom.xml           |  39 +++-
 .../services/org.apache.tika.detect.Detector       |  16 ++
 .../services/org.apache.tika.parser.Parser         |  27 +++
 .../tika/parser/microsoft/EMFParserTest.java       |  22 +--
 .../tika/parser/microsoft/ExcelParserTest.java     |   6 +-
 .../microsoft/POIContainerExtractionTest.java      | 219 ---------------------
 .../parser/microsoft/PowerPointParserTest.java     |   9 -
 .../tika/parser/microsoft/WordParserTest.java      |   2 +-
 .../microsoft/onenote/OneNoteParserTest.java       |   2 -
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |  28 +--
 .../parser/microsoft/ooxml/SXSLFExtractorTest.java |   8 -
 .../parser/microsoft/ooxml/SXWPFExtractorTest.java |   2 +-
 .../parser/microsoft/ooxml/TruncatedOOXMLTest.java |  19 --
 .../tika/parser/microsoft/rtf/RTFParserTest.java   |  85 +-------
 .../parser/microsoft/xml/XML2003ParserTest.java    |  49 -----
 .../ooxml}/tika-config-custom-date-override.xml    |   0
 .../microsoft/ooxml/tika-config-dom-macros.xml     |   0
 .../microsoft/ooxml/tika-config-sax-macros.xml     |   0
 .../rtf/ignoreListMarkup-tika-config.xml           |   0
 .../tika/parser/microsoft}/rtf/tika-config.xml     |   0
 .../tika-config-custom-date-override.xml           |   0
 .../microsoft/tika-config-exclude-phonetic.xml     |   0
 .../tika-config-extract-all-alternatives-msg.xml   |   0
 .../tika/parser/microsoft/tika-config-macros.xml   |   0
 .../tika/parser/microsoft/tika-config-sax-docx.xml |   0
 .../src/test/resources/test-documents/Doc1_ole.doc | Bin
 .../resources/test-documents/EmbeddedDocument.docx | Bin
 .../resources/test-documents/EmbeddedOutlook.docx  | Bin
 .../test/resources/test-documents/EmbeddedPDF.docx | Bin
 .../test/resources/test-documents/NullHeader.docx  | Bin
 .../test/resources/test-documents/footnotes.docx   | Bin
 .../test/resources/test-documents/headerPic.docx   | Bin
 .../src/test/resources/test-documents/jxl.xls      | Bin
 .../src/test/resources/test-documents/pictures.ppt | Bin
 .../src/test/resources/test-documents/protect.xlsx | Bin
 .../resources/test-documents/protectedFile.xlsx    | Bin
 .../resources/test-documents/protectedSheets.xlsx  | Bin
 .../resources/test-documents/test-columnar.xls     | Bin
 .../resources/test-documents/test-columnar.xlsb    | Bin
 .../resources/test-documents/test-columnar.xlsx    | Bin
 .../test/resources/test-documents/test-outlook.msg | Bin
 .../resources/test-documents/test-outlook2003.msg  | Bin
 .../src/test/resources/test-documents/test.doc     | Bin
 .../test/resources/test-documents/testACCESS.mdb   | Bin
 .../resources/test-documents/testAccess2.accdb     | Bin
 .../resources/test-documents/testAccess2_2000.mdb  | Bin
 .../test-documents/testAccess2_2002-2003.mdb       | Bin
 .../test-documents/testAccess2_encrypted.accdb     | Bin
 .../resources/test-documents/testAccess_V1997.mdb  | Bin
 .../test-documents/testBinControlWord.rtf          |   0
 .../test/resources/test-documents/testComment.doc  | Bin
 .../test/resources/test-documents/testComment.docx | Bin
 .../test/resources/test-documents/testComment.ppt  | Bin
 .../test/resources/test-documents/testComment.pptx | Bin
 .../test/resources/test-documents/testComment.rtf  |   0
 .../test/resources/test-documents/testComment.xls  | Bin
 .../test/resources/test-documents/testComment.xlsx | Bin
 .../test-documents/testControlCharacters.doc       | Bin
 .../test-documents/testDOCX_Thumbnail.docx         | Bin
 .../test/resources/test-documents/testDOTM.dotm    | Bin
 .../resources/test-documents/testDocumentLink.doc  | Bin
 .../src/test/resources/test-documents/testEMF.emf  | Bin
 .../test/resources/test-documents/testEMLX.emlx    |   0
 .../testEML_embedded_xhtml_and_img.eml             |   0
 .../test-documents/testFontAfterBufferedText.rtf   |   0
 .../test/resources/test-documents/testOneNote.one  | Bin
 .../test/resources/test-documents/testOneNote1.one | Bin
 .../test/resources/test-documents/testOneNote2.one | Bin
 .../test-documents/testOneNote2007OrEarlier1.one   | Bin
 .../test-documents/testOneNote2007OrEarlier2.one   | Bin
 .../resources/test-documents/testOneNote2016.one   | Bin
 .../test/resources/test-documents/testOneNote3.one | Bin
 .../test/resources/test-documents/testOneNote4.one | Bin
 .../test-documents/testOneNoteEmbeddedWordDoc.one  | Bin
 .../resources/test-documents/testPROJECT2003.mpp   | Bin
 .../resources/test-documents/testPROJECT2007.mpp   | Bin
 .../resources/test-documents/testPUBLISHER.pub     | Bin
 .../test/resources/test-documents/testWINMAIL.dat  | Bin
 .../src/test/resources/test-documents/testWMF.wmf  | Bin
 .../resources/test-documents/testWMF_charset.wmf   | Bin
 .../test-documents/testWORKSSpreadsheet7.0.xlr     | Bin
 .../test/resources/test-documents/testWordArt.pptx | Bin
 .../test-documents/testXLSX_Thumbnail.xlsx         | Bin
 .../resources/test-documents/testXPS_various.xps   | Bin
 .../test-documents/testZIP_corrupted_oom.zip       | Bin
 .../resources/test-documents/test_TIKA-1251.doc    | Bin
 .../test-documents/test_embedded_zip.pptx          | Bin
 .../test-documents/test_list_override.rtf          |   0
 .../test-documents/test_recursive_embedded.doc     | Bin
 .../test-documents/test_recursive_embedded.docx    | Bin
 .../test_recursive_embedded_npe.docx               | Bin
 tika-parsers/pom.xml                               |  10 -
 .../services/org.apache.tika.parser.Parser         |  12 --
 103 files changed, 413 insertions(+), 667 deletions(-)

diff --git a/tika-parser-integration-tests/pom.xml 
b/tika-parser-integration-tests/pom.xml
index 8341715..fde1038 100644
--- a/tika-parser-integration-tests/pom.xml
+++ b/tika-parser-integration-tests/pom.xml
@@ -46,6 +46,12 @@
         </dependency>
         <dependency>
             <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-microsoft-module</artifactId>
+            <version>${project.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
             <artifactId>tika-parser-pkg-module</artifactId>
             <version>${project.version}</version>
             <scope>test</scope>
@@ -80,6 +86,13 @@
         </dependency>
         <dependency>
             <groupId>${project.groupId}</groupId>
+            <artifactId>tika-parser-microsoft-module</artifactId>
+            <version>${project.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>${project.groupId}</groupId>
             <artifactId>tika-parser-pkg-module</artifactId>
             <version>${project.version}</version>
             <type>test-jar</type>
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/EMFParserTest.java
similarity index 51%
copy from 
tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
copy to 
tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/EMFParserTest.java
index e6d2db3..7ebde82 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/EMFParserTest.java
@@ -1,30 +1,14 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft;
-
-import static org.junit.Assert.assertEquals;
-
-import java.util.List;
+package org.apache.tika.parser.tests.microsoft;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.RecursiveParserWrapper;
 import org.junit.Test;
 
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
 public class EMFParserTest extends TikaTest {
 
     @Test
@@ -38,17 +22,6 @@ public class EMFParserTest extends TikaTest {
         Metadata pdfMetadata = metadataList.get(2);
         assertEquals("application/pdf", 
pdfMetadata.get(Metadata.CONTENT_TYPE));
         assertContains("is a toolkit for detecting", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-
-    }
-
-    @Test
-    public void testTextExtractionMac() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testEXCEL_embeddedPDF_mac.xls");
-        Metadata emfMetadata = metadataList.get(2);
-        assertEquals("image/emf", emfMetadata.get(Metadata.CONTENT_TYPE));
-        assertContains("is a toolkit for detecting", 
emfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-        //test that a space was inserted before url
-        assertContains("Tika http://incubator.apache.org/tika/", 
emfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
     }
 
     @Test
@@ -60,7 +33,4 @@ public class EMFParserTest extends TikaTest {
         assertEquals("application/pdf", 
pdfMetadata.get(Metadata.CONTENT_TYPE));
         assertContains("is a toolkit for detecting", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
     }
-
-
 }
-
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ExcelParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ExcelParserTest.java
new file mode 100644
index 0000000..53bcd39
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ExcelParserTest.java
@@ -0,0 +1,16 @@
+package org.apache.tika.parser.tests.microsoft;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.junit.Test;
+
+import java.util.List;
+
+public class ExcelParserTest extends TikaTest {
+    @Test
+    public void testEmbeddedPDF() throws Exception {
+        List<Metadata> metadataList = 
getRecursiveMetadata("testExcel_embeddedPDF.xls");
+        assertContains("Hello World!", 
metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
+    }
+}
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/POIContainerExtractionTest.java
similarity index 66%
copy from 
tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
copy to 
tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/POIContainerExtractionTest.java
index 148efec..c01b78a 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/POIContainerExtractionTest.java
@@ -1,110 +1,15 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.microsoft;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.List;
+package org.apache.tika.parser.tests.microsoft;
 
+import org.apache.tika.TikaTest;
 import org.apache.tika.extractor.ContainerExtractor;
 import org.apache.tika.extractor.ParserContainerExtractor;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.utils.ParserUtils;
+import org.apache.tika.parser.microsoft.AbstractPOIContainerExtractionTest;
 import org.junit.Test;
 
-/**
- * Tests that the various POI powered parsers are
- * able to extract their embedded contents.
- */
-public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTest {
-
-    /**
-     * For office files which don't have anything embedded in them
-     */
-    @Test
-    public void testWithoutEmbedded() throws Exception {
-        ContainerExtractor extractor = new ParserContainerExtractor();
-
-        String[] files = new String[]{
-                "testEXCEL.xls", "testWORD.doc", "testPPT.ppt",
-                "testVISIO.vsd", "test-outlook.msg"
-        };
-        for (String file : files) {
-            // Process it without recursing
-            TrackingHandler handler = process(file, extractor, false);
-
-            // Won't have fired
-            assertEquals(0, handler.filenames.size());
-            assertEquals(0, handler.mediaTypes.size());
-
-            // Ditto with recursing
-            handler = process(file, extractor, true);
-            assertEquals(0, handler.filenames.size());
-            assertEquals(0, handler.mediaTypes.size());
-        }
-    }
-
-    /**
-     * Office files with embedded images, but no other
-     * office files in them
-     */
-    @Test
-    public void testEmbeddedImages() throws Exception {
-        ContainerExtractor extractor = new ParserContainerExtractor();
-        TrackingHandler handler;
-
-        // Excel with 1 image
-        handler = process("testEXCEL_1img.xls", extractor, false);
-        assertEquals(1, handler.filenames.size());
-        assertEquals(1, handler.mediaTypes.size());
-
-        assertEquals(null, handler.filenames.get(0));
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(0));
-
-
-        // PowerPoint with 2 images + sound
-        // TODO
-
-
-        // Word with 1 image
-        handler = process("testWORD_1img.doc", extractor, false);
-        assertEquals(1, handler.filenames.size());
-        assertEquals(1, handler.mediaTypes.size());
-
-        assertEquals("image1.png", handler.filenames.get(0));
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(0));
-
-
-        // Word with 3 images
-        handler = process("testWORD_3imgs.doc", extractor, false);
-        assertEquals(3, handler.filenames.size());
-        assertEquals(3, handler.mediaTypes.size());
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 
-        assertEquals("image1.png", handler.filenames.get(0));
-        assertEquals("image2.jpg", handler.filenames.get(1));
-        assertEquals("image3.png", handler.filenames.get(2));
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(0));
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(1));
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(2));
-    }
+public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTest {
 
     /**
      * Office files which have other office files
@@ -122,7 +27,7 @@ public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTe
     @Test
     public void testEmbeddedOfficeFiles() throws Exception {
         ContainerExtractor extractor = new ParserContainerExtractor();
-        TrackingHandler handler;
+        TikaTest.TrackingHandler handler;
 
 
         // Excel with a word doc and a powerpoint doc, both of which have 
images in them
@@ -326,65 +231,4 @@ public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTe
         assertEquals("smbprn.00009008.KdcPjl.pdf", handler.filenames.get(1));
         assertEquals(TYPE_PDF, handler.mediaTypes.get(1));
     }
-
-    @Test
-    public void testEmbeddedOfficeFilesXML() throws Exception {
-        ContainerExtractor extractor = new ParserContainerExtractor();
-        TrackingHandler handler;
-
-        handler = process("EmbeddedDocument.docx", extractor, false);
-        
assertTrue(handler.filenames.contains("Microsoft_Office_Excel_97-2003_Worksheet1.bin"));
-        assertEquals(2, handler.filenames.size());
-    }
-
-    @Test
-    public void testPowerpointImages() throws Exception {
-        ContainerExtractor extractor = new ParserContainerExtractor();
-        TrackingHandler handler;
-
-        handler = process("pictures.ppt", extractor, false);
-        assertTrue(handler.mediaTypes.contains(new MediaType("image", 
"jpeg")));
-        assertTrue(handler.mediaTypes.contains(new MediaType("image", "png")));
-    }
-
-    @Test
-    public void testEmbeddedStorageId() throws Exception {
-
-        List<Metadata> list = getRecursiveMetadata("testWORD_embeded.doc");
-        //.docx
-        assertEquals("{F4754C9B-64F5-4B40-8AF4-679732AC0607}",
-                
list.get(10).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
-        //_1345471035.ppt
-        assertEquals("{64818D10-4F9B-11CF-86EA-00AA00B929E8}",
-                
list.get(14).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
-        //_1345470949.xls
-        assertEquals("{00020820-0000-0000-C000-000000000046}",
-                
list.get(16).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
-
-    }
-
-    @Test
-    public void testEmbeddedGraphChart() throws Exception {
-        //doc converts a chart to a actual xls file
-        //so we only need to look in ppt and xls
-        for (String suffix : new String[]{"ppt", "xls"}) {
-            List<Metadata> list = 
getRecursiveMetadata("testMSChart-govdocs-428996."+suffix);
-            boolean found = false;
-            for (Metadata m : list) {
-                if 
(m.get(Metadata.CONTENT_TYPE).equals(POIFSContainerDetector.MS_GRAPH_CHART.toString()))
 {
-                    found = true;
-                }
-                assertNull(m.get(ParserUtils.EMBEDDED_EXCEPTION));
-            }
-            assertTrue("didn't find chart in "+suffix, found);
-        }
-    }
-
-    @Test
-    public void testEmbeddedEquation() throws Exception {
-        //file derives from govdocs1 863534.doc
-        List<Metadata> metadataList = 
getRecursiveMetadata("testMSEquation-govdocs-863534.doc");
-        assertEquals(3, metadataList.size());
-        assertEquals("application/vnd.ms-equation", 
metadataList.get(2).get(Metadata.CONTENT_TYPE));
-    }
 }
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/PowerPointParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/PowerPointParserTest.java
new file mode 100644
index 0000000..71da57e
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/PowerPointParserTest.java
@@ -0,0 +1,22 @@
+package org.apache.tika.parser.tests.microsoft;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class PowerPointParserTest extends TikaTest {
+    @Test
+    public void testEmbeddedPDF() throws Exception {
+        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.ppt");
+        assertContains("Apache Tika project", 
metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertEquals("3.pdf", 
metadataList.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+        assertContains("Hello World", 
metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
+        assertEquals("4.pdf", 
metadataList.get(2).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+    }
+}
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/XML2003ParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/XML2003ParserTest.java
new file mode 100644
index 0000000..8ebe759
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/XML2003ParserTest.java
@@ -0,0 +1,66 @@
+package org.apache.tika.parser.tests.microsoft;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class XML2003ParserTest extends TikaTest {
+    @Test
+    public void testBasicWord() throws Exception {
+        List<Metadata> list =  getRecursiveMetadata("testWORD2003.xml");
+        assertEquals(6, list.size());
+        Metadata m = list.get(0);//container doc
+        String xml = m.get(RecursiveParserWrapper.TIKA_CONTENT);
+        xml = xml.replaceAll("\\s+", " ");
+        //make sure that metadata gets dumped to xml
+        assertContains("<meta name=\"meta:character-count-with-spaces\" 
content=\"256\"", xml);
+        //do not allow nested <p> elements
+        assertContains("<p /> <img href=\"02000003.jpg\" /><p /> <p><img 
href=\"02000004.jpg\" /></p>", xml);
+        assertContains("<table><tbody>", xml);
+        assertContains("</tbody></table>", xml);
+        assertContains("<td><p>R1 c1</p> </td>", xml);
+        assertContains("<a href=\"https://tika.apache.org/\">tika</a>", xml);
+        assertContains("footnote", xml);
+        assertContains("Mycomment", xml);
+        assertContains("Figure 1: My Figure", xml);
+        assertContains("myEndNote", xml);
+        assertContains("We have always been at war with OceaniaEurasia", xml);
+        assertContains("Text box", xml);
+        assertNotContained("Text boxText box", xml);
+        assertContains("MyHeader", xml);
+        assertContains("MyFooter", xml);
+        assertContains("<img href=\"02000003.jpg\" />", xml);
+        assertEquals("219", m.get(Office.CHARACTER_COUNT));
+        assertEquals("256", m.get(Office.CHARACTER_COUNT_WITH_SPACES));
+
+        assertEquals("38", m.get(Office.WORD_COUNT));
+        assertEquals("1", m.get(Office.PARAGRAPH_COUNT));
+        assertEquals("Allison, Timothy B.", m.get(TikaCoreProperties.CREATOR));
+        assertEquals("2016-04-27T17:49:00Z", 
m.get(TikaCoreProperties.CREATED));
+        assertEquals("application/vnd.ms-wordml", 
m.get(Metadata.CONTENT_TYPE));
+
+        //make sure embedded docs were properly processed
+        assertContains("moscow-birds",
+                
Arrays.asList(list.get(5).getValues(TikaCoreProperties.SUBJECT)));
+
+        assertEquals("testJPEG_EXIF.jpg", 
list.get(5).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
+
+        //check that text is extracted with breaks between elements
+        String txt = 
getText(getResourceAsStream("/test-documents/testWORD2003.xml"),AUTO_DETECT_PARSER);
+        txt = txt.replaceAll("\\s+", " ");
+        assertNotContained("beforeR1", txt);
+        assertContains("R1 c1 R1 c2", txt);
+        assertNotContained("footnoteFigure", txt);
+        assertContains("footnote Figure", txt);
+        assertContains("test space", txt);
+
+    }
+}
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/OOXMLParserTest.java
new file mode 100644
index 0000000..0edd3c3
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/OOXMLParserTest.java
@@ -0,0 +1,49 @@
+package org.apache.tika.parser.tests.microsoft.ooxml;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.parser.microsoft.OfficeParserConfig;
+import org.junit.Test;
+
+import java.util.List;
+
+public class OOXMLParserTest extends TikaTest {
+
+    @Test
+    public void testEmbeddedPDFInPPTX() throws Exception {
+        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.pptx");
+        Metadata pdfMetadata1 = metadataList.get(4);
+        assertContains("Apache Tika", 
pdfMetadata1.get(RecursiveParserWrapper.TIKA_CONTENT));
+        Metadata pdfMetadata2 = metadataList.get(5);
+        assertContains("Hello World", 
pdfMetadata2.get(RecursiveParserWrapper.TIKA_CONTENT));
+    }
+
+    @Test
+    public void testEmbeddedPDFInXLSX() throws Exception {
+        List<Metadata> metadataList = 
getRecursiveMetadata("testExcel_embeddedPDF.xlsx");
+        Metadata pdfMetadata = metadataList.get(1);
+        assertContains("Hello World", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
+    }
+
+    @Test
+    public void testEmbeddedPDFInStreamingPPTX() throws Exception {
+        ParseContext parseContext = new ParseContext();
+        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+        officeParserConfig.setUseSAXPptxExtractor(true);
+        parseContext.set(OfficeParserConfig.class, officeParserConfig);
+
+        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.pptx", parseContext);
+        Metadata pdfMetadata1 = metadataList.get(4);
+        assertContains("Apache Tika", 
pdfMetadata1.get(RecursiveParserWrapper.TIKA_CONTENT));
+        Metadata pdfMetadata2 = metadataList.get(5);
+        assertContains("Hello World", 
pdfMetadata2.get(RecursiveParserWrapper.TIKA_CONTENT));
+    }
+
+    @Test(expected = org.apache.tika.exception.TikaException.class)
+    public void testCorruptedZip() throws Exception {
+        //TIKA_2446
+        getRecursiveMetadata("testZIP_corrupted_oom.zip");
+    }
+}
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/TruncatedOOXMLTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/TruncatedOOXMLTest.java
new file mode 100644
index 0000000..f4a26ad
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/ooxml/TruncatedOOXMLTest.java
@@ -0,0 +1,33 @@
+package org.apache.tika.parser.tests.microsoft.ooxml;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class TruncatedOOXMLTest extends TikaTest {
+
+    @Test
+    public void testWordTrunc13138() throws Exception {
+        //this truncates the content_types.xml
+        //this tests that there's a backoff to the pkg parser
+        List<Metadata> metadataList = getRecursiveMetadata(truncate(
+                "testWORD_various.docx", 13138), true);
+        assertEquals(19, metadataList.size());
+        Metadata m = metadataList.get(0);
+        assertEquals("application/x-tika-ooxml", m.get(Metadata.CONTENT_TYPE));
+    }
+
+    @Test
+    public void testWordTrunc774() throws Exception {
+        //this is really truncated
+        List<Metadata> metadataList = getRecursiveMetadata(truncate(
+                "testWORD_various.docx", 774), true);
+        assertEquals(4, metadataList.size());
+        Metadata m = metadataList.get(0);
+        assertEquals("application/x-tika-ooxml", m.get(Metadata.CONTENT_TYPE));
+    }
+}
diff --git 
a/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/rtf/RTFParserTest.java
 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/rtf/RTFParserTest.java
new file mode 100644
index 0000000..a8f6c96
--- /dev/null
+++ 
b/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/tests/microsoft/rtf/RTFParserTest.java
@@ -0,0 +1,111 @@
+package org.apache.tika.parser.tests.microsoft.rtf;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.tika.TikaTest;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.RTFMetadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.RecursiveParserWrapper;
+import org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
+import org.apache.tika.sax.BasicContentHandlerFactory;
+import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+public class RTFParserTest extends TikaTest {
+    // TIKA-1010
+    @Test
+    public void testEmbeddedMonster() throws Exception {
+
+        Map<Integer, Pair> expected = new HashMap<>();
+        expected.put(3, new Pair("Hw.txt","text/plain; charset=ISO-8859-1"));
+        expected.put(4, new Pair("file_0.doc", "application/msword"));
+        expected.put(7, new Pair("file_1.xlsx",
+                
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"));
+        expected.put(10, new Pair("text.html", "text/html; 
charset=windows-1252"));
+        expected.put(11, new Pair("html-within-zip.zip", "application/zip"));
+        expected.put(12, new 
Pair("test-zip-of-zip_\u666E\u6797\u65AF\u987F.zip", "application/zip"));
+        expected.put(15, new 
Pair("testHTML_utf8_\u666E\u6797\u65AF\u987F.html", "text/html; 
charset=UTF-8"));
+        expected.put(18, new Pair("testJPEG_\u666E\u6797\u65AF\u987F.jpg", 
"image/jpeg"));
+        expected.put(21, new Pair("file_2.xls", "application/vnd.ms-excel"));
+        expected.put(24, new Pair("testMSG_\u666E\u6797\u65AF\u987F.msg", 
"application/vnd.ms-outlook"));
+        expected.put(27, new Pair("file_3.pdf", "application/pdf"));
+        expected.put(30, new Pair("file_4.ppt", 
"application/vnd.ms-powerpoint"));
+        expected.put(34, new Pair("file_5.pptx", 
"application/vnd.openxmlformats-officedocument.presentationml.presentation"));
+        expected.put(33, new Pair("thumbnail.jpeg", "image/jpeg"));
+        expected.put(37, new Pair("file_6.doc", "application/msword"));
+        expected.put(40, new Pair("file_7.doc", "application/msword"));
+        expected.put(43, new Pair("file_8.docx", 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
+        expected.put(46, new Pair("testJPEG_\u666E\u6797\u65AF\u987F.jpg", 
"image/jpeg"));
+
+
+        List<Metadata> metadataList = 
getRecursiveMetadata("testRTFEmbeddedFiles.rtf");
+        assertEquals(49, metadataList.size());
+        for (Map.Entry<Integer, Pair> e : expected.entrySet()) {
+            Metadata metadata = metadataList.get(e.getKey());
+            Pair p = e.getValue();
+            assertNotNull(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+            //necessary to getName() because MSOffice extractor includes
+            //directory: _1457338524/HW.txt
+            Assert.assertEquals("filename equals ",
+                    p.fileName, FilenameUtils.getName(
+                            
metadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH)));
+
+            assertEquals(p.mimeType, metadata.get(Metadata.CONTENT_TYPE));
+        }
+        
assertEquals("C:\\Users\\tallison\\AppData\\Local\\Temp\\testJPEG_普林斯顿.jpg",
+                
metadataList.get(46).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
+    }
+
+    //TIKA-1010 test regular (not "embedded") images/picts
+    @Test
+    public void testRegularImages() throws Exception {
+        ParseContext ctx = new ParseContext();
+        RecursiveParserWrapper parser = new 
RecursiveParserWrapper(AUTO_DETECT_PARSER);
+        RecursiveParserWrapperHandler handler = new 
RecursiveParserWrapperHandler(
+                new 
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, 
-1),-1);
+        Metadata rootMetadata = new Metadata();
+        rootMetadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, 
"testRTFRegularImages.rtf");
+        try (TikaInputStream tis = 
TikaInputStream.get(getResourceAsStream("/test-documents/testRTFRegularImages.rtf")))
 {
+            parser.parse(tis, handler, rootMetadata, ctx);
+        }
+        List<Metadata> metadatas = handler.getMetadataList();
+
+        Metadata meta_jpg_exif = 
metadatas.get(1);//("testJPEG_EXIF_\u666E\u6797\u65AF\u987F.jpg");
+        Metadata meta_jpg = 
metadatas.get(3);//("testJPEG_\u666E\u6797\u65AF\u987F.jpg");
+
+        assertTrue(meta_jpg_exif != null);
+        assertTrue(meta_jpg != null);
+        
assertTrue(Arrays.asList(meta_jpg_exif.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
+        
assertTrue(meta_jpg.get(TikaCoreProperties.COMMENTS).contains("Licensed to the 
Apache"));
+        //make sure old metadata doesn't linger between objects
+        
assertFalse(Arrays.asList(meta_jpg.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
+        assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
+        assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
+
+        assertEquals(51, meta_jpg.names().length);
+        assertEquals(112, meta_jpg_exif.names().length);
+    }
+
+    private static class Pair {
+        final String fileName;
+        final String mimeType;
+        Pair(String fileName, String mimeType) {
+            this.fileName = fileName;
+            this.mimeType = mimeType;
+        }
+    }
+
+}
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index c07171b..42880cb 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -39,6 +39,10 @@
         <jempbox.version>1.8.16</jempbox.version>
         <mime4j.version>0.8.3</mime4j.version>
         <pdfbox.version>2.0.20</pdfbox.version>
+        <commons.logging.version>1.2</commons.logging.version>
+        <!-- used by POI, PDFBox and Jackcess ...try to sync -->
+        <bouncycastle.version>1.65</bouncycastle.version>
+
     </properties>
 
     <dependencies>
@@ -71,7 +75,6 @@
         <module>tika-parser-font-module</module>
         <module>tika-parser-microsoft-module</module>
         <module>tika-parser-pkg-module</module>
-        <module>tika-parser-rtf-module</module>
         <module>tika-parser-mail-commons</module>
         <module>tika-parser-xml-module</module>
     </modules>
diff --git a/tika-parser-modules/tika-parser-microsoft-module/pom.xml 
b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
index 36a17b5..3e4c1f4 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/pom.xml
+++ b/tika-parser-modules/tika-parser-microsoft-module/pom.xml
@@ -46,7 +46,6 @@
             <artifactId>commons-codec</artifactId>
             <version>${codec.version}</version>
         </dependency>
-        <!-- we're only using Pair from this -->
         <dependency>
             <groupId>org.apache.commons</groupId>
             <artifactId>commons-lang3</artifactId>
@@ -87,6 +86,12 @@
                 </exclusion>
             </exclusions>
         </dependency>
+        <!-- needed by jackcess -->
+        <dependency>
+            <groupId>commons-logging</groupId>
+            <artifactId>commons-logging</artifactId>
+            <version>${commons.logging.version}</version>
+        </dependency>
         <dependency>
             <groupId>com.healthmarketscience.jackcess</groupId>
             <artifactId>jackcess</artifactId>
@@ -119,7 +124,37 @@
                 </exclusion>
             </exclusions>
         </dependency>
-
+        <!-- for java 10
+           See TIKA-2778 for why we need to do this now.
+            May the gods of API design fix this in the future.
+            only required for jackcess-encrypt
+           -->
+        <dependency>
+            <groupId>org.glassfish.jaxb</groupId>
+            <artifactId>jaxb-runtime</artifactId>
+            <version>${jaxb.version}</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>jakarta.activation</groupId>
+                    <artifactId>jakarta.activation-api</artifactId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>com.sun.activation</groupId>
+            <artifactId>jakarta.activation</artifactId>
+            <version>1.2.1</version>
+        </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcmail-jdk15on</artifactId>
+            <version>${bouncycastle.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.bouncycastle</groupId>
+            <artifactId>bcprov-jdk15on</artifactId>
+            <version>${bouncycastle.version}</version>
+        </dependency>
         <!-- https://mvnrepository.com/artifact/log4j/log4j -->
         <dependency>
             <groupId>log4j</groupId>
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
 
b/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
new file mode 100644
index 0000000..f674e79
--- /dev/null
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.detect.Detector
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.tika.parser.microsoft.POIFSContainerDetector
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 
b/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
new file mode 100644
index 0000000..0a9fa02
--- /dev/null
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -0,0 +1,27 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+org.apache.tika.parser.microsoft.rtf.RTFParser
+org.apache.tika.parser.microsoft.EMFParser
+org.apache.tika.parser.microsoft.WMFParser
+org.apache.tika.parser.microsoft.JackcessParser
+org.apache.tika.parser.microsoft.MSOwnerFileParser
+org.apache.tika.parser.microsoft.OfficeParser
+org.apache.tika.parser.microsoft.OldExcelParser
+org.apache.tika.parser.microsoft.TNEFParser
+org.apache.tika.parser.microsoft.onenote.OneNoteParser
+org.apache.tika.parser.microsoft.ooxml.OOXMLParser
+org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser
+org.apache.tika.parser.microsoft.xml.WordMLParser
+org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
index e6d2db3..e3508d7 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/EMFParserTest.java
@@ -27,19 +27,7 @@ import org.junit.Test;
 
 public class EMFParserTest extends TikaTest {
 
-    @Test
-    public void testTextExtractionWindows() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testEXCEL_embeddedPDF_windows.xls");
-        Metadata emfMetadata = metadataList.get(1);
-        assertEquals("image/emf", emfMetadata.get(Metadata.CONTENT_TYPE));
-        assertContains("<p>testPDF.pdf</p>", 
emfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
 
-        //this is just the usual embedded pdf
-        Metadata pdfMetadata = metadataList.get(2);
-        assertEquals("application/pdf", 
pdfMetadata.get(Metadata.CONTENT_TYPE));
-        assertContains("is a toolkit for detecting", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-
-    }
 
     @Test
     public void testTextExtractionMac() throws Exception {
@@ -51,15 +39,7 @@ public class EMFParserTest extends TikaTest {
         assertContains("Tika http://incubator.apache.org/tika/";, 
emfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
     }
 
-    @Test
-    public void testPDFExtraction() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testEXCEL_embeddedPDF_mac.xls");
-        //this pdf has to be extracted from within the EMF
-        //it does not exist as a standalone pdf file inside the _mac.xls file.
-        Metadata pdfMetadata = metadataList.get(1);
-        assertEquals("application/pdf", 
pdfMetadata.get(Metadata.CONTENT_TYPE));
-        assertContains("is a toolkit for detecting", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-    }
+
 
 
 }
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 7fb8fb8..4e1528a 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -474,11 +474,7 @@ public class ExcelParserTest extends TikaTest {
 //        assertContains("<a 
href=\"http://tika.apache.org/1.12/gettingstarted.html\";>", xml);
     }
 
-    @Test
-    public void testEmbeddedPDF() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testExcel_embeddedPDF.xls");
-        assertContains("Hello World!", 
metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
-    }
+
 
     @Test
     public void testBigIntegersWGeneralFormat() throws Exception {
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
index 148efec..9b9e3d2 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
@@ -106,227 +106,8 @@ public class POIContainerExtractionTest extends 
AbstractPOIContainerExtractionTe
         assertEquals(TYPE_PNG, handler.mediaTypes.get(2));
     }
 
-    /**
-     * Office files which have other office files
-     * embedded into them. The embedded office files
-     * will sometimes have images in them.
-     * <p/>
-     * eg xls
-     * -> word
-     * -> image
-     * -> image
-     * -> powerpoint
-     * -> excel
-     * -> image
-     */
-    @Test
-    public void testEmbeddedOfficeFiles() throws Exception {
-        ContainerExtractor extractor = new ParserContainerExtractor();
-        TrackingHandler handler;
-
-
-        // Excel with a word doc and a powerpoint doc, both of which have 
images in them
-        // Without recursion, should see both documents + the images
-        handler = process("testEXCEL_embeded.xls", extractor, false);
-        assertEquals(5, handler.filenames.size());
-        assertEquals(5, handler.mediaTypes.size());
-
-        // We don't know their filenames
-        assertEquals(null, handler.filenames.get(0));
-        assertEquals(null, handler.filenames.get(1));
-        assertEquals(null, handler.filenames.get(2));
-        assertEquals("MBD0003271D.ppt", handler.filenames.get(3));
-        assertEquals("MBD00032A24.doc", handler.filenames.get(4));
-        // But we do know their types
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(0)); // Icon of embedded 
office doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(1)); // Icon of embedded 
office doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(2)); // Embedded image
-        assertEquals(TYPE_PPT, handler.mediaTypes.get(3)); // Embedded office 
doc
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(4)); // Embedded office 
doc
-
-
-        // With recursion, should get the images embedded in the office files 
too
-        handler = process("testEXCEL_embeded.xls", extractor, true);
-        assertEquals(17, handler.filenames.size());
-        assertEquals(17, handler.mediaTypes.size());
-
-        assertEquals(null, handler.filenames.get(0));
-        assertEquals(null, handler.filenames.get(1));
-        assertEquals(null, handler.filenames.get(2));
-        assertEquals("MBD0003271D.ppt", handler.filenames.get(3));
-        assertEquals("1", handler.filenames.get(4));
-        assertEquals(null, handler.filenames.get(5));
-        assertEquals("2", handler.filenames.get(6));
-        assertEquals("image1.png", handler.filenames.get(7));
-        assertEquals("image2.jpg", handler.filenames.get(8));
-        assertEquals("image3.png", handler.filenames.get(9));
-        assertEquals("image1.png", handler.filenames.get(16));
-
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(0)); // Icon of embedded 
office doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(1)); // Icon of embedded 
office doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(2)); // Embedded image
-        assertEquals(TYPE_PPT, handler.mediaTypes.get(3)); // Embedded 
presentation
-        assertEquals(TYPE_XLS, handler.mediaTypes.get(4)); // Embedded XLS
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(5)); // Embedded image
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(6)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(7)); // Embedded image
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(8)); // Embedded image
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(9)); // Embedded image
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(15)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(16)); // Embedded image
-
-        // Word with .docx, powerpoint and excel
-        handler = process("testWORD_embeded.doc", extractor, false);
-        assertEquals(9, handler.filenames.size());
-        assertEquals(9, handler.mediaTypes.size());
-
-        // Filenames are a bit iffy...
-        // Should really be 3*embedded pictures then 3*icons then embedded docs
-        assertEquals("image1.emf", handler.filenames.get(0));
-        assertEquals("image4.png", handler.filenames.get(1));
-        assertEquals("image5.jpg", handler.filenames.get(2));
-        assertEquals("image6.png", handler.filenames.get(3));
-        assertEquals("image2.emf", handler.filenames.get(4));
-        assertEquals("image3.emf", handler.filenames.get(5));
-        assertEquals(null, handler.filenames.get(6));
-        assertEquals("_1345471035.ppt", handler.filenames.get(7));
-        assertEquals("_1345470949.xls", handler.filenames.get(8));
-
-        // But we do know their types
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(0)); // Icon of embedded 
office doc?
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(1)); // Embedded image - 
logo
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(2)); // Embedded image - 
safe
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(3)); // Embedded image - 
try
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(4)); // Icon of embedded 
office doc?
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(5)); // Icon of embedded 
office doc?
-        assertEquals(TYPE_DOCX, handler.mediaTypes.get(6)); // Embedded office 
doc
-        assertEquals(TYPE_PPT, handler.mediaTypes.get(7)); // Embedded office 
doc
-        assertEquals(TYPE_XLS, handler.mediaTypes.get(8)); // Embedded office 
doc
-
-
-        // With recursion, should get their images too
-        handler = process("testWORD_embeded.doc", extractor, true);
-        assertEquals(16, handler.filenames.size());
-        assertEquals(16, handler.mediaTypes.size());
-
-        // We don't know their filenames, except for doc images + docx
-        assertEquals("image1.emf", handler.filenames.get(0));
-        assertEquals("image4.png", handler.filenames.get(1));
-        assertEquals("image5.jpg", handler.filenames.get(2));
-        assertEquals("image6.png", handler.filenames.get(3));
-        assertEquals("image2.emf", handler.filenames.get(4));
-        assertEquals("image3.emf", handler.filenames.get(5));
-        assertEquals(null, handler.filenames.get(6));
-        assertEquals("image2.png", handler.filenames.get(7));
-        assertEquals("image3.jpeg", handler.filenames.get(8));
-        assertEquals("image4.png", handler.filenames.get(9));
-        for (int i = 11; i < 14; i++) {
-            assertNull(handler.filenames.get(i));
-        }
-        // But we do know their types
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(0)); // Icon of embedded 
office doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(1)); // Embedded image - 
logo
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(2)); // Embedded image - 
safe
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(3)); // Embedded image - 
try
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(4)); // Icon of embedded 
office doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(5)); // Icon of embedded 
office doc
-        assertEquals(TYPE_DOCX, handler.mediaTypes.get(6)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(7));  //    PNG inside 
.docx
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(8));  //    JPG inside 
.docx
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(9));  //    PNG inside 
.docx
-        assertEquals(TYPE_PPT, handler.mediaTypes.get(10)); // Embedded office 
doc
-        assertEquals(TYPE_XLS, handler.mediaTypes.get(14)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(15)); //    PNG inside 
.xls
 
 
-        // PowerPoint with excel and word
-        handler = process("testPPT_embeded.ppt", extractor, false);
-        assertEquals(7, handler.filenames.size());
-        assertEquals(7, handler.mediaTypes.size());
-
-        // We don't get all that helpful filenames
-        assertEquals("1", handler.filenames.get(0));
-        assertEquals("2", handler.filenames.get(1));
-        assertEquals(null, handler.filenames.get(2));
-        assertEquals(null, handler.filenames.get(3));
-        assertEquals(null, handler.filenames.get(4));
-        assertEquals(null, handler.filenames.get(5));
-        assertEquals(null, handler.filenames.get(6));
-        // But we do know their types
-        assertEquals(TYPE_XLS, handler.mediaTypes.get(0)); // Embedded office 
doc
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(1)); // Embedded office 
doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(2)); // Icon of embedded 
office doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(3)); // Icon of embedded 
office doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(4)); // Embedded image
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(5)); // Embedded image
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(6)); // Embedded image
-
-        // Run again on PowerPoint but with recursion
-        handler = process("testPPT_embeded.ppt", extractor, true);
-        assertEquals(11, handler.filenames.size());
-        assertEquals(11, handler.mediaTypes.size());
-
-        assertEquals("1", handler.filenames.get(0));
-        assertEquals(null, handler.filenames.get(1));
-        assertEquals("2", handler.filenames.get(2));
-        assertEquals("image1.png", handler.filenames.get(3));
-        assertEquals("image2.jpg", handler.filenames.get(4));
-        assertEquals("image3.png", handler.filenames.get(5));
-        assertEquals(null, handler.filenames.get(6));
-        assertEquals(null, handler.filenames.get(7));
-        assertEquals(null, handler.filenames.get(8));
-        assertEquals(null, handler.filenames.get(9));
-        assertEquals(null, handler.filenames.get(10));
-
-        assertEquals(TYPE_XLS, handler.mediaTypes.get(0)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(1)); //    PNG inside 
.xls
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(2)); // Embedded office 
doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(3));  //    PNG inside 
.docx
-        assertEquals(TYPE_JPG, handler.mediaTypes.get(4));  //    JPG inside 
.docx
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(5));  //    PNG inside 
.docx
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(6)); // Icon of embedded 
office doc
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(7)); // Icon of embedded 
office doc
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(8)); // Embedded image
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(9)); // Embedded image
-        assertEquals(TYPE_PNG, handler.mediaTypes.get(10)); // Embedded image
-
-
-        // Word, with a non-office file (PDF)
-        handler = process("testWORD_embedded_pdf.doc", extractor, true);
-        assertEquals(2, handler.filenames.size());
-        assertEquals(2, handler.mediaTypes.size());
-
-        assertEquals("image1.emf", handler.filenames.get(0));
-        assertEquals("_1402837031.pdf", handler.filenames.get(1));
-
-        assertEquals(TYPE_EMF, handler.mediaTypes.get(0)); // Icon of embedded 
pdf
-        assertEquals(TYPE_PDF, handler.mediaTypes.get(1)); // The embedded PDF 
itself
-
-
-        // Outlook with a text file and a word document
-        handler = process("testMSG_att_doc.msg", extractor, true);
-        assertEquals(2, handler.filenames.size());
-        assertEquals(2, handler.mediaTypes.size());
-
-        assertEquals("test-unicode.doc", handler.filenames.get(0));
-        assertEquals(TYPE_DOC, handler.mediaTypes.get(0));
-
-        assertEquals("pj1.txt", handler.filenames.get(1));
-        assertEquals(TYPE_TXT, handler.mediaTypes.get(1));
-
-
-        // Outlook with a pdf and another outlook message
-        handler = process("testMSG_att_msg.msg", extractor, true);
-        assertEquals(2, handler.filenames.size());
-        assertEquals(2, handler.mediaTypes.size());
-
-        assertEquals("__substg1.0_3701000D.msg", handler.filenames.get(0));
-        assertEquals(TYPE_MSG, handler.mediaTypes.get(0));
-
-        assertEquals("smbprn.00009008.KdcPjl.pdf", handler.filenames.get(1));
-        assertEquals(TYPE_PDF, handler.mediaTypes.get(1));
-    }
-
     @Test
     public void testEmbeddedOfficeFilesXML() throws Exception {
         ContainerExtractor extractor = new ParserContainerExtractor();
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
index 45a1a39..4742aae 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
@@ -292,15 +292,6 @@ public class PowerPointParserTest extends TikaTest {
     }
 
     @Test
-    public void testEmbeddedPDF() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.ppt");
-        assertContains("Apache Tika project", 
metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
-        assertEquals("3.pdf", 
metadataList.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
-        assertContains("Hello World", 
metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
-        assertEquals("4.pdf", 
metadataList.get(2).get(TikaCoreProperties.RESOURCE_NAME_KEY));
-    }
-
-    @Test
     public void testMacros() throws  Exception {
         Metadata minExpected = new Metadata();
         minExpected.add(RecursiveParserWrapper.TIKA_CONTENT.getName(), "Sub 
Embolden()");
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
index edc9c94..1c0f3b5 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
@@ -544,7 +544,7 @@ public class WordParserTest extends TikaTest {
 
     @Test
     public void testOrigSourcePath() throws Exception {
-        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.doc").get(11);
+        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.doc").get(2);
         assertContains("C:\\Users\\tallison\\AppData\\Local\\Temp\\embed1.zip",
                 
Arrays.asList(embed1_zip_metadata.getValues(TikaCoreProperties.ORIGINAL_RESOURCE_NAME)));
         assertContains("C:\\Users\\tallison\\Desktop\\tmp\\New folder 
(2)\\embed1.zip",
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
index d5d1639..33e9a45 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/onenote/OneNoteParserTest.java
@@ -34,8 +34,6 @@ public class OneNoteParserTest extends TikaTest {
      */
     @Test
     public void testOneNote2013Doc1() throws Exception {
-//        List<Metadata> metadataList = 
getRecursiveMetadata("testOneNote1.one");
-  //      debug(metadataList);
         Metadata metadata = new Metadata();
         String txt = getText("testOneNote1.one", metadata);
         assertNoJunk(txt);
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index a38e4d7..fdefbc4 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -31,7 +31,6 @@ import java.io.File;
 import java.io.InputStream;
 import java.io.PrintStream;
 import java.io.StringWriter;
-import java.nio.file.Path;
 import java.text.DecimalFormatSymbols;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -1263,25 +1262,10 @@ public class OOXMLParserTest extends TikaTest {
         assertContains("<a 
href=\"http://tika.apache.org/1.12/gettingstarted.html\">", xml);
     }
 
-    @Test
-    public void testEmbeddedPDFInPPTX() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.pptx");
-        Metadata pdfMetadata1 = metadataList.get(4);
-        assertContains("Apache Tika", 
pdfMetadata1.get(RecursiveParserWrapper.TIKA_CONTENT));
-        Metadata pdfMetadata2 = metadataList.get(5);
-        assertContains("Hello World", 
pdfMetadata2.get(RecursiveParserWrapper.TIKA_CONTENT));
-    }
-
-    @Test
-    public void testEmbeddedPDFInXLSX() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testExcel_embeddedPDF.xlsx");
-        Metadata pdfMetadata = metadataList.get(1);
-        assertContains("Hello World", 
pdfMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
-    }
 
     @Test
     public void testOrigSourcePath() throws Exception {
-        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.docx").get(11);
+        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.docx").get(2);
         assertContains("C:\\Users\\tallison\\AppData\\Local\\Temp\\embed1.zip",
                 
Arrays.asList(embed1_zip_metadata.getValues(TikaCoreProperties.ORIGINAL_RESOURCE_NAME)));
         assertContains("C:\\Users\\tallison\\Desktop\\tmp\\New folder 
(2)\\embed1.zip",
@@ -1741,11 +1725,6 @@ public class OOXMLParserTest extends TikaTest {
                 xlsx.get(Metadata.CONTENT_TYPE));
     }
 
-    @Test(expected = org.apache.tika.exception.TikaException.class)
-    public void testCorruptedZip() throws Exception {
-        //TIKA_2446
-        getRecursiveMetadata("testZIP_corrupted_oom.zip");
-    }
 
     @Test
     public void testSigned() throws Exception {
@@ -1788,11 +1767,6 @@ public class OOXMLParserTest extends TikaTest {
                 getRecursiveMetadata("testWORD_docSecurity.docx")
                         
.get(0).get(OfficeOpenXMLExtended.DOC_SECURITY_STRING));
     }
-
-    @Test
-    public void oneOff() throws Exception {
-        
debug(getRecursiveMetadata("CVLKRA-KYC_Download_File_Structure_V3.1.xlsx"));
-    }
 }
 
 
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
index f8c0ff2..d87b9ae 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
@@ -530,14 +530,6 @@ public class SXSLFExtractorTest extends TikaTest {
 
     }
 
-    @Test
-    public void testEmbeddedPDFInPPTX() throws Exception {
-        List<Metadata> metadataList = 
getRecursiveMetadata("testPPT_EmbeddedPDF.pptx", parseContext);
-        Metadata pdfMetadata1 = metadataList.get(4);
-        assertContains("Apache Tika", 
pdfMetadata1.get(RecursiveParserWrapper.TIKA_CONTENT));
-        Metadata pdfMetadata2 = metadataList.get(5);
-        assertContains("Hello World", 
pdfMetadata2.get(RecursiveParserWrapper.TIKA_CONTENT));
-    }
 
     @Test
     public void testMacrosInPptm() throws Exception {
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
index 4eca4b1..0c02056 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
@@ -674,7 +674,7 @@ public class SXWPFExtractorTest extends TikaTest {
 
     @Test
     public void testOrigSourcePath() throws Exception {
-        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.docx", parseContext).get(11);
+        Metadata embed1_zip_metadata = 
getRecursiveMetadata("test_recursive_embedded.docx", parseContext).get(2);
         assertContains("C:\\Users\\tallison\\AppData\\Local\\Temp\\embed1.zip",
                 
Arrays.asList(embed1_zip_metadata.getValues(TikaCoreProperties.ORIGINAL_RESOURCE_NAME)));
         assertContains("C:\\Users\\tallison\\Desktop\\tmp\\New folder 
(2)\\embed1.zip",
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
index 1247cc1..68a3528 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/TruncatedOOXMLTest.java
@@ -59,26 +59,7 @@ public class TruncatedOOXMLTest extends TikaTest {
         assertContains("Suddenly some Japanese", content);
     }
 
-    @Test
-    public void testWordTrunc13138() throws Exception {
-        //this truncates the content_types.xml
-        //this tests that there's a backoff to the pkg parser
-        List<Metadata> metadataList = getRecursiveMetadata(truncate(
-                "testWORD_various.docx", 13138), true);
-        assertEquals(19, metadataList.size());
-        Metadata m = metadataList.get(0);
-        assertEquals("application/x-tika-ooxml", m.get(Metadata.CONTENT_TYPE));
-    }
 
-    @Test
-    public void testWordTrunc774() throws Exception {
-        //this is really truncated
-        List<Metadata> metadataList = getRecursiveMetadata(truncate(
-                "testWORD_various.docx", 774), true);
-        assertEquals(4, metadataList.size());
-        Metadata m = metadataList.get(0);
-        assertEquals("application/x-tika-ooxml", m.get(Metadata.CONTENT_TYPE));
-    }
 
     @Test
     public void testTruncation() throws Exception {
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
index 250fd82..011895c 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
@@ -335,7 +335,7 @@ public class RTFParserTest extends TikaTest {
     @Test
     public void testTurningOffList() throws Exception {
         InputStream is = getClass().getResourceAsStream(
-                
"/org/apache/tika/parser/rtf/ignoreListMarkup-tika-config.xml");
+                
"/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml");
         assertNotNull(is);
         TikaConfig tikaConfig = new TikaConfig(is);
         Parser p = new AutoDetectParser(tikaConfig);
@@ -396,79 +396,7 @@ public class RTFParserTest extends TikaTest {
                 getText("testRTFCorruptListOverride.rtf"));
     }
 
-    // TIKA-1010
-    @Test
-    public void testEmbeddedMonster() throws Exception {
-
-        Map<Integer, Pair> expected = new HashMap<>();
-        expected.put(3, new Pair("Hw.txt","text/plain; charset=ISO-8859-1"));
-        expected.put(4, new Pair("file_0.doc", "application/msword"));
-        expected.put(7, new Pair("file_1.xlsx",
-                
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"));
-        expected.put(10, new Pair("text.html", "text/html; 
charset=windows-1252"));
-        expected.put(11, new Pair("html-within-zip.zip", "application/zip"));
-        expected.put(12, new 
Pair("test-zip-of-zip_\u666E\u6797\u65AF\u987F.zip", "application/zip"));
-        expected.put(15, new 
Pair("testHTML_utf8_\u666E\u6797\u65AF\u987F.html", "text/html; 
charset=UTF-8"));
-        expected.put(18, new Pair("testJPEG_\u666E\u6797\u65AF\u987F.jpg", 
"image/jpeg"));
-        expected.put(21, new Pair("file_2.xls", "application/vnd.ms-excel"));
-        expected.put(24, new Pair("testMSG_\u666E\u6797\u65AF\u987F.msg", 
"application/vnd.ms-outlook"));
-        expected.put(27, new Pair("file_3.pdf", "application/pdf"));
-        expected.put(30, new Pair("file_4.ppt", 
"application/vnd.ms-powerpoint"));
-        expected.put(34, new Pair("file_5.pptx", 
"application/vnd.openxmlformats-officedocument.presentationml.presentation"));
-        expected.put(33, new Pair("thumbnail.jpeg", "image/jpeg"));
-        expected.put(37, new Pair("file_6.doc", "application/msword"));
-        expected.put(40, new Pair("file_7.doc", "application/msword"));
-        expected.put(43, new Pair("file_8.docx", 
"application/vnd.openxmlformats-officedocument.wordprocessingml.document"));
-        expected.put(46, new Pair("testJPEG_\u666E\u6797\u65AF\u987F.jpg", 
"image/jpeg"));
-
-
-        List<Metadata> metadataList = 
getRecursiveMetadata("testRTFEmbeddedFiles.rtf");
-        assertEquals(49, metadataList.size());
-        for (Map.Entry<Integer, Pair> e : expected.entrySet()) {
-            Metadata metadata = metadataList.get(e.getKey());
-            Pair p = e.getValue();
-            assertNotNull(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
-            //necessary to getName() because MSOffice extractor includes
-            //directory: _1457338524/HW.txt
-            Assert.assertEquals("filename equals ",
-                    p.fileName, FilenameUtils.getName(
-                            
metadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH)));
-
-            assertEquals(p.mimeType, metadata.get(Metadata.CONTENT_TYPE));
-        }
-        
assertEquals("C:\\Users\\tallison\\AppData\\Local\\Temp\\testJPEG_普林斯顿.jpg",
-                
metadataList.get(46).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
-    }
-    
-    //TIKA-1010 test regular (not "embedded") images/picts
-    @Test
-    public void testRegularImages() throws Exception {
-        ParseContext ctx = new ParseContext();
-        RecursiveParserWrapper parser = new 
RecursiveParserWrapper(AUTO_DETECT_PARSER);
-        RecursiveParserWrapperHandler handler = new 
RecursiveParserWrapperHandler(
-                new 
BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, 
-1),-1);
-        Metadata rootMetadata = new Metadata();
-        rootMetadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, 
"testRTFRegularImages.rtf");
-        try (TikaInputStream tis = 
TikaInputStream.get(getResourceAsStream("/test-documents/testRTFRegularImages.rtf")))
 {
-            parser.parse(tis, handler, rootMetadata, ctx);
-        }
-        List<Metadata> metadatas = handler.getMetadataList();
-
-        Metadata meta_jpg_exif = 
metadatas.get(1);//("testJPEG_EXIF_\u666E\u6797\u65AF\u987F.jpg");
-        Metadata meta_jpg = 
metadatas.get(3);//("testJPEG_\u666E\u6797\u65AF\u987F.jpg");
-
-        assertTrue(meta_jpg_exif != null);
-        assertTrue(meta_jpg != null);
-        
assertTrue(Arrays.asList(meta_jpg_exif.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
-        
assertTrue(meta_jpg.get(TikaCoreProperties.COMMENTS).contains("Licensed to the 
Apache"));
-        //make sure old metadata doesn't linger between objects
-        
assertFalse(Arrays.asList(meta_jpg.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
-        assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
-        assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
 
-        assertEquals(51, meta_jpg.names().length);
-        assertEquals(112, meta_jpg_exif.names().length);
-    }
 
     @Test
     public void testMultipleNewlines() throws Exception {
@@ -517,7 +445,7 @@ public class RTFParserTest extends TikaTest {
         //test that memory allocation of the bin element is limited
         //via the config file.  Unfortunately, this test file's bin embedding 
contains 10 bytes
         //so we had to set the config to 0.
-        InputStream is = 
getClass().getResourceAsStream("/org/apache/tika/parser/rtf/tika-config.xml");
+        InputStream is = 
getClass().getResourceAsStream("/org/apache/tika/parser/microsoft/rtf/tika-config.xml");
         assertNotNull(is);
         TikaConfig tikaConfig = new TikaConfig(is);
         Parser p = new AutoDetectParser(tikaConfig);
@@ -567,13 +495,4 @@ public class RTFParserTest extends TikaTest {
                 getXML("testRTFTIKA_2899.rtf").xml);
     }
 
-
-    private static class Pair {
-        final String fileName;
-        final String mimeType;
-        Pair(String fileName, String mimeType) {
-            this.fileName = fileName;
-            this.mimeType = mimeType;
-        }
-    }
 }
diff --git 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
index ff716b0..0c1509b 100644
--- 
a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
+++ 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
@@ -43,56 +43,7 @@ public class XML2003ParserTest extends MultiThreadedTikaTest 
{
         XMLReaderUtils.setPoolSize(XMLReaderUtils.DEFAULT_POOL_SIZE);
     }
 
-    @Test
-    public void testBasicWord() throws Exception {
-        List<Metadata> list =  getRecursiveMetadata("testWORD2003.xml");
-        assertEquals(6, list.size());
-        Metadata m = list.get(0);//container doc
-        String xml = m.get(RecursiveParserWrapper.TIKA_CONTENT);
-        xml = xml.replaceAll("\\s+", " ");
-        //make sure that metadata gets dumped to xml
-        assertContains("<meta name=\"meta:character-count-with-spaces\" 
content=\"256\"", xml);
-        //do not allow nested <p> elements
-        assertContains("<p /> <img href=\"02000003.jpg\" /><p /> <p><img 
href=\"02000004.jpg\" /></p>", xml);
-        assertContains("<table><tbody>", xml);
-        assertContains("</tbody></table>", xml);
-        assertContains("<td><p>R1 c1</p> </td>", xml);
-        assertContains("<a href=\"https://tika.apache.org/\">tika</a>", xml);
-        assertContains("footnote", xml);
-        assertContains("Mycomment", xml);
-        assertContains("Figure 1: My Figure", xml);
-        assertContains("myEndNote", xml);
-        assertContains("We have always been at war with OceaniaEurasia", xml);
-        assertContains("Text box", xml);
-        assertNotContained("Text boxText box", xml);
-        assertContains("MyHeader", xml);
-        assertContains("MyFooter", xml);
-        assertContains("<img href=\"02000003.jpg\" />", xml);
-        assertEquals("219", m.get(Office.CHARACTER_COUNT));
-        assertEquals("256", m.get(Office.CHARACTER_COUNT_WITH_SPACES));
-
-        assertEquals("38", m.get(Office.WORD_COUNT));
-        assertEquals("1", m.get(Office.PARAGRAPH_COUNT));
-        assertEquals("Allison, Timothy B.", m.get(TikaCoreProperties.CREATOR));
-        assertEquals("2016-04-27T17:49:00Z", 
m.get(TikaCoreProperties.CREATED));
-        assertEquals("application/vnd.ms-wordml", 
m.get(Metadata.CONTENT_TYPE));
-
-        //make sure embedded docs were properly processed
-        assertContains("moscow-birds",
-                
Arrays.asList(list.get(5).getValues(TikaCoreProperties.SUBJECT)));
 
-        assertEquals("testJPEG_EXIF.jpg", 
list.get(5).get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME));
-
-        //check that text is extracted with breaks between elements
-        String txt = 
getText(getResourceAsStream("/test-documents/testWORD2003.xml"),AUTO_DETECT_PARSER);
-        txt = txt.replaceAll("\\s+", " ");
-        assertNotContained("beforeR1", txt);
-        assertContains("R1 c1 R1 c2", txt);
-        assertNotContained("footnoteFigure", txt);
-        assertContains("footnote Figure", txt);
-        assertContains("test space", txt);
-
-    }
 
     @Test
     public void testBasicExcel() throws Exception {
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-custom-date-override.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-custom-date-override.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-custom-date-override.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-custom-date-override.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-dom-macros.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-dom-macros.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-dom-macros.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-dom-macros.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-sax-macros.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-sax-macros.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-sax-macros.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-sax-macros.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/rtf/ignoreListMarkup-tika-config.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/rtf/ignoreListMarkup-tika-config.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/rtf/tika-config.xml 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/rtf/tika-config.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/rtf/tika-config.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/rtf/tika-config.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-custom-date-override.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-custom-date-override.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/ooxml/tika-config-custom-date-override.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-custom-date-override.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-exclude-phonetic.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-exclude-phonetic.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-exclude-phonetic.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-exclude-phonetic.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-extract-all-alternatives-msg.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-extract-all-alternatives-msg.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-extract-all-alternatives-msg.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-extract-all-alternatives-msg.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-macros.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-macros.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-macros.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-macros.xml
diff --git 
a/tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml
similarity index 100%
rename from 
tika-parsers/src/test/resources/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml
diff --git a/tika-parsers/src/test/resources/test-documents/Doc1_ole.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/Doc1_ole.doc
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/Doc1_ole.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/Doc1_ole.doc
diff --git 
a/tika-parsers/src/test/resources/test-documents/EmbeddedDocument.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedDocument.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/EmbeddedDocument.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedDocument.docx
diff --git 
a/tika-parsers/src/test/resources/test-documents/EmbeddedOutlook.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedOutlook.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/EmbeddedOutlook.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedOutlook.docx
diff --git a/tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedPDF.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/EmbeddedPDF.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/EmbeddedPDF.docx
diff --git a/tika-parsers/src/test/resources/test-documents/NullHeader.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/NullHeader.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/NullHeader.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/NullHeader.docx
diff --git a/tika-parsers/src/test/resources/test-documents/footnotes.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/footnotes.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/footnotes.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/footnotes.docx
diff --git a/tika-parsers/src/test/resources/test-documents/headerPic.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/headerPic.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/headerPic.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/headerPic.docx
diff --git a/tika-parsers/src/test/resources/test-documents/jxl.xls 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/jxl.xls
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/jxl.xls
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/jxl.xls
diff --git a/tika-parsers/src/test/resources/test-documents/pictures.ppt 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/pictures.ppt
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/pictures.ppt
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/pictures.ppt
diff --git a/tika-parsers/src/test/resources/test-documents/protect.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protect.xlsx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/protect.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protect.xlsx
diff --git a/tika-parsers/src/test/resources/test-documents/protectedFile.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protectedFile.xlsx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/protectedFile.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protectedFile.xlsx
diff --git 
a/tika-parsers/src/test/resources/test-documents/protectedSheets.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protectedSheets.xlsx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/protectedSheets.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/protectedSheets.xlsx
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xls 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xls
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test-columnar.xls
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xls
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xlsb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xlsb
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test-columnar.xlsb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xlsb
diff --git a/tika-parsers/src/test/resources/test-documents/test-columnar.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xlsx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test-columnar.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-columnar.xlsx
diff --git a/tika-parsers/src/test/resources/test-documents/test-outlook.msg 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-outlook.msg
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test-outlook.msg
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-outlook.msg
diff --git 
a/tika-parsers/src/test/resources/test-documents/test-outlook2003.msg 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-outlook2003.msg
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test-outlook2003.msg
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test-outlook2003.msg
diff --git a/tika-parsers/src/test/resources/test-documents/test.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test.doc
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test.doc
diff --git a/tika-parsers/src/test/resources/test-documents/testACCESS.mdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testACCESS.mdb
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testACCESS.mdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testACCESS.mdb
diff --git a/tika-parsers/src/test/resources/test-documents/testAccess2.accdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2.accdb
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testAccess2.accdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2.accdb
diff --git 
a/tika-parsers/src/test/resources/test-documents/testAccess2_2000.mdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_2000.mdb
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testAccess2_2000.mdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_2000.mdb
diff --git 
a/tika-parsers/src/test/resources/test-documents/testAccess2_2002-2003.mdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_2002-2003.mdb
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testAccess2_2002-2003.mdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_2002-2003.mdb
diff --git 
a/tika-parsers/src/test/resources/test-documents/testAccess2_encrypted.accdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_encrypted.accdb
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testAccess2_encrypted.accdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess2_encrypted.accdb
diff --git 
a/tika-parsers/src/test/resources/test-documents/testAccess_V1997.mdb 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess_V1997.mdb
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testAccess_V1997.mdb
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAccess_V1997.mdb
diff --git 
a/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testBinControlWord.rtf
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testBinControlWord.rtf
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.doc
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.doc
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.docx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.docx
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.ppt 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.ppt
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.ppt
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.ppt
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.pptx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.pptx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.pptx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.pptx
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.rtf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.rtf
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.rtf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.rtf
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.xls 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.xls
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.xls
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.xls
diff --git a/tika-parsers/src/test/resources/test-documents/testComment.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.xlsx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testComment.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testComment.xlsx
diff --git 
a/tika-parsers/src/test/resources/test-documents/testControlCharacters.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testControlCharacters.doc
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testControlCharacters.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testControlCharacters.doc
diff --git 
a/tika-parsers/src/test/resources/test-documents/testDOCX_Thumbnail.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDOCX_Thumbnail.docx
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testDOCX_Thumbnail.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDOCX_Thumbnail.docx
diff --git a/tika-parsers/src/test/resources/test-documents/testDOTM.dotm 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDOTM.dotm
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testDOTM.dotm
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDOTM.dotm
diff --git 
a/tika-parsers/src/test/resources/test-documents/testDocumentLink.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDocumentLink.doc
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testDocumentLink.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDocumentLink.doc
diff --git a/tika-parsers/src/test/resources/test-documents/testEMF.emf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEMF.emf
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testEMF.emf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEMF.emf
diff --git a/tika-parsers/src/test/resources/test-documents/testEMLX.emlx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEMLX.emlx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testEMLX.emlx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEMLX.emlx
diff --git 
a/tika-parsers/src/test/resources/test-documents/testEML_embedded_xhtml_and_img.eml
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEML_embedded_xhtml_and_img.eml
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testEML_embedded_xhtml_and_img.eml
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testEML_embedded_xhtml_and_img.eml
diff --git 
a/tika-parsers/src/test/resources/test-documents/testFontAfterBufferedText.rtf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFontAfterBufferedText.rtf
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testFontAfterBufferedText.rtf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFontAfterBufferedText.rtf
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote.one
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote1.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote1.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote1.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote1.one
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote2.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote2.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2.one
diff --git 
a/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier1.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2007OrEarlier1.one
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier1.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2007OrEarlier1.one
diff --git 
a/tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier2.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2007OrEarlier2.one
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testOneNote2007OrEarlier2.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2007OrEarlier2.one
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote2016.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2016.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote2016.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote2016.one
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote3.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote3.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote3.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote3.one
diff --git a/tika-parsers/src/test/resources/test-documents/testOneNote4.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote4.one
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testOneNote4.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNote4.one
diff --git 
a/tika-parsers/src/test/resources/test-documents/testOneNoteEmbeddedWordDoc.one 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNoteEmbeddedWordDoc.one
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testOneNoteEmbeddedWordDoc.one
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testOneNoteEmbeddedWordDoc.one
diff --git a/tika-parsers/src/test/resources/test-documents/testPROJECT2003.mpp 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPROJECT2003.mpp
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testPROJECT2003.mpp
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPROJECT2003.mpp
diff --git a/tika-parsers/src/test/resources/test-documents/testPROJECT2007.mpp 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPROJECT2007.mpp
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testPROJECT2007.mpp
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPROJECT2007.mpp
diff --git a/tika-parsers/src/test/resources/test-documents/testPUBLISHER.pub 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPUBLISHER.pub
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testPUBLISHER.pub
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testPUBLISHER.pub
diff --git a/tika-parsers/src/test/resources/test-documents/testWINMAIL.dat 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWINMAIL.dat
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testWINMAIL.dat
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWINMAIL.dat
diff --git a/tika-parsers/src/test/resources/test-documents/testWMF.wmf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWMF.wmf
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testWMF.wmf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWMF.wmf
diff --git a/tika-parsers/src/test/resources/test-documents/testWMF_charset.wmf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWMF_charset.wmf
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testWMF_charset.wmf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWMF_charset.wmf
diff --git 
a/tika-parsers/src/test/resources/test-documents/testWORKSSpreadsheet7.0.xlr 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWORKSSpreadsheet7.0.xlr
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testWORKSSpreadsheet7.0.xlr
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWORKSSpreadsheet7.0.xlr
diff --git a/tika-parsers/src/test/resources/test-documents/testWordArt.pptx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWordArt.pptx
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testWordArt.pptx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testWordArt.pptx
diff --git 
a/tika-parsers/src/test/resources/test-documents/testXLSX_Thumbnail.xlsx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXLSX_Thumbnail.xlsx
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/testXLSX_Thumbnail.xlsx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXLSX_Thumbnail.xlsx
diff --git a/tika-parsers/src/test/resources/test-documents/testXPS_various.xps 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXPS_various.xps
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/testXPS_various.xps
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testXPS_various.xps
diff --git 
a/tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testZIP_corrupted_oom.zip
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testZIP_corrupted_oom.zip
similarity index 100%
rename from 
tika-parser-modules/tika-parser-pkg-module/src/test/resources/test-documents/testZIP_corrupted_oom.zip
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testZIP_corrupted_oom.zip
diff --git a/tika-parsers/src/test/resources/test-documents/test_TIKA-1251.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_TIKA-1251.doc
similarity index 100%
rename from tika-parsers/src/test/resources/test-documents/test_TIKA-1251.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_TIKA-1251.doc
diff --git 
a/tika-parsers/src/test/resources/test-documents/test_embedded_zip.pptx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_embedded_zip.pptx
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/test_embedded_zip.pptx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_embedded_zip.pptx
diff --git 
a/tika-parsers/src/test/resources/test-documents/test_list_override.rtf 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_list_override.rtf
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/test_list_override.rtf
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_list_override.rtf
diff --git 
a/tika-parsers/src/test/resources/test-documents/test_recursive_embedded.doc 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded.doc
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/test_recursive_embedded.doc
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded.doc
diff --git 
a/tika-parsers/src/test/resources/test-documents/test_recursive_embedded.docx 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded.docx
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/test_recursive_embedded.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded.docx
diff --git 
a/tika-parsers/src/test/resources/test-documents/test_recursive_embedded_npe.docx
 
b/tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded_npe.docx
similarity index 100%
rename from 
tika-parsers/src/test/resources/test-documents/test_recursive_embedded_npe.docx
rename to 
tika-parser-modules/tika-parser-microsoft-module/src/test/resources/test-documents/test_recursive_embedded_npe.docx
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index e8d300c..dd6e25b 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -146,16 +146,6 @@
       <version>1.5</version>
     </dependency>
     <dependency>
-      <groupId>org.apache.james</groupId>
-      <artifactId>apache-mime4j-core</artifactId>
-      <version>${mime4j.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.james</groupId>
-      <artifactId>apache-mime4j-dom</artifactId>
-      <version>${mime4j.version}</version>
-    </dependency>
-    <dependency>
       <groupId>com.googlecode.plist</groupId>
       <artifactId>dd-plist</artifactId>
       <version>1.23</version>
diff --git 
a/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 
b/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
index 64a8f8f..ebeb5d2 100644
--- 
a/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
+++ 
b/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -42,18 +42,6 @@ org.apache.tika.parser.image.JpegParser
 org.apache.tika.parser.mail.RFC822Parser
 org.apache.tika.parser.mbox.MboxParser
 org.apache.tika.parser.mbox.OutlookPSTParser
-org.apache.tika.parser.microsoft.EMFParser
-org.apache.tika.parser.microsoft.WMFParser
-org.apache.tika.parser.microsoft.JackcessParser
-org.apache.tika.parser.microsoft.MSOwnerFileParser
-org.apache.tika.parser.microsoft.OfficeParser
-org.apache.tika.parser.microsoft.OldExcelParser
-org.apache.tika.parser.microsoft.TNEFParser
-org.apache.tika.parser.microsoft.onenote.OneNoteParser
-org.apache.tika.parser.microsoft.ooxml.OOXMLParser
-org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser
-org.apache.tika.parser.microsoft.xml.WordMLParser
-org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser
 org.apache.tika.parser.mp3.Mp3Parser
 org.apache.tika.parser.mp4.MP4Parser
 org.apache.tika.parser.hdf.HDFParser

Reply via email to