Author: jukka Date: Sun Jun 28 17:31:14 2009 New Revision: 789130 URL: http://svn.apache.org/viewvc?rev=789130&view=rev Log: TIKA-254: parse ooxml templates and macro-enabled formats
Fixed some incorrect type settings from TIKA-253. See http://office.microsoft.com/en-us/help/HA100069351033.aspx#3 for a good overview of all these file types. Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=789130&r1=789129&r2=789130&view=diff ============================================================================== --- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original) +++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Sun Jun 28 17:31:14 2009 @@ -188,26 +188,6 @@ <sub-class-of type="application/x-tika-msoffice"/> </mime-type> - <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12"> - <glob pattern="*.ppam"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12"> - <glob pattern="*.pptm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12"> - <glob pattern="*.potm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12"> - <glob pattern="*.ppsm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel --> <mime-type type="application/vnd.ms-excel"> <comment>Microsoft Excel Spreadsheet</comment> @@ -230,24 +210,10 @@ <sub-class-of type="application/x-tika-msoffice"/> </mime-type> - <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12"> - <glob pattern="*.xlsm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-excel.template.macroenabled.12"> - <glob pattern="*.xltm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-excel.addin.macroenabled.12"> - <glob pattern="*.xlam"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12"> + <comment>Microsoft Excel 2007 Binary Spreadsheet</comment> <glob pattern="*.xlsb"/> - <sub-class-of type="application/x-tika-msoffice"/> + <sub-class-of type="application/vnd.ms-excel"/> </mime-type> <!-- http://www.iana.org/assignments/media-types/application/msword --> @@ -272,16 +238,6 @@ <sub-class-of type="application/x-tika-msoffice"/> </mime-type> - <mime-type type="application/vnd.ms-word.document.macroenabled.12"> - <glob pattern="*.docm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - - <mime-type type="application/vnd.ms-word.template.macroenabled.12"> - <glob pattern="*.dotm"/> - <sub-class-of type="application/x-tika-msoffice"/> - </mime-type> - <mime-type type="application/vnd.ms-outlook"> <comment>Microsoft Outlook Message</comment> <glob pattern="*.msg" /> @@ -300,9 +256,19 @@ <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation"> <comment>Office Open XML Presentation</comment> <glob pattern="*.pptx"/> + <glob pattern="*.sldx"/> + <glob pattern="*.thmx"/> <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12"> + <comment>Office Open XML Presentation (macro-enabled)</comment> + <glob pattern="*.pptm"/> + <glob pattern="*.potm"/> + <glob pattern="*.sldm"/> + <sub-class-of type="application/x-tika-msoffice"/> + </mime-type> + <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template"> <comment>Office Open XML Presentation Template</comment> <glob pattern="*.potx"/> @@ -315,30 +281,72 @@ <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12"> + <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment> + <glob pattern="*.ppsm"/> + <sub-class-of type="application/x-tika-msoffice"/> + </mime-type> + + <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12"> + <comment>Office Open XML Presentation Add-in (macro-enabled)</comment> + <glob pattern="*.ppam"/> + <sub-class-of type="application/x-tika-msoffice"/> + </mime-type> + <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"> - <comment>Office Open XML Spreadsheet</comment> + <comment>Office Open XML Workbook</comment> <glob pattern="*.xlsx"/> <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12"> + <comment>Office Open XML Workbook (macro-enabled)</comment> + <glob pattern="*.xlsm"/> + <sub-class-of type="application/x-tika-ooxml"/> + </mime-type> + <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template"> - <comment>Office Open XML Spreadsheet Template</comment> + <comment>Office Open XML Workbook Template</comment> <glob pattern="*.xltx"/> <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-excel.template.macroenabled.12"> + <comment>Office Open XML Workbook Template (macro-enabled)</comment> + <glob pattern="*.xltm"/> + <sub-class-of type="application/x-tika-ooxml"/> + </mime-type> + + <mime-type type="application/vnd.ms-excel.addin.macroenabled.12"> + <comment>Office Open XML Workbook Add-in (macro-enabled)</comment> + <glob pattern="*.xlam"/> + <sub-class-of type="application/x-tika-ooxml"/> + </mime-type> + <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"> <comment>Office Open XML Document</comment> <glob pattern="*.docx"/> <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-word.document.macroenabled.12"> + <comment>Office Open XML Document (macro-enabled)</comment> + <glob pattern="*.docm"/> + <sub-class-of type="application/x-tika-ooxml"/> + </mime-type> + <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template"> <comment>Office Open XML Document Template</comment> <glob pattern="*.dotx"/> <sub-class-of type="application/x-tika-ooxml"/> </mime-type> + <mime-type type="application/vnd.ms-word.template.macroenabled.12"> + <comment>Office Open XML Document Template (macro-enabled)</comment> + <glob pattern="*.dotm"/> + <sub-class-of type="application/x-tika-ooxml"/> + </mime-type> + <!-- ===================================================================== --> <!-- Open Document Format for Office Applications (OpenDocument) v1.0 --> <!-- http://www.oasis-open.org/specs/index.php#opendocumentv1.0 --> Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml?rev=789130&r1=789129&r2=789130&view=diff ============================================================================== --- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml (original) +++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml Sun Jun 28 17:31:14 2009 @@ -32,30 +32,29 @@ <mime>application/x-tika-msoffice</mime> <mime>application/vnd.visio</mime> <mime>application/vnd.ms-powerpoint</mime> - <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime> - <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime> - <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime> - <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime> <mime>application/vnd.ms-excel</mime> - <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime> - <mime>application/vnd.ms-excel.template.macroenabled.12</mime> - <mime>application/vnd.ms-excel.addin.macroenabled.12</mime> <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime> <mime>application/msword</mime> - <mime>application/vnd.ms-word.document.macroenabled.12</mime> - <mime>application/vnd.ms-word.template.macroenabled.12</mime> <mime>application/vnd.ms-outlook</mime> </parser> <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser"> <mime>application/x-tika-ooxml</mime> <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime> + <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime> <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime> <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime> + <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime> + <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime> <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime> + <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime> <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime> + <mime>application/vnd.ms-excel.template.macroenabled.12</mime> + <mime>application/vnd.ms-excel.addin.macroenabled.12</mime> <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime> + <mime>application/vnd.ms-word.document.macroenabled.12</mime> <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime> + <mime>application/vnd.ms-word.template.macroenabled.12</mime> </parser> <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
