Author: jukka
Date: Sun Jun 28 17:31:14 2009
New Revision: 789130

URL: http://svn.apache.org/viewvc?rev=789130&view=rev
Log:
TIKA-254: parse ooxml templates and macro-enabled formats

Fixed some incorrect type settings from TIKA-253. See 
http://office.microsoft.com/en-us/help/HA100069351033.aspx#3 for a good 
overview of all these file types.

Modified:
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml

Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=789130&r1=789129&r2=789130&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Sun Jun 28 17:31:14 2009
@@ -188,26 +188,6 @@
     <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
 
-  <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
-    <glob pattern="*.ppam"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
-    <glob pattern="*.pptm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
-    <glob pattern="*.potm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
-    <glob pattern="*.ppsm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
   <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
   <mime-type type="application/vnd.ms-excel">
     <comment>Microsoft Excel Spreadsheet</comment>
@@ -230,24 +210,10 @@
     <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
 
-  <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
-    <glob pattern="*.xlsm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
-    <glob pattern="*.xltm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
-    <glob pattern="*.xlam"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
   <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
+    <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
     <glob pattern="*.xlsb"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
+    <sub-class-of type="application/vnd.ms-excel"/>
   </mime-type>
 
   <!-- http://www.iana.org/assignments/media-types/application/msword -->
@@ -272,16 +238,6 @@
     <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
 
-  <mime-type type="application/vnd.ms-word.document.macroenabled.12">
-    <glob pattern="*.docm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-word.template.macroenabled.12">
-    <glob pattern="*.dotm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
   <mime-type type="application/vnd.ms-outlook">
     <comment>Microsoft Outlook Message</comment>
     <glob pattern="*.msg" />
@@ -300,9 +256,19 @@
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
     <comment>Office Open XML Presentation</comment>
     <glob pattern="*.pptx"/>
+    <glob pattern="*.sldx"/>
+    <glob pattern="*.thmx"/>
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
+    <comment>Office Open XML Presentation (macro-enabled)</comment>
+    <glob pattern="*.pptm"/>
+    <glob pattern="*.potm"/>
+    <glob pattern="*.sldm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
     <comment>Office Open XML Presentation Template</comment>
     <glob pattern="*.potx"/>
@@ -315,30 +281,72 @@
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
+    <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
+    <glob pattern="*.ppsm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
+    <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
+    <glob pattern="*.ppam"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
-    <comment>Office Open XML Spreadsheet</comment>
+    <comment>Office Open XML Workbook</comment>
     <glob pattern="*.xlsx"/>
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
+    <comment>Office Open XML Workbook (macro-enabled)</comment>
+    <glob pattern="*.xlsm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
-    <comment>Office Open XML Spreadsheet Template</comment>
+    <comment>Office Open XML Workbook Template</comment>
     <glob pattern="*.xltx"/>
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
+    <comment>Office Open XML Workbook Template (macro-enabled)</comment>
+    <glob pattern="*.xltm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
+    <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
+    <glob pattern="*.xlam"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
     <comment>Office Open XML Document</comment>
     <glob pattern="*.docx"/>
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-word.document.macroenabled.12">
+    <comment>Office Open XML Document (macro-enabled)</comment>
+    <glob pattern="*.docm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
     <comment>Office Open XML Document Template</comment>
     <glob pattern="*.dotx"/>
     <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
 
+  <mime-type type="application/vnd.ms-word.template.macroenabled.12">
+    <comment>Office Open XML Document Template (macro-enabled)</comment>
+    <glob pattern="*.dotm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <!-- ===================================================================== -->
   <!-- Open Document Format for Office Applications (OpenDocument) v1.0      -->
   <!-- http://www.oasis-open.org/specs/index.php#opendocumentv1.0            -->

Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml?rev=789130&r1=789129&r2=789130&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/tika-config.xml Sun Jun 28 17:31:14 2009
@@ -32,30 +32,29 @@
                 <mime>application/x-tika-msoffice</mime>
                 <mime>application/vnd.visio</mime>
                 <mime>application/vnd.ms-powerpoint</mime>
-                <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
-                <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
-                <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
-                <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
                 <mime>application/vnd.ms-excel</mime>
-                <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
-                <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
-                <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
                 <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
                 <mime>application/msword</mime>
-                <mime>application/vnd.ms-word.document.macroenabled.12</mime>
-                <mime>application/vnd.ms-word.template.macroenabled.12</mime>
                 <mime>application/vnd.ms-outlook</mime>
         </parser>
         
         <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
                 <mime>application/x-tika-ooxml</mime>
                 <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
+                <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
                 <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
                 <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
+                <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
+                <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
                 <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
+                <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
                 <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
+                <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
+                <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
                 <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
+                <mime>application/vnd.ms-word.document.macroenabled.12</mime>
                 <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
+                <mime>application/vnd.ms-word.template.macroenabled.12</mime>
         </parser>
 
         <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">


Reply via email to