Author: jukka
Date: Sun Sep 27 16:54:07 2009
New Revision: 819342

URL: http://svn.apache.org/viewvc?rev=819342&view=rev
Log:
TIKA-285: Update media type registry to the latest httpd mime type database

Merged the Microsoft Office types with the mime.types information.

Modified:
    lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml

Modified: lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=819342&r1=819341&r2=819342&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Sun Sep 27 16:54:07 2009
@@ -22,210 +22,6 @@
 -->
 <mime-info>
 
-  <!-- ===================================================================== -->
-  <!-- Microsoft Office binary file formats                                  -->
-  <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx        -->
-  <!-- ===================================================================== -->
-
-  <mime-type type="application/x-tika-msoffice">
-    <magic>
-      <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8" />
-    </magic>
-  </mime-type>
-
-  <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
-  <mime-type type="application/vnd.visio">
-    <comment>Microsoft Visio Diagram</comment>
-    <glob pattern="*.vsd" />
-    <glob pattern="*.vst" />
-    <glob pattern="*.vsw" />
-    <glob pattern="*.vss" />
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
-  <mime-type type="application/vnd.ms-powerpoint">
-    <comment>Microsoft Powerpoint Presentation</comment>
-    <glob pattern="*.ppz" />
-    <glob pattern="*.ppt" />
-    <glob pattern="*.pps" />
-    <glob pattern="*.pot" />
-    <glob pattern="*.ppa" />
-    <alias type="application/mspowerpoint" />
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
-  <mime-type type="application/vnd.ms-excel">
-    <comment>Microsoft Excel Spreadsheet</comment>
-    <magic priority="50">
-      <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080" />
-      <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080" />
-      <match value="Biff5" type="string" offset="2114" />
-      <match value="Biff5" type="string" offset="2121" />
-      <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0" />
-    </magic>
-    <glob pattern="*.xls" />
-    <glob pattern="*.xlc" />
-    <glob pattern="*.xll" />
-    <glob pattern="*.xlm" />
-    <glob pattern="*.xlw" />
-    <glob pattern="*.xla" />
-    <glob pattern="*.xlt" />
-    <glob pattern="*.xld" />
-    <alias type="application/msexcel" />
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
-    <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
-    <glob pattern="*.xlsb"/>
-    <sub-class-of type="application/vnd.ms-excel"/>
-  </mime-type>
-
-  <!-- http://www.iana.org/assignments/media-types/application/msword -->
-  <mime-type type="application/msword">
-    <comment>Microsoft Word Document</comment>
-    <magic priority="50">
-      <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080" />
-      <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080" />
-      <match value="MSWordDoc" type="string" offset="2112" />
-      <match value="0x31be0000" type="big32" offset="0" />
-      <match value="PO^Q`" type="string" offset="0" />
-      <match value="\376\067\0\043" type="string" offset="0" />
-      <match value="\333\245-\0\0\0" type="string" offset="0" />
-      <match value="\354\245\301" type="string" offset="512" />
-      <match value="\320\317\021\340\241\261\032\341" type="string" offset="0" />
-      <match value="\224\246\056" type="string" offset="0" />
-      <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512" />
-    </magic>
-    <glob pattern="*.doc" />
-    <glob pattern="*.dot" />
-    <alias type="application/vnd.ms-word" />
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-outlook">
-    <comment>Microsoft Outlook Message</comment>
-    <glob pattern="*.msg" />
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <!-- ===================================================================== -->
-  <!-- Office Open XML file formats                                          -->
-  <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
-  <!-- ===================================================================== -->
-
-  <mime-type type="application/x-tika-ooxml">
-    <sub-class-of type="application/zip"/>
-    <magic priority="50">
-      <match value="PK\003\004" type="string" offset="0">
-        <match value="[Content_Types].xml" type="string" offset="30"/>
-      </match>
-    </magic>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
-    <comment>Office Open XML Presentation</comment>
-    <glob pattern="*.pptx"/>
-    <glob pattern="*.sldx"/>
-    <glob pattern="*.thmx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
-    <comment>Office Open XML Presentation (macro-enabled)</comment>
-    <glob pattern="*.pptm"/>
-    <glob pattern="*.potm"/>
-    <glob pattern="*.sldm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
-    <comment>Office Open XML Presentation Template</comment>
-    <glob pattern="*.potx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
-    <comment>Office Open XML Presentation Slideshow</comment>
-    <glob pattern="*.ppsx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
-    <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
-    <glob pattern="*.ppsm"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
-    <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
-    <glob pattern="*.ppam"/>
-    <sub-class-of type="application/x-tika-msoffice"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
-    <comment>Office Open XML Workbook</comment>
-    <glob pattern="*.xlsx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
-    <comment>Office Open XML Workbook (macro-enabled)</comment>
-    <glob pattern="*.xlsm"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
-    <comment>Office Open XML Workbook Template</comment>
-    <glob pattern="*.xltx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
-    <comment>Office Open XML Workbook Template (macro-enabled)</comment>
-    <glob pattern="*.xltm"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
-    <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
-    <glob pattern="*.xlam"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
-    <comment>Office Open XML Document</comment>
-    <glob pattern="*.docx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-word.document.macroenabled.12">
-    <comment>Office Open XML Document (macro-enabled)</comment>
-    <glob pattern="*.docm"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
-    <comment>Office Open XML Document Template</comment>
-    <glob pattern="*.dotx"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-  <mime-type type="application/vnd.ms-word.template.macroenabled.12">
-    <comment>Office Open XML Document Template (macro-enabled)</comment>
-    <glob pattern="*.dotm"/>
-    <sub-class-of type="application/x-tika-ooxml"/>
-  </mime-type>
-
-
-
-
-
-
-
-
   <mime-type type="application/activemessage"/>
   <mime-type type="application/andrew-inset">
     <glob pattern="*.ez"/>
@@ -400,10 +196,29 @@
   <mime-type type="application/mpeg4-generic"/>
   <mime-type type="application/mpeg4-iod"/>
   <mime-type type="application/mpeg4-iod-xmt"/>
+
+  <!-- http://www.iana.org/assignments/media-types/application/msword -->
   <mime-type type="application/msword">
+    <alias type="application/vnd.ms-word"/>
+    <comment>Microsoft Word Document</comment>
+    <magic priority="50">
+      <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080"/>
+      <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080"/>
+      <match value="MSWordDoc" type="string" offset="2112"/>
+      <match value="0x31be0000" type="big32" offset="0"/>
+      <match value="PO^Q`" type="string" offset="0"/>
+      <match value="\376\067\0\043" type="string" offset="0"/>
+      <match value="\333\245-\0\0\0" type="string" offset="0"/>
+      <match value="\354\245\301" type="string" offset="512"/>
+      <match value="\320\317\021\340\241\261\032\341" type="string" offset="0"/>
+      <match value="\224\246\056" type="string" offset="0"/>
+      <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512"/>
+    </magic>
     <glob pattern="*.doc"/>
     <glob pattern="*.dot"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/mxf">
     <glob pattern="*.mxf"/>
   </mime-type>
@@ -1284,26 +1099,52 @@
   <mime-type type="application/vnd.ms-cab-compressed">
     <glob pattern="*.cab"/>
   </mime-type>
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
   <mime-type type="application/vnd.ms-excel">
+    <alias type="application/msexcel" />
+    <comment>Microsoft Excel Spreadsheet</comment>
+    <magic priority="50">
+      <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080"/>
+      <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080"/>
+      <match value="Biff5" type="string" offset="2114"/>
+      <match value="Biff5" type="string" offset="2121"/>
+      <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0"/>
+    </magic>
     <glob pattern="*.xls"/>
     <glob pattern="*.xlm"/>
     <glob pattern="*.xla"/>
     <glob pattern="*.xlc"/>
     <glob pattern="*.xlt"/>
     <glob pattern="*.xlw"/>
+    <glob pattern="*.xll"/>
+    <glob pattern="*.xld"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
+    <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
     <glob pattern="*.xlam"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
-  <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
-    <glob pattern="*.xlsb"/>
-  </mime-type>
+
   <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
+    <comment>Office Open XML Workbook (macro-enabled)</comment>
     <glob pattern="*.xlsm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
+    <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
+    <glob pattern="*.xlsb"/>
+    <sub-class-of type="application/vnd.ms-excel"/>
+  </mime-type>
+
   <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
     <glob pattern="*.xltm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-fontobject">
     <glob pattern="*.eot"/>
   </mime-type>
@@ -1316,6 +1157,13 @@
   <mime-type type="application/vnd.ms-lrm">
     <glob pattern="*.lrm"/>
   </mime-type>
+
+  <mime-type type="application/vnd.ms-outlook">
+    <comment>Microsoft Outlook Message</comment>
+    <glob pattern="*.msg" />
+    <sub-class-of type="application/x-tika-msoffice"/>
+  </mime-type>
+
   <mime-type type="application/vnd.ms-pki.seccat">
     <glob pattern="*.cat"/>
   </mime-type>
@@ -1323,43 +1171,66 @@
     <glob pattern="*.stl"/>
   </mime-type>
   <mime-type type="application/vnd.ms-playready.initiator+xml"/>
-<!--
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
   <mime-type type="application/vnd.ms-powerpoint">
+    <alias type="application/mspowerpoint"/>
+    <comment>Microsoft Powerpoint Presentation</comment>
+    <glob pattern="*.ppz"/>
     <glob pattern="*.ppt"/>
     <glob pattern="*.pps"/>
     <glob pattern="*.pot"/>
+    <glob pattern="*.ppa"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
+    <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
     <glob pattern="*.ppam"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
+    <comment>Office Open XML Presentation (macro-enabled)</comment>
     <glob pattern="*.pptm"/>
+    <glob pattern="*.potm"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-powerpoint.slide.macroenabled.12">
     <glob pattern="*.sldm"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
+    <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
     <glob pattern="*.ppsm"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
-  <mime-type type="application/vnd.ms-powerpoint.template.macroenabled.12">
-    <glob pattern="*.potm"/>
-  </mime-type>
+
   <mime-type type="application/vnd.ms-project">
     <glob pattern="*.mpp"/>
     <glob pattern="*.mpt"/>
   </mime-type>
--->
+
   <mime-type type="application/vnd.ms-tnef"/>
   <mime-type type="application/vnd.ms-wmdrm.lic-chlg-req"/>
   <mime-type type="application/vnd.ms-wmdrm.lic-resp"/>
   <mime-type type="application/vnd.ms-wmdrm.meter-chlg-req"/>
   <mime-type type="application/vnd.ms-wmdrm.meter-resp"/>
+
   <mime-type type="application/vnd.ms-word.document.macroenabled.12">
+    <comment>Office Open XML Document (macro-enabled)</comment>
     <glob pattern="*.docm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-word.template.macroenabled.12">
+    <comment>Office Open XML Document Template (macro-enabled)</comment>
     <glob pattern="*.dotm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.ms-works">
     <glob pattern="*.wps"/>
     <glob pattern="*.wks"/>
@@ -1673,35 +1544,70 @@
   <mime-type type="application/vnd.omads-file+xml"/>
   <mime-type type="application/vnd.omads-folder+xml"/>
   <mime-type type="application/vnd.omaloc-supl-init"/>
-<!--
+
   <mime-type type="application/vnd.openofficeorg.extension">
     <glob pattern="*.oxt"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
-    <glob pattern="pptx"/>
+    <comment>Office Open XML Presentation</comment>
+    <glob pattern="*.pptx"/>
+    <glob pattern="*.thmx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slide">
     <glob pattern="*.sldx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
     <glob pattern="*.ppsx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
+    <comment>Office Open XML Presentation Template</comment>
     <glob pattern="*.potx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
+  <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
+    <comment>Office Open XML Presentation Slideshow</comment>
+    <glob pattern="*.ppsx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
+    <comment>Office Open XML Workbook</comment>
     <glob pattern="*.xlsx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
+    <comment>Office Open XML Workbook Template</comment>
     <glob pattern="*.xltx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
+  <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
+    <comment>Office Open XML Workbook Template (macro-enabled)</comment>
+    <glob pattern="*.xltm"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
+  </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
+    <comment>Office Open XML Document</comment>
     <glob pattern="*.docx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
+
   <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
+    <comment>Office Open XML Document Template</comment>
     <glob pattern="*.dotx"/>
+    <sub-class-of type="application/x-tika-ooxml"/>
   </mime-type>
--->
+
   <mime-type type="application/vnd.osa.netdeploy"/>
   <mime-type type="application/vnd.osgi.bundle"/>
   <mime-type type="application/vnd.osgi.dp">
@@ -1979,12 +1885,17 @@
   <mime-type type="application/vnd.vd-study"/>
   <mime-type type="application/vnd.vectorworks"/>
   <mime-type type="application/vnd.vidsoft.vidconference"/>
+
+  <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
   <mime-type type="application/vnd.visio">
+    <comment>Microsoft Visio Diagram</comment>
     <glob pattern="*.vsd"/>
     <glob pattern="*.vst"/>
     <glob pattern="*.vss"/>
     <glob pattern="*.vsw"/>
+    <sub-class-of type="application/x-tika-msoffice"/>
   </mime-type>
+
   <mime-type type="application/vnd.visionary">
     <glob pattern="*.vis"/>
   </mime-type>
@@ -2573,6 +2484,29 @@
     <glob pattern="*.texi"/>
   </mime-type>
 
+  <!-- =================================================================== -->
+  <!-- Microsoft Office binary file formats                                -->
+  <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx      -->
+  <!-- =================================================================== -->
+  <mime-type type="application/x-tika-msoffice">
+    <magic>
+      <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8"/>
+    </magic>
+  </mime-type>
+
+  <!-- =================================================================== -->
+  <!-- Office Open XML file formats                                        -->
+  <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
+  <!-- =================================================================== -->
+  <mime-type type="application/x-tika-ooxml">
+    <sub-class-of type="application/zip"/>
+    <magic priority="50">
+      <match value="PK\003\004" type="string" offset="0">
+        <match value="[Content_Types].xml" type="string" offset="30"/>
+      </match>
+    </magic>
+  </mime-type>
+
   <mime-type type="application/x-ustar">
     <glob pattern="*.ustar"/>
   </mime-type>


Reply via email to