Author: jukka
Date: Sun Sep 27 16:54:07 2009
New Revision: 819342
URL: http://svn.apache.org/viewvc?rev=819342&view=rev
Log:
TIKA-285: Update media type registry to the latest httpd mime type database
Merged the Microsoft Office types with the mime.types information.
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=819342&r1=819341&r2=819342&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Sun Sep 27 16:54:07 2009
@@ -22,210 +22,6 @@
-->
<mime-info>
- <!-- ===================================================================== -->
- <!-- Microsoft Office binary file formats -->
- <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx -->
- <!-- ===================================================================== -->
-
- <mime-type type="application/x-tika-msoffice">
- <magic>
- <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8" />
- </magic>
- </mime-type>
-
- <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
- <mime-type type="application/vnd.visio">
- <comment>Microsoft Visio Diagram</comment>
- <glob pattern="*.vsd" />
- <glob pattern="*.vst" />
- <glob pattern="*.vsw" />
- <glob pattern="*.vss" />
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
- <mime-type type="application/vnd.ms-powerpoint">
- <comment>Microsoft Powerpoint Presentation</comment>
- <glob pattern="*.ppz" />
- <glob pattern="*.ppt" />
- <glob pattern="*.pps" />
- <glob pattern="*.pot" />
- <glob pattern="*.ppa" />
- <alias type="application/mspowerpoint" />
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
- <mime-type type="application/vnd.ms-excel">
- <comment>Microsoft Excel Spreadsheet</comment>
- <magic priority="50">
- <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080" />
- <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080" />
- <match value="Biff5" type="string" offset="2114" />
- <match value="Biff5" type="string" offset="2121" />
- <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0" />
- </magic>
- <glob pattern="*.xls" />
- <glob pattern="*.xlc" />
- <glob pattern="*.xll" />
- <glob pattern="*.xlm" />
- <glob pattern="*.xlw" />
- <glob pattern="*.xla" />
- <glob pattern="*.xlt" />
- <glob pattern="*.xld" />
- <alias type="application/msexcel" />
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
- <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
- <glob pattern="*.xlsb"/>
- <sub-class-of type="application/vnd.ms-excel"/>
- </mime-type>
-
- <!-- http://www.iana.org/assignments/media-types/application/msword -->
- <mime-type type="application/msword">
- <comment>Microsoft Word Document</comment>
- <magic priority="50">
- <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080" />
- <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080" />
- <match value="MSWordDoc" type="string" offset="2112" />
- <match value="0x31be0000" type="big32" offset="0" />
- <match value="PO^Q`" type="string" offset="0" />
- <match value="\376\067\0\043" type="string" offset="0" />
- <match value="\333\245-\0\0\0" type="string" offset="0" />
- <match value="\354\245\301" type="string" offset="512" />
- <match value="\320\317\021\340\241\261\032\341" type="string" offset="0" />
- <match value="\224\246\056" type="string" offset="0" />
- <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512" />
- </magic>
- <glob pattern="*.doc" />
- <glob pattern="*.dot" />
- <alias type="application/vnd.ms-word" />
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-outlook">
- <comment>Microsoft Outlook Message</comment>
- <glob pattern="*.msg" />
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <!-- ===================================================================== -->
- <!-- Office Open XML file formats -->
- <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
- <!-- ===================================================================== -->
-
- <mime-type type="application/x-tika-ooxml">
- <sub-class-of type="application/zip"/>
- <magic priority="50">
- <match value="PK\003\004" type="string" offset="0">
- <match value="[Content_Types].xml" type="string" offset="30"/>
- </match>
- </magic>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
- <comment>Office Open XML Presentation</comment>
- <glob pattern="*.pptx"/>
- <glob pattern="*.sldx"/>
- <glob pattern="*.thmx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
- <comment>Office Open XML Presentation (macro-enabled)</comment>
- <glob pattern="*.pptm"/>
- <glob pattern="*.potm"/>
- <glob pattern="*.sldm"/>
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
- <comment>Office Open XML Presentation Template</comment>
- <glob pattern="*.potx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
- <comment>Office Open XML Presentation Slideshow</comment>
- <glob pattern="*.ppsx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
- <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
- <glob pattern="*.ppsm"/>
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
- <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
- <glob pattern="*.ppam"/>
- <sub-class-of type="application/x-tika-msoffice"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
- <comment>Office Open XML Workbook</comment>
- <glob pattern="*.xlsx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
- <comment>Office Open XML Workbook (macro-enabled)</comment>
- <glob pattern="*.xlsm"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
- <comment>Office Open XML Workbook Template</comment>
- <glob pattern="*.xltx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
- <comment>Office Open XML Workbook Template (macro-enabled)</comment>
- <glob pattern="*.xltm"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
- <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
- <glob pattern="*.xlam"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
- <comment>Office Open XML Document</comment>
- <glob pattern="*.docx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-word.document.macroenabled.12">
- <comment>Office Open XML Document (macro-enabled)</comment>
- <glob pattern="*.docm"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
- <comment>Office Open XML Document Template</comment>
- <glob pattern="*.dotx"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
- <mime-type type="application/vnd.ms-word.template.macroenabled.12">
- <comment>Office Open XML Document Template (macro-enabled)</comment>
- <glob pattern="*.dotm"/>
- <sub-class-of type="application/x-tika-ooxml"/>
- </mime-type>
-
-
-
-
-
-
-
-
<mime-type type="application/activemessage"/>
<mime-type type="application/andrew-inset">
<glob pattern="*.ez"/>
@@ -400,10 +196,29 @@
<mime-type type="application/mpeg4-generic"/>
<mime-type type="application/mpeg4-iod"/>
<mime-type type="application/mpeg4-iod-xmt"/>
+
+ <!-- http://www.iana.org/assignments/media-types/application/msword -->
<mime-type type="application/msword">
+ <alias type="application/vnd.ms-word"/>
+ <comment>Microsoft Word Document</comment>
+ <magic priority="50">
+ <match value="Microsoft\ Word\ 6.0\ Document" type="string" offset="2080"/>
+ <match value="Documento\ Microsoft\ Word\ 6" type="string" offset="2080"/>
+ <match value="MSWordDoc" type="string" offset="2112"/>
+ <match value="0x31be0000" type="big32" offset="0"/>
+ <match value="PO^Q`" type="string" offset="0"/>
+ <match value="\376\067\0\043" type="string" offset="0"/>
+ <match value="\333\245-\0\0\0" type="string" offset="0"/>
+ <match value="\354\245\301" type="string" offset="512"/>
+ <match value="\320\317\021\340\241\261\032\341" type="string" offset="0"/>
+ <match value="\224\246\056" type="string" offset="0"/>
+ <match value="R\0o\0o\0t\0\ \0E\0n\0t\0r\0y" type="string" offset="512"/>
+ </magic>
<glob pattern="*.doc"/>
<glob pattern="*.dot"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/mxf">
<glob pattern="*.mxf"/>
</mime-type>
@@ -1284,26 +1099,52 @@
<mime-type type="application/vnd.ms-cab-compressed">
<glob pattern="*.cab"/>
</mime-type>
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-excel -->
<mime-type type="application/vnd.ms-excel">
+ <alias type="application/msexcel" />
+ <comment>Microsoft Excel Spreadsheet</comment>
+ <magic priority="50">
+ <match value="Microsoft\ Excel\ 5.0\ Worksheet" type="string" offset="2080"/>
+ <match value="Foglio\ di\ lavoro\ Microsoft\ Exce" type="string" offset="2080"/>
+ <match value="Biff5" type="string" offset="2114"/>
+ <match value="Biff5" type="string" offset="2121"/>
+ <match value="\x09\x04\x06\x00\x00\x00\x10\x00" type="string" offset="0"/>
+ </magic>
<glob pattern="*.xls"/>
<glob pattern="*.xlm"/>
<glob pattern="*.xla"/>
<glob pattern="*.xlc"/>
<glob pattern="*.xlt"/>
<glob pattern="*.xlw"/>
+ <glob pattern="*.xll"/>
+ <glob pattern="*.xld"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.ms-excel.addin.macroenabled.12">
+ <comment>Office Open XML Workbook Add-in (macro-enabled)</comment>
<glob pattern="*.xlam"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
- <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
- <glob pattern="*.xlsb"/>
- </mime-type>
+
<mime-type type="application/vnd.ms-excel.sheet.macroenabled.12">
+ <comment>Office Open XML Workbook (macro-enabled)</comment>
<glob pattern="*.xlsm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
+ <mime-type type="application/vnd.ms-excel.sheet.binary.macroenabled.12">
+ <comment>Microsoft Excel 2007 Binary Spreadsheet</comment>
+ <glob pattern="*.xlsb"/>
+ <sub-class-of type="application/vnd.ms-excel"/>
+ </mime-type>
+
<mime-type type="application/vnd.ms-excel.template.macroenabled.12">
<glob pattern="*.xltm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.ms-fontobject">
<glob pattern="*.eot"/>
</mime-type>
@@ -1316,6 +1157,13 @@
<mime-type type="application/vnd.ms-lrm">
<glob pattern="*.lrm"/>
</mime-type>
+
+ <mime-type type="application/vnd.ms-outlook">
+ <comment>Microsoft Outlook Message</comment>
+ <glob pattern="*.msg" />
+ <sub-class-of type="application/x-tika-msoffice"/>
+ </mime-type>
+
<mime-type type="application/vnd.ms-pki.seccat">
<glob pattern="*.cat"/>
</mime-type>
@@ -1323,43 +1171,66 @@
<glob pattern="*.stl"/>
</mime-type>
<mime-type type="application/vnd.ms-playready.initiator+xml"/>
-<!--
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.ms-powerpoint -->
<mime-type type="application/vnd.ms-powerpoint">
+ <alias type="application/mspowerpoint"/>
+ <comment>Microsoft Powerpoint Presentation</comment>
+ <glob pattern="*.ppz"/>
<glob pattern="*.ppt"/>
<glob pattern="*.pps"/>
<glob pattern="*.pot"/>
+ <glob pattern="*.ppa"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.ms-powerpoint.addin.macroenabled.12">
+ <comment>Office Open XML Presentation Add-in (macro-enabled)</comment>
<glob pattern="*.ppam"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.ms-powerpoint.presentation.macroenabled.12">
+ <comment>Office Open XML Presentation (macro-enabled)</comment>
<glob pattern="*.pptm"/>
+ <glob pattern="*.potm"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.ms-powerpoint.slide.macroenabled.12">
<glob pattern="*.sldm"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.ms-powerpoint.slideshow.macroenabled.12">
+ <comment>Office Open XML Presentation Slideshow (macro-enabled)</comment>
<glob pattern="*.ppsm"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
- <mime-type type="application/vnd.ms-powerpoint.template.macroenabled.12">
- <glob pattern="*.potm"/>
- </mime-type>
+
<mime-type type="application/vnd.ms-project">
<glob pattern="*.mpp"/>
<glob pattern="*.mpt"/>
</mime-type>
--->
+
<mime-type type="application/vnd.ms-tnef"/>
<mime-type type="application/vnd.ms-wmdrm.lic-chlg-req"/>
<mime-type type="application/vnd.ms-wmdrm.lic-resp"/>
<mime-type type="application/vnd.ms-wmdrm.meter-chlg-req"/>
<mime-type type="application/vnd.ms-wmdrm.meter-resp"/>
+
<mime-type type="application/vnd.ms-word.document.macroenabled.12">
+ <comment>Office Open XML Document (macro-enabled)</comment>
<glob pattern="*.docm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.ms-word.template.macroenabled.12">
+ <comment>Office Open XML Document Template (macro-enabled)</comment>
<glob pattern="*.dotm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.ms-works">
<glob pattern="*.wps"/>
<glob pattern="*.wks"/>
@@ -1673,35 +1544,70 @@
<mime-type type="application/vnd.omads-file+xml"/>
<mime-type type="application/vnd.omads-folder+xml"/>
<mime-type type="application/vnd.omaloc-supl-init"/>
-<!--
+
<mime-type type="application/vnd.openofficeorg.extension">
<glob pattern="*.oxt"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.presentationml.presentation">
- <glob pattern="pptx"/>
+ <comment>Office Open XML Presentation</comment>
+ <glob pattern="*.pptx"/>
+ <glob pattern="*.thmx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slide">
<glob pattern="*.sldx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
<glob pattern="*.ppsx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.presentationml.template">
+ <comment>Office Open XML Presentation Template</comment>
<glob pattern="*.potx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
+ <mime-type type="application/vnd.openxmlformats-officedocument.presentationml.slideshow">
+ <comment>Office Open XML Presentation Slideshow</comment>
+ <glob pattern="*.ppsx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">
+ <comment>Office Open XML Workbook</comment>
<glob pattern="*.xlsx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.spreadsheetml.template">
+ <comment>Office Open XML Workbook Template</comment>
<glob pattern="*.xltx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
+ <mime-type type="application/vnd.ms-excel.template.macroenabled.12">
+ <comment>Office Open XML Workbook Template (macro-enabled)</comment>
+ <glob pattern="*.xltm"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
+ </mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.document">
+ <comment>Office Open XML Document</comment>
<glob pattern="*.docx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
+
<mime-type type="application/vnd.openxmlformats-officedocument.wordprocessingml.template">
+ <comment>Office Open XML Document Template</comment>
<glob pattern="*.dotx"/>
+ <sub-class-of type="application/x-tika-ooxml"/>
</mime-type>
--->
+
<mime-type type="application/vnd.osa.netdeploy"/>
<mime-type type="application/vnd.osgi.bundle"/>
<mime-type type="application/vnd.osgi.dp">
@@ -1979,12 +1885,17 @@
<mime-type type="application/vnd.vd-study"/>
<mime-type type="application/vnd.vectorworks"/>
<mime-type type="application/vnd.vidsoft.vidconference"/>
+
+ <!-- http://www.iana.org/assignments/media-types/application/vnd.visio -->
<mime-type type="application/vnd.visio">
+ <comment>Microsoft Visio Diagram</comment>
<glob pattern="*.vsd"/>
<glob pattern="*.vst"/>
<glob pattern="*.vss"/>
<glob pattern="*.vsw"/>
+ <sub-class-of type="application/x-tika-msoffice"/>
</mime-type>
+
<mime-type type="application/vnd.visionary">
<glob pattern="*.vis"/>
</mime-type>
@@ -2573,6 +2484,29 @@
<glob pattern="*.texi"/>
</mime-type>
+ <!-- =================================================================== -->
+ <!-- Microsoft Office binary file formats -->
+ <!-- http://www.microsoft.com/interop/docs/OfficeBinaryFormats.mspx -->
+ <!-- =================================================================== -->
+ <mime-type type="application/x-tika-msoffice">
+ <magic>
+ <match value="0xd0cf11e0a1b11ae1" type="string" offset="0:8"/>
+ </magic>
+ </mime-type>
+
+ <!-- =================================================================== -->
+ <!-- Office Open XML file formats -->
+ <!-- http://www.ecma-international.org/publications/standards/Ecma-376.htm -->
+ <!-- =================================================================== -->
+ <mime-type type="application/x-tika-ooxml">
+ <sub-class-of type="application/zip"/>
+ <magic priority="50">
+ <match value="PK\003\004" type="string" offset="0">
+ <match value="[Content_Types].xml" type="string" offset="30"/>
+ </match>
+ </magic>
+ </mime-type>
+
<mime-type type="application/x-ustar">
<glob pattern="*.ustar"/>
</mime-type>