Author: jukka
Date: Fri Oct 16 15:14:50 2009
New Revision: 825928
URL: http://svn.apache.org/viewvc?rev=825928&view=rev
Log:
TIKA-302: patch: initial support for ePUB
More detailed application/epub+zip type information. We can now reliably
autodetect the format.
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Modified:
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=825928&r1=825927&r2=825928&view=diff
==============================================================================
---
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
(original)
+++
lucene/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
Fri Oct 16 15:14:50 2009
@@ -81,9 +81,18 @@
<glob pattern="*.emma"/>
</mime-type>
<mime-type type="application/epp+xml"/>
+
<mime-type type="application/epub+zip">
+ <acronym>EPUB</acronym>
+ <comment>Electronic Publication</comment>
+ <magic priority="50">
+ <match value="PK\003\004" type="string" offset="0">
+ <match value="mimetypeapplication/epub+zip" type="string" offset="30"/>
+ </match>
+ </magic>
<glob pattern="*.epub"/>
</mime-type>
+
<mime-type type="application/eshop"/>
<mime-type type="application/example"/>
<mime-type type="application/fastinfoset"/>
Modified:
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL:
http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=825928&r1=825927&r2=825928&view=diff
==============================================================================
---
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
(original)
+++
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
Fri Oct 16 15:14:50 2009
@@ -123,6 +123,10 @@
assertAutoDetect(resource, badResource, type, wrongMimeType, content);
}
+ public void testEpub() throws Exception {
+ assertAutoDetect(
+ "testEPUB.epub", "application/epub+zip", "");
+ }
public void testExcel() throws Exception {
assertAutoDetect("testEXCEL.xls", EXCEL, "Sample Excel Worksheet");