Author: nick
Date: Fri Jul 15 17:04:09 2011
New Revision: 1147250
URL: http://svn.apache.org/viewvc?rev=1147250&view=rev
Log:
TIKA-507 Split the mime type entries for AFM and PFM (font metrics) out from
the fonts themselves, and add magic detection patterns for them
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
Modified:
tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml?rev=1147250&r1=1147249&r2=1147250&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml (original)
+++ tika/trunk/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml Fri Jul 15 17:04:09 2011
@@ -2354,9 +2354,34 @@
<mime-type type="application/x-font-type1">
<glob pattern="*.pfa"/>
<glob pattern="*.pfb"/>
- <glob pattern="*.pfm"/>
+ <magic priority="40">
+ <!-- Match for PFB, the binary format -->
+ <match value="\x80\x01\xFF\xFF\x00\x00%!PS-Adobe-Font" type="string"
+ mask="0xFFFF0000FFFFFFFFFFFFFFFFFFFFFFFFF" offset="0" />
+ <!-- Match for PFA, the text format -->
+ <match value="%!PS-AdobeFont-1.0" type="string" offset="0" />
+ </magic>
+ </mime-type>
+
+ <mime-type type="application/x-font-adobe-metric">
+ <_comment>Adobe Font Metric</_comment>
<glob pattern="*.afm"/>
+ <glob pattern="*.acfm"/>
+ <glob pattern="*.amfm"/>
+ <magic priority="40">
+ <match value="StartFontMetrics" type="string" offset="0"/>
+ </magic>
</mime-type>
+
+ <mime-type type="application/x-font-printer-metric">
+ <_comment>Printer Font Metric</_comment>
+ <glob pattern="*.pfm"/>
+ <magic priority="40">
+ <match value="0x0001FFFF0000436f707972" type="string" offset="0"
+ mask="0xFFFF0000FFFFFFFFFFFFFF" />
+ </magic>
+ </mime-type>
+
<mime-type type="application/x-font-vfont"/>
<mime-type type="application/x-foxmail">
Modified:
tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java?rev=1147250&r1=1147249&r2=1147250&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java Fri Jul 15 17:04:09 2011
@@ -552,8 +552,11 @@ public class TikaDetectionTest extends T
assertEquals("application/x-font-ttf", tika.detect("x.ttc"));
assertEquals("application/x-font-type1", tika.detect("x.pfa"));
assertEquals("application/x-font-type1", tika.detect("x.pfb"));
- assertEquals("application/x-font-type1", tika.detect("x.pfm"));
- assertEquals("application/x-font-type1", tika.detect("x.afm"));
+ // TODO Get these fixed upstream too
+ //assertEquals("application/x-font-type1", tika.detect("x.pfm"));
+ //assertEquals("application/x-font-type1", tika.detect("x.afm"));
+ assertEquals("application/x-font-printer-metric", tika.detect("x.pfm"));
+ assertEquals("application/x-font-adobe-metric", tika.detect("x.afm"));
assertEquals("application/x-futuresplash", tika.detect("x.spl"));
assertEquals("application/x-gnumeric", tika.detect("x.gnumeric"));
assertEquals("application/x-gtar", tika.detect("x.gtar"));
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java?rev=1147250&r1=1147249&r2=1147250&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java Fri Jul 15 17:04:09 2011
@@ -404,6 +404,19 @@ public class TestMimeTypes extends TestC
assertTypeByName("image/x-raw-casio", "x.bay");
assertTypeByName("image/x-raw-rawzor", "x.rwz");
}
+
+ /**
+ * Tests that we correctly detect the font types
+ */
+ public void testFontDetection() throws Exception {
+ assertTypeByName("application/x-font-adobe-metric", "x.afm");
+ assertTypeByData("application/x-font-adobe-metric", "testAFM.afm");
+
+ assertTypeByName("application/x-font-printer-metric", "x.pfm");
+
+ assertTypeByName("application/x-font-type1", "x.pfa");
+ assertTypeByName("application/x-font-type1", "x.pfb");
+ }
/**
* Tests MimeTypes.getMimeType(URL), which examines both the byte header
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm?rev=1147250&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm Fri Jul 15 17:04:09 2011
@@ -0,0 +1,50 @@
+StartFontMetrics 4.1
+
+Comment This is a comment in a sample file
+
+Comment Creation Date: Fri Jul 15 17:50:51 2011
+
+Comment UniqueID 12345
+
+Comment VMusage 30820 39997
+
+FontName TestFontName
+
+FullName TestFullName
+
+FamilyName TestSymbol
+
+Weight Medium
+
+ItalicAngle 0
+
+IsFixedPitch false
+
+CharacterSet Special
+
+FontBBox -180 -293 1010 1090
+
+UnderlinePosition -100
+
+UnderlineThickness 50
+
+Version 001.008
+
+Notice This is a sample file
+
+EncodingScheme FontSpecific
+
+StdHW 91
+
+StdVW 86
+
+StartCharMetrics 190
+
+C 32 ; WX 250 ; N space ; B 0 0 0 0 ;
+
+C 33 ; WX 313 ; N exclam ; B 128 -17 240 672 ;
+
+EndCharMetrics
+
+EndFontMetrics
+