Author: nick
Date: Fri Jan 20 15:56:05 2012
New Revision: 1233973

URL: http://svn.apache.org/viewvc?rev=1233973&view=rev
Log:
TIKA-507 FontBox-powered .afm font metrics parser, patch from Fernando Arreola

Added:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/
    
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/AdobeFontMetricParserTest.java
Modified:
    
tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
    tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm

Added: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java?rev=1233973&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
 (added)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
 Fri Jan 20 15:56:05 2012
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.font;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.fontbox.afm.AFMParser;
+import org.apache.fontbox.afm.FontMetric;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for Adobe Font Metrics (AFM) files, built on FontBox's
+ * {@link AFMParser}. Font properties are stored as metadata and the
+ * comments found in the file are emitted as XHTML paragraphs.
+ */
+public class AdobeFontMetricParser extends AbstractParser {
+    /** Serial version UID */
+    private static final long serialVersionUID = -4820306522217196835L;
+
+    private static final MediaType AFM_TYPE =
+         MediaType.application( "x-font-adobe-metric" );
+
+    private static final Set<MediaType> SUPPORTED_TYPES =
+         Collections.singleton( AFM_TYPE );
+
+    /**
+     * Returns the single AFM media type, whatever the context.
+     *
+     * @param context parse context (not consulted)
+     * @return the set holding only the AFM media type
+     */
+    public Set<MediaType> getSupportedTypes( ParseContext context ) {
+       return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Runs FontBox over the stream, records the font properties as
+     * metadata and writes the comments out as XHTML. The first
+     * "Creation Date" comment that carries a value after its colon is
+     * stored as the creation date metadata instead of being written
+     * out as text.
+     *
+     * @param stream   the AFM document to read
+     * @param handler  receives the XHTML SAX events
+     * @param metadata receives the content type, title and font values
+     * @param context  parse context (not consulted)
+     * @throws IOException   if FontBox fails to read the stream
+     * @throws SAXException  if the handler rejects an event
+     * @throws TikaException declared by the parser contract
+     */
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context)
+                      throws IOException, SAXException, TikaException {
+       // Have FontBox process the file
+       AFMParser parser = new AFMParser( stream );
+       parser.parse();
+       FontMetric fontMetrics = parser.getResult();
+
+       // The list comes from FontBox and may be its own (possibly
+       // unmodifiable) list, so it is only read here: the creation
+       // date comment is skipped by index rather than removed
+       List<String> comments = fontMetrics.getComments();
+       int dateIndex = findCreationDate( comments );
+       if (dateIndex >= 0) {
+          metadata.set( Metadata.CREATION_DATE,
+                        valueAfterColon( comments.get( dateIndex ) ) );
+       }
+
+       metadata.set( Metadata.CONTENT_TYPE, AFM_TYPE.toString() );
+       metadata.set( Metadata.TITLE, fontMetrics.getFullName() );
+
+       // Add metadata associated with the font type
+       addMetadataByString( metadata, "AvgCharacterWidth",
+             Float.toString( fontMetrics.getAverageCharacterWidth() ) );
+       addMetadataByString( metadata, "DocVersion",
+             Float.toString( fontMetrics.getAFMVersion() ) );
+       addMetadataByString( metadata, "FontName",
+             fontMetrics.getFontName() );
+       addMetadataByString( metadata, "FontFullName",
+             fontMetrics.getFullName() );
+       addMetadataByString( metadata, "FontFamilyName",
+             fontMetrics.getFamilyName() );
+       addMetadataByString( metadata, "FontVersion",
+             fontMetrics.getFontVersion() );
+       addMetadataByString( metadata, "FontWeight",
+             fontMetrics.getWeight() );
+       addMetadataByString( metadata, "FontNotice",
+             fontMetrics.getNotice() );
+       addMetadataByString( metadata, "FontUnderlineThickness",
+             Float.toString( fontMetrics.getUnderlineThickness() ) );
+
+       // Output the remaining comments as text
+       XHTMLContentHandler xhtml =
+             new XHTMLContentHandler( handler, metadata );
+       xhtml.startDocument();
+
+       // The heading is only written when something is left to show
+       int remaining = comments.size() - (dateIndex >= 0 ? 1 : 0);
+       if (remaining > 0) {
+          xhtml.element( "h1", "Comments" );
+          xhtml.startElement( "div", "class", "comments" );
+          for (int i = 0; i < comments.size(); i++) {
+             if (i != dateIndex) {
+                xhtml.element( "p", comments.get( i ) );
+             }
+          }
+          xhtml.endElement( "div" );
+       }
+
+       xhtml.endDocument();
+    }
+
+    /** Adds the named metadata value unless it is null. */
+    private static void addMetadataByString(
+          Metadata metadata, String name, String value ) {
+       if (value != null) {
+          metadata.add( name, value );
+       }
+    }
+
+    /**
+     * Returns the index of the first comment that mentions
+     * "Creation Date" and carries a non-empty value after its first
+     * colon, or -1 when there is none. A matching comment without a
+     * value is left alone so that it is still written out as text.
+     */
+    private static int findCreationDate( List<String> comments ) {
+       for (int i = 0; i < comments.size(); i++) {
+          String comment = comments.get( i );
+          if (comment.matches( ".*Creation\\sDate.*" )
+                && valueAfterColon( comment ).length() > 0) {
+             return i;
+          }
+       }
+       return -1;
+    }
+
+    /**
+     * Returns the trimmed text after the first colon, or the empty
+     * string when the comment has no colon or nothing follows it.
+     */
+    private static String valueAfterColon( String comment ) {
+       int colon = comment.indexOf( ':' );
+       return colon < 0 ? "" : comment.substring( colon + 1 ).trim();
+    }
+}

Modified: 
tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser?rev=1233973&r1=1233972&r2=1233973&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 (original)
+++ 
tika/trunk/tika-parsers/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
 Fri Jan 20 15:56:05 2012
@@ -19,6 +19,7 @@ org.apache.tika.parser.audio.MidiParser
 org.apache.tika.parser.dwg.DWGParser
 org.apache.tika.parser.epub.EpubParser
 org.apache.tika.parser.feed.FeedParser
+org.apache.tika.parser.font.AdobeFontMetricParser
 org.apache.tika.parser.font.TrueTypeParser
 org.apache.tika.parser.html.HtmlParser
 org.apache.tika.parser.image.ImageParser

Added: 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/AdobeFontMetricParserTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/AdobeFontMetricParserTest.java?rev=1233973&view=auto
==============================================================================
--- 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/AdobeFontMetricParserTest.java
 (added)
+++ 
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/font/AdobeFontMetricParserTest.java
 Fri Jan 20 15:56:05 2012
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.font;
+
+import junit.framework.TestCase;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.apache.tika.io.TikaInputStream;
+
+/**
+ * Checks that .afm font metric files are auto-detected and parsed.
+ */
+public class AdobeFontMetricParserTest extends TestCase {
+    /**
+     * Parses the sample AFM file through the auto-detecting parser and
+     * verifies the extracted metadata and the comment text.
+     */
+    public void testAdobeFontMetricParsing() throws Exception {
+        Metadata metadata = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        ParseContext context = new ParseContext();
+        Parser parser = new AutoDetectParser(); // Should auto-detect!
+
+        TikaInputStream stream = TikaInputStream.get(
+                AdobeFontMetricParserTest.class.getResource(
+                        "/test-documents/testAFM.afm"));
+        try {
+            parser.parse(stream, handler, metadata, context);
+        } finally {
+            stream.close();
+        }
+
+        // Document-level properties
+        assertEquals("application/x-font-adobe-metric",
+                metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("TestFullName", metadata.get(Metadata.TITLE));
+        assertEquals("Fri Jul 15 17:50:51 2011",
+                metadata.get(Metadata.CREATION_DATE));
+
+        // Font naming properties
+        assertEquals("TestFontName", metadata.get("FontName"));
+        assertEquals("TestFullName", metadata.get("FontFullName"));
+        assertEquals("TestSymbol", metadata.get("FontFamilyName"));
+
+        // Font weight and version
+        assertEquals("Medium", metadata.get("FontWeight"));
+        assertEquals("001.008", metadata.get("FontVersion"));
+
+        // Test that the comments got extracted
+        String content = handler.toString();
+        String[] expectedFragments = {
+            "Comments",
+            "This is a comment in a sample file",
+            "UniqueID 12345"
+        };
+        for (String fragment : expectedFragments) {
+            assertTrue(content.contains(fragment));
+        }
+    }
+}

Modified: tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm?rev=1233973&r1=1233972&r2=1233973&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm 
(original)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testAFM.afm Fri 
Jan 20 15:56:05 2012
@@ -38,7 +38,7 @@ StdHW 91
 
 StdVW 86
 
-StartCharMetrics 190
+StartCharMetrics 2
 
 C 32 ; WX 250 ; N space ; B 0 0 0 0 ;
 


Reply via email to