Author: jukka
Date: Thu Jun 25 14:00:09 2009
New Revision: 788360

URL: http://svn.apache.org/viewvc?rev=788360&view=rev
Log:
TIKA-247: parse language and category from MS Office properties

Patch contributed by Daan de Wit.

Modified:
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java

Modified: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=788360&r1=788359&r2=788360&view=diff
==============================================================================
--- lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Jun 25 14:00:09 2009
@@ -22,6 +22,7 @@
 import java.util.Iterator;
 
 import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hpsf.CustomProperties;
 import org.apache.poi.hpsf.DocumentSummaryInformation;
 import org.apache.poi.hpsf.MarkUnsupportedException;
 import org.apache.poi.hpsf.NoPropertySetStreamException;
@@ -147,6 +148,19 @@
     private void parse(DocumentSummaryInformation summary, Metadata metadata) {
         set(metadata, Metadata.COMPANY, summary.getCompany());
         set(metadata, Metadata.MANAGER, summary.getManager());
+        set(metadata, Metadata.LANGUAGE, getLanguage(summary));
+        set(metadata, Metadata.CATEGORY, summary.getCategory());
+    }
+
+    private String getLanguage(DocumentSummaryInformation summary) {
+        CustomProperties customProperties = summary.getCustomProperties();
+        if (customProperties != null) {
+            Object value = customProperties.get("Language");
+            if (value instanceof String) {
+                return (String) value;
+            }
+        }
+        return null;
     }
 
     private void setType(Metadata metadata, String type) {


Reply via email to