Author: tallison
Date: Thu Jul 16 00:55:40 2015
New Revision: 1691297

URL: http://svn.apache.org/r1691297
Log:
TIKA-1684

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java?rev=1691297&r1=1691296&r2=1691297&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java Thu Jul 16 00:55:40 2015
@@ -24,9 +24,11 @@ import java.math.BigDecimal;
 import java.text.DateFormat;
 import java.text.NumberFormat;
 import java.util.Date;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
+import java.util.Set;
 
 import com.healthmarketscience.jackcess.Column;
 import com.healthmarketscience.jackcess.DataType;
@@ -41,6 +43,8 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.html.HtmlParser;
 import org.apache.tika.sax.BodyContentHandler;
@@ -53,6 +57,10 @@ import org.xml.sax.SAXException;
  */
 class JackcessExtractor extends AbstractPOIFSExtractor {
 
+    final static String TITLE_PROP_KEY = "Title";
+    final static String AUTHOR_PROP_KEY = "Author";
+    final static String COMPANY_PROP_KEY = "Company";
+
     final static String TEXT_FORMAT_KEY = "TextFormat";
     final static String CURRENCY_FORMAT_KEY = "Format";
     final static byte TEXT_FORMAT = 0;
@@ -85,15 +93,38 @@ class JackcessExtractor extends Abstract
         }
 
         PropertyMap up = db.getUserDefinedProperties();
-        for (PropertyMap.Property p : up
-                ) {
+        for (PropertyMap.Property p : up) {
             metadata.add(JackcessParser.USER_DEFINED_PROPERTY_PREFIX+ p.getName(),
                     toString(p.getValue(), p.getType()));
         }
 
-        for (PropertyMap.Property p : db.getSummaryProperties()) {
-            metadata.add(JackcessParser.SUMMARY_PROPERTY_PREFIX+ p.getName(),
-                    toString(p.getValue(), p.getType()));
+        Set<String> found = new HashSet<>();
+        PropertyMap summaryProperties = db.getSummaryProperties();
+        if (summaryProperties != null) {
+            //try to get core properties
+            PropertyMap.Property title = summaryProperties.get(TITLE_PROP_KEY);
+            if (title != null) {
+                metadata.set(TikaCoreProperties.TITLE, toString(title.getValue(), title.getType()));
+                found.add(title.getName());
+            }
+            PropertyMap.Property author = summaryProperties.get(AUTHOR_PROP_KEY);
+            if (author != null) {
+                metadata.set(TikaCoreProperties.CREATOR, toString(author.getValue(), author.getType()));
+                found.add(author.getName());
+            }
+            PropertyMap.Property company = summaryProperties.get(COMPANY_PROP_KEY);
+            if (company != null) {
+                metadata.set(OfficeOpenXMLExtended.COMPANY, toString(company.getValue(), company.getType()));
+                found.add(company.getName());
+            }
+
+            for (PropertyMap.Property p : db.getSummaryProperties()) {
+                if (! found.contains(p.getName())) {
+                    metadata.add(JackcessParser.SUMMARY_PROPERTY_PREFIX + p.getName(),
+                            toString(p.getValue(), p.getType()));
+                }
+            }
+
         }
 
         Iterator<Table> it = db.newIterable().

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java?rev=1691297&r1=1691296&r2=1691297&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java Thu Jul 16 00:55:40 2015
@@ -47,7 +47,7 @@ import org.xml.sax.SAXException;
  */
 public class JackcessParser extends AbstractParser {
 
-    public static final String SUMMARY_PROPERTY_PREFIX = "MDB_SUMMARY_PROP";
+    public static final String SUMMARY_PROPERTY_PREFIX = "MDB_SUMMARY_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
     public static String MDB_PROPERTY_PREFIX = "MDB_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
     public static String USER_DEFINED_PROPERTY_PREFIX = "MDB_USER_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
     public static Property MDB_PW = Property.externalText("Password");
@@ -102,4 +102,4 @@ public class JackcessParser extends Abst
             throw new AssertionError("DO NOT ALLOW RESOLVING OF LINKS!!!");
         }
     }
-}
+}
\ No newline at end of file

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java?rev=1691297&r1=1691296&r2=1691297&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/JackcessParserTest.java Thu Jul 16 00:55:40 2015
@@ -25,6 +25,8 @@ import java.util.List;
 import org.apache.tika.TikaTest;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
@@ -88,4 +90,13 @@ public class JackcessParserTest extends
         XMLResult r = getXML("testAccess_V1997.mdb");
         assertContains("hijklmnop", r.xml);
     }
+
+    @Test
+    public void testMetadata() throws Exception {
+        //basic tests for normalized metadata
+        XMLResult r = getXML("testAccess_V1997.mdb");
+        assertEquals("tmccune", r.metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Health Market Science", r.metadata.get(OfficeOpenXMLExtended.COMPANY));
+        assertEquals("test", r.metadata.get(TikaCoreProperties.TITLE));
+    }
 }


Reply via email to