This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 3806e55ee TIKA-4360 -- improve extraction of mapi metadata (#2073)
3806e55ee is described below

commit 3806e55ee5f16ac10241f1e2e68f2237d6ec576f
Author: Tim Allison <[email protected]>
AuthorDate: Wed Dec 4 15:36:32 2024 -0500

    TIKA-4360 -- improve extraction of mapi metadata (#2073)
---
 .../main/java/org/apache/tika/metadata/MAPI.java   |  63 +++++++
 .../main/java/org/apache/tika/metadata/Office.java |  32 ----
 .../tika/parser/microsoft/OutlookExtractor.java    | 190 ++++++++++++++++-----
 .../parser/microsoft/pst/PSTMailItemParser.java    |  18 +-
 .../tika/parser/microsoft/OutlookParserTest.java   |  33 +++-
 .../parser/microsoft/libpst/TestLibPstParser.java  |   4 +-
 .../parser/microsoft/pst/OutlookPSTParserTest.java |  18 +-
 7 files changed, 252 insertions(+), 106 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/MAPI.java 
b/tika-core/src/main/java/org/apache/tika/metadata/MAPI.java
new file mode 100644
index 000000000..2cf41c7e0
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/metadata/MAPI.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.metadata;
+
+/**
+ * MAPI (Messaging Application Programming Interface) properties collection.
+ * These properties apply to MAPI-based messages, such as Outlook .msg files
+ * and the mail items contained in .pst files.
+ * This is a logical collection of properties, which may be drawn from a
+ * few different external definitions.
+ *
+ * @since Apache Tika 1.2
+ */
+public interface MAPI {
+
+    String PREFIX_MAPI_META = "mapi" + 
TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+
+    /**
+     * MAPI message class.  What type of .msg/MAPI file is it?
+     */
+    Property MESSAGE_CLASS =
+            Property.internalClosedChoise(PREFIX_MAPI_META + "message-class", 
"APPOINTMENT", "CONTACT", "NOTE", "STICKY_NOTE",
+                    "POST", "TASK", "UNKNOWN", "UNSPECIFIED");
+
+    Property SENT_BY_SERVER_TYPE = Property.internalText(PREFIX_MAPI_META + 
"sent-by-server-type");
+
+    Property FROM_REPRESENTING_NAME = Property.internalText(PREFIX_MAPI_META + 
"from-representing-name");
+
+    Property FROM_REPRESENTING_EMAIL = Property.internalText(PREFIX_MAPI_META 
+ "from-representing-email");
+
+    Property SUBMISSION_ACCEPTED_AT_TIME = 
Property.internalDate(PREFIX_MAPI_META + "msg-submission-accepted-at-time");
+
+    Property SUBMISSION_ID = Property.internalText(PREFIX_MAPI_META + 
"msg-submission-id");
+
+    Property INTERNET_MESSAGE_ID = Property.internalText(PREFIX_MAPI_META + 
"internet-message-id");
+
+    Property INTERNET_REFERENCES = Property.internalTextBag(PREFIX_MAPI_META + 
"internet-references");
+
+
+    Property CONVERSATION_TOPIC = Property.internalText(PREFIX_MAPI_META + 
"conversation-topic");
+
+    Property CONVERSATION_INDEX = Property.internalText(PREFIX_MAPI_META + 
"conversation-index");
+    Property IN_REPLY_TO_ID = Property.internalText(PREFIX_MAPI_META + 
"in-reply-to-id");
+
+    Property RECIPIENTS_STRING = Property.internalText(PREFIX_MAPI_META + 
"recipients-string");
+    Property IMPORTANCE = Property.internalInteger(PREFIX_MAPI_META + 
"importance");
+    Property PRIORTY = Property.internalInteger(PREFIX_MAPI_META + "priority");
+    Property IS_FLAGGED = Property.internalBoolean(PREFIX_MAPI_META + 
"is-flagged");
+}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java 
b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
index 2a9e428eb..aa4b9f002 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
@@ -145,30 +145,6 @@ public interface Office {
     Property OBJECT_COUNT = Property.internalInteger(
             PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + 
"object-count");
 
-    /**
-     * MAPI message class.  What type of .msg/MAPI file is it?
-     */
-    Property MAPI_MESSAGE_CLASS = Property.internalClosedChoise(
-            PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + 
"mapi-message-class",
-            "APPOINTMENT", "CONTACT", "NOTE", "STICKY_NOTE", "POST", "TASK", 
"UNKNOWN",
-            "UNSPECIFIED");
-
-    Property MAPI_SENT_BY_SERVER_TYPE = Property.internalText(
-            PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
-                    "mapi-sent-by-server-type");
-
-    Property MAPI_FROM_REPRESENTING_NAME = Property.internalText(
-            PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
-                    "mapi-from-representing-name");
-
-    Property MAPI_FROM_REPRESENTING_EMAIL = Property.internalText(
-            PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
-                    "mapi-from-representing-email");
-
-    Property MAPI_MESSAGE_CLIENT_SUBMIT_TIME = Property.internalDate(
-            PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
-                    "mapi-msg-client-submit-time");
-
     /**
      * Embedded files may have a "progID" associated with them, such as
      * Word.Document.12 or AcroExch.Document.DC
@@ -176,12 +152,4 @@ public interface Office {
     Property PROG_ID = Property.internalText("msoffice:progID");
 
     Property OCX_NAME = Property.internalText("msoffice:ocxName");
-    Property MAPI_RECIPIENTS_STRING = Property.internalText(PREFIX_DOC_META +
-            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + 
"mapi-recipients-string");
-    Property MAPI_IMPORTANCE = Property.internalInteger(PREFIX_DOC_META +
-            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-importance");
-    Property MAPI_PRIORTY = Property.internalInteger(PREFIX_DOC_META +
-            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-importance");
-    Property MAPI_IS_FLAGGED = Property.internalBoolean(PREFIX_DOC_META +
-            TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-is-flagged");
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 5a2dc996e..8f381e923 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -24,8 +24,10 @@ import java.nio.charset.Charset;
 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.ArrayList;
+import java.util.Calendar;
 import java.util.Collections;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.LinkedList;
 import java.util.List;
@@ -34,6 +36,7 @@ import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
 import org.apache.james.mime4j.codec.DecodeMonitor;
 import org.apache.james.mime4j.codec.DecoderUtil;
@@ -44,6 +47,7 @@ import org.apache.poi.hsmf.datatypes.ByteChunk;
 import org.apache.poi.hsmf.datatypes.Chunk;
 import org.apache.poi.hsmf.datatypes.Chunks;
 import org.apache.poi.hsmf.datatypes.MAPIProperty;
+import org.apache.poi.hsmf.datatypes.MessageSubmissionChunk;
 import org.apache.poi.hsmf.datatypes.PropertyValue;
 import org.apache.poi.hsmf.datatypes.RecipientChunks;
 import org.apache.poi.hsmf.datatypes.StringChunk;
@@ -56,9 +60,9 @@ import org.xml.sax.SAXException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.MAPI;
 import org.apache.tika.metadata.Message;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -80,17 +84,48 @@ import org.apache.tika.sax.XHTMLContentHandler;
 public class OutlookExtractor extends AbstractPOIFSExtractor {
 
     private static final Metadata EMPTY_METADATA = new Metadata();
-
-    private static Pattern HEADER_KEY_PAT =
-            Pattern.compile("\\A([\\x21-\\x39\\x3B-\\x7E]+):(.*?)\\Z");
-
-    private final MAPIMessage msg;
+    private static final MAPIProperty[] LITERAL_TIME_MAPI_PROPERTIES = new 
MAPIProperty[] {
+            MAPIProperty.CLIENT_SUBMIT_TIME,
+            MAPIProperty.CREATION_TIME,
+            MAPIProperty.DEFERRED_DELIVERY_TIME,
+            MAPIProperty.DELIVER_TIME,
+            //EXPAND BEGIN and EXPAND END?
+            MAPIProperty.EXPIRY_TIME,
+            MAPIProperty.LAST_MODIFICATION_TIME,
+            MAPIProperty.LATEST_DELIVERY_TIME,
+            MAPIProperty.MESSAGE_DELIVERY_TIME,
+            MAPIProperty.MESSAGE_DOWNLOAD_TIME,
+            MAPIProperty.ORIGINAL_DELIVERY_TIME,
+            MAPIProperty.ORIGINAL_SUBMIT_TIME,
+            MAPIProperty.PROVIDER_SUBMIT_TIME,
+            MAPIProperty.RECEIPT_TIME,
+            MAPIProperty.REPLY_TIME,
+            MAPIProperty.REPORT_TIME
+
+    };
+
+    private static final Map<MAPIProperty, Property> LITERAL_TIME_PROPERTIES = 
new HashMap<>();
+
+    static {
+        for (MAPIProperty property : LITERAL_TIME_MAPI_PROPERTIES) {
+            String name = property.mapiProperty.toLowerCase(Locale.ROOT);
+            name = name.substring(3);
+            name = name.replace('_', '-');
+            name = MAPI.PREFIX_MAPI_META + name;
+            Property tikaProp = Property.internalDate(name);
+            LITERAL_TIME_PROPERTIES.put(property, tikaProp);
+        }
+    }
     //this according to the spec; in practice, it is probably more likely
     //that a "split field" fails to start with a space character than
     //that a real header contains anything but [-_A-Za-z0-9].
     //e.g.
     //header: this header goes onto the next line
     //<mailto:[email protected]...
+    private static Pattern HEADER_KEY_PAT =
+            Pattern.compile("\\A([\\x21-\\x39\\x3B-\\x7E]+):(.*?)\\Z");
+
+    private final MAPIMessage msg;
     private final ParseContext parseContext;
     private final boolean extractAllAlternatives;
     HtmlEncodingDetector detector = new HtmlEncodingDetector();
@@ -158,7 +193,7 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
             msg.setReturnNullOnMissingChunk(true);
 
             try {
-                parentMetadata.set(Office.MAPI_MESSAGE_CLASS, 
msg.getMessageClassEnum().name());
+                parentMetadata.set(MAPI.MESSAGE_CLASS, 
msg.getMessageClassEnum().name());
             } catch (ChunkNotFoundException e) {
                 //swallow
             }
@@ -170,15 +205,10 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
             }
 
             // Start with the metadata
-            String subject = msg.getSubject();
             Map<String, String[]> headers = normalizeHeaders(msg.getHeaders());
-            String from = msg.getDisplayFrom();
 
             handleFromTo(headers, parentMetadata);
-
-            parentMetadata.set(TikaCoreProperties.TITLE, subject);
-            parentMetadata.set(TikaCoreProperties.SUBJECT, 
msg.getConversationTopic());
-            parentMetadata.set(TikaCoreProperties.DESCRIPTION, 
msg.getConversationTopic());
+            handleMessageInfo(msg, headers, parentMetadata);
 
             try {
                 for (String recipientAddress : 
msg.getRecipientEmailAddressList()) {
@@ -197,35 +227,7 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
                 }
             }
 
-            // Date - try two ways to find it
-            // First try via the proper chunk
-            if (msg.getMessageDate() != null) {
-                parentMetadata.set(TikaCoreProperties.CREATED, 
msg.getMessageDate().getTime());
-                parentMetadata.set(TikaCoreProperties.MODIFIED, 
msg.getMessageDate().getTime());
-            } else {
-                if (headers != null && headers.size() > 0) {
-                    for (Map.Entry<String, String[]> header : 
headers.entrySet()) {
-                        String headerKey = header.getKey();
-                        if 
(headerKey.toLowerCase(Locale.ROOT).startsWith("date:")) {
-                            String date = 
headerKey.substring(headerKey.indexOf(':') + 1).trim();
-
-                            // See if we can parse it as a normal mail date
-                            try {
-                                Date d = MailDateParser.parseDateLenient(date);
-                                parentMetadata.set(TikaCoreProperties.CREATED, 
d);
-                                
parentMetadata.set(TikaCoreProperties.MODIFIED, d);
-                            } catch (SecurityException e ) {
-                                throw e;
-                            } catch (Exception e) {
-                                // Store it as-is, and hope for the best...
-                                parentMetadata.set(TikaCoreProperties.CREATED, 
date);
-                                
parentMetadata.set(TikaCoreProperties.MODIFIED, date);
-                            }
-                            break;
-                        }
-                    }
-                }
-            }
+            handleGeneralDates(msg, headers, parentMetadata);
 
             // Get the message body. Preference order is: html, rtf, text
             Chunk htmlChunk = null;
@@ -277,6 +279,104 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
         }
     }
 
+    private void handleMessageInfo(MAPIMessage msg, Map<String, String[]> 
headers, Metadata metadata)
+            throws ChunkNotFoundException {
+        //this is the literal subject including "re: "
+        metadata.set(TikaCoreProperties.TITLE, msg.getSubject());
+        //this is the original topic for the thread without the "re: "
+        String topic = msg.getConversationTopic();
+        metadata.set(TikaCoreProperties.SUBJECT, topic);
+        metadata.set(TikaCoreProperties.DESCRIPTION, topic);
+        metadata.set(MAPI.CONVERSATION_TOPIC, topic);
+        Chunks mainChunks = msg.getMainChunks();
+        if (mainChunks != null) {
+            if (mainChunks.getMessageId() != null) {
+                metadata.set(MAPI.INTERNET_MESSAGE_ID, mainChunks
+                        .getMessageId()
+                        .getValue());
+            }
+
+            List<Chunk> conversationIndex = 
mainChunks.getAll().get(MAPIProperty.CONVERSATION_INDEX);
+            if (conversationIndex != null && ! conversationIndex.isEmpty()) {
+                Chunk chunk = conversationIndex.get(0);
+                if (chunk instanceof  ByteChunk) {
+                    byte[] bytes = ((ByteChunk)chunk).getValue();
+                    String hex = Hex.encodeHexString(bytes);
+                    metadata.set(MAPI.CONVERSATION_INDEX, hex);
+                }
+            }
+
+            List<Chunk> internetReferences = 
mainChunks.getAll().get(MAPIProperty.INTERNET_REFERENCES);
+            if (internetReferences != null) {
+                for (Chunk ref : internetReferences) {
+                    if (ref instanceof StringChunk) {
+                        metadata.add(MAPI.INTERNET_REFERENCES, ((StringChunk) 
ref).getValue());
+                    }
+                }
+            }
+            List<Chunk> inReplyToIds = 
mainChunks.getAll().get(MAPIProperty.IN_REPLY_TO_ID);
+            if (inReplyToIds != null && ! inReplyToIds.isEmpty()) {
+                metadata.add(MAPI.IN_REPLY_TO_ID, 
inReplyToIds.get(0).toString());
+            }
+
+            for (Map.Entry<MAPIProperty, Property> e : 
LITERAL_TIME_PROPERTIES.entrySet()) {
+                List<PropertyValue> timeProp = 
mainChunks.getProperties().get(e.getKey());
+                if (timeProp != null && ! timeProp.isEmpty()) {
+                    Calendar cal = 
((PropertyValue.TimePropertyValue)timeProp.get(0)).getValue();
+                    metadata.set(e.getValue(), cal);
+                }
+            }
+
+            MessageSubmissionChunk messageSubmissionChunk = 
mainChunks.getSubmissionChunk();
+            if (messageSubmissionChunk != null) {
+                String submissionId = messageSubmissionChunk.getSubmissionId();
+                metadata.set(MAPI.SUBMISSION_ID, submissionId);
+                metadata.set(MAPI.SUBMISSION_ACCEPTED_AT_TIME, 
messageSubmissionChunk.getAcceptedAtTime());
+            }
+
+        }
+    }
+
+
+    private void handleGeneralDates(MAPIMessage msg, Map<String, String[]> 
headers, Metadata metadata) throws ChunkNotFoundException {
+        // Date - try two ways to find it
+        // First try via the proper chunk
+        if (msg.getMessageDate() != null) {
+            metadata.set(TikaCoreProperties.CREATED, 
msg.getMessageDate().getTime());
+            metadata.set(TikaCoreProperties.MODIFIED, 
msg.getMessageDate().getTime());
+        } else {
+            if (headers != null && headers.size() > 0) {
+                for (Map.Entry<String, String[]> header : headers.entrySet()) {
+                    String headerKey = header.getKey();
+                    if 
(headerKey.toLowerCase(Locale.ROOT).startsWith("date:")) {
+                        String date = 
headerKey.substring(headerKey.indexOf(':') + 1).trim();
+
+                        // See if we can parse it as a normal mail date
+                        try {
+                            Date d = MailDateParser.parseDateLenient(date);
+                            metadata.set(TikaCoreProperties.CREATED, d);
+                            metadata.set(TikaCoreProperties.MODIFIED, d);
+                        } catch (SecurityException e ) {
+                            throw e;
+                        } catch (Exception e) {
+                            // Store it as-is, and hope for the best...
+                            metadata.set(TikaCoreProperties.CREATED, date);
+                            metadata.set(TikaCoreProperties.MODIFIED, date);
+                        }
+                        break;
+                    }
+                }
+            }
+        }
+        //try to overwrite the modified property if the actual 
LAST_MODIFICATION_TIME property exists.
+        List<PropertyValue> timeProp = 
msg.getMainChunks().getProperties().get(MAPIProperty.LAST_MODIFICATION_TIME);
+        if (timeProp != null && ! timeProp.isEmpty()) {
+            Calendar cal = 
((PropertyValue.TimePropertyValue)timeProp.get(0)).getValue();
+            metadata.set(TikaCoreProperties.MODIFIED, cal);
+        }
+
+    }
+
     private void handleBodyChunks(Chunk htmlChunk, Chunk rtfChunk, Chunk 
textChunk,
                                   XHTMLContentHandler xhtml)
             throws SAXException, IOException, TikaException {
@@ -395,7 +495,7 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
         Chunks chunks = msg.getMainChunks();
         StringChunk sentByServerType = chunks.getSentByServerType();
         if (sentByServerType != null) {
-            metadata.set(Office.MAPI_SENT_BY_SERVER_TYPE, 
sentByServerType.getValue());
+            metadata.set(MAPI.SENT_BY_SERVER_TYPE, 
sentByServerType.getValue());
         }
 
         Map<MAPIProperty, List<Chunk>> mainChunks = 
msg.getMainChunks().getAll();
@@ -411,12 +511,12 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
         setFirstChunk(mainChunks.get(MAPIProperty.SENDER_NAME), 
Message.MESSAGE_FROM_NAME,
                 metadata);
         setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_NAME),
-                Office.MAPI_FROM_REPRESENTING_NAME, metadata);
+                MAPI.FROM_REPRESENTING_NAME, metadata);
 
         setFirstChunk(mainChunks.get(MAPIProperty.SENDER_EMAIL_ADDRESS), 
Message.MESSAGE_FROM_EMAIL,
                 metadata);
         
setFirstChunk(mainChunks.get(MAPIProperty.SENT_REPRESENTING_EMAIL_ADDRESS),
-                Office.MAPI_FROM_REPRESENTING_EMAIL, metadata);
+                MAPI.FROM_REPRESENTING_EMAIL, metadata);
 
         for (Recipient recipient : buildRecipients()) {
             switch (recipient.recipientType) {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/PSTMailItemParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/PSTMailItemParser.java
index a87c6cb84..f1c9f9e66 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/PSTMailItemParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/pst/PSTMailItemParser.java
@@ -36,9 +36,9 @@ import org.apache.tika.exception.TikaException;
 import org.apache.tika.extractor.EmbeddedDocumentExtractor;
 import org.apache.tika.extractor.EmbeddedDocumentUtil;
 import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.MAPI;
 import org.apache.tika.metadata.Message;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.PST;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -139,31 +139,31 @@ public class PSTMailItemParser implements Parser {
         metadata.set(Metadata.MESSAGE_FROM, pstMail.getSenderName());
         metadata.set(TikaCoreProperties.CREATOR, pstMail.getSenderName());
         metadata.set(TikaCoreProperties.CREATED, pstMail.getCreationTime());
-        metadata.set(Office.MAPI_MESSAGE_CLIENT_SUBMIT_TIME, 
pstMail.getClientSubmitTime());
+        metadata.set(MAPI.SUBMISSION_ACCEPTED_AT_TIME, 
pstMail.getClientSubmitTime());
         metadata.set(TikaCoreProperties.MODIFIED, 
pstMail.getLastModificationTime());
         metadata.set(TikaCoreProperties.COMMENTS, pstMail.getComment());
         metadata.set(PST.DESCRIPTOR_NODE_ID, 
valueOf(pstMail.getDescriptorNodeId()));
         metadata.set(Message.MESSAGE_FROM_EMAIL, 
pstMail.getSenderEmailAddress());
         if (! StringUtils.isBlank(pstMail.getRecipientsString()) &&
                 ! pstMail.getRecipientsString().equals("No recipients 
table!")) {
-            metadata.set(Office.MAPI_RECIPIENTS_STRING, 
pstMail.getRecipientsString());
+            metadata.set(MAPI.RECIPIENTS_STRING, 
pstMail.getRecipientsString());
         }
         metadata.set(Message.MESSAGE_TO_DISPLAY_NAME, pstMail.getDisplayTo());
         metadata.set(Message.MESSAGE_CC_DISPLAY_NAME, pstMail.getDisplayCC());
         metadata.set(Message.MESSAGE_BCC_DISPLAY_NAME, 
pstMail.getDisplayBCC());
-        metadata.set(Office.MAPI_IMPORTANCE, pstMail.getImportance());
-        metadata.set(Office.MAPI_PRIORTY, pstMail.getPriority());
-        metadata.set(Office.MAPI_IS_FLAGGED, pstMail.isFlagged());
-        metadata.set(Office.MAPI_MESSAGE_CLASS,
+        metadata.set(MAPI.IMPORTANCE, pstMail.getImportance());
+        metadata.set(MAPI.PRIORTY, pstMail.getPriority());
+        metadata.set(MAPI.IS_FLAGGED, pstMail.isFlagged());
+        metadata.set(MAPI.MESSAGE_CLASS,
                 OutlookExtractor.getMessageClass(pstMail.getMessageClass()));
 
         metadata.set(Message.MESSAGE_FROM_EMAIL, 
pstMail.getSenderEmailAddress());
 
-        metadata.set(Office.MAPI_FROM_REPRESENTING_EMAIL,
+        metadata.set(MAPI.FROM_REPRESENTING_EMAIL,
                 pstMail.getSentRepresentingEmailAddress());
 
         metadata.set(Message.MESSAGE_FROM_NAME, pstMail.getSenderName());
-        metadata.set(Office.MAPI_FROM_REPRESENTING_NAME, 
pstMail.getSentRepresentingName());
+        metadata.set(MAPI.FROM_REPRESENTING_NAME, 
pstMail.getSentRepresentingName());
 
         //add recipient details
         try {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
index f10f4aa7c..a01bd5a8d 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
@@ -36,9 +36,9 @@ import org.xml.sax.ContentHandler;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.MAPI;
 import org.apache.tika.metadata.Message;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -119,12 +119,12 @@ public class OutlookParserTest extends TikaTest {
                         "    by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 29 
Jan 2009 11:17:08 " +
                         "-0800",
                 
Arrays.asList(metadata.getValues("Message:Raw-Header:Received")));
-        assertEquals("EX", metadata.get(Office.MAPI_SENT_BY_SERVER_TYPE));
-        assertEquals("NOTE", metadata.get(Office.MAPI_MESSAGE_CLASS));
+        assertEquals("EX", metadata.get(MAPI.SENT_BY_SERVER_TYPE));
+        assertEquals("NOTE", metadata.get(MAPI.MESSAGE_CLASS));
         assertEquals("Jukka Zitting", metadata.get(Message.MESSAGE_FROM_NAME));
         assertEquals("[email protected]", 
metadata.get(Message.MESSAGE_FROM_EMAIL));
-        assertEquals("Jukka Zitting", 
metadata.get(Office.MAPI_FROM_REPRESENTING_NAME));
-        assertEquals("[email protected]", 
metadata.get(Office.MAPI_FROM_REPRESENTING_EMAIL));
+        assertEquals("Jukka Zitting", 
metadata.get(MAPI.FROM_REPRESENTING_NAME));
+        assertEquals("[email protected]", 
metadata.get(MAPI.FROM_REPRESENTING_EMAIL));
 
         //to-name is empty, make sure that we get an empty string.
         assertEquals("[email protected]", 
metadata.get(Message.MESSAGE_TO_EMAIL));
@@ -199,9 +199,19 @@ public class OutlookParserTest extends TikaTest {
 
         assertEquals("[email protected]", 
metadata.get(Message.MESSAGE_TO_EMAIL));
 
-        assertEquals("Tests Chang@FT (張毓倫)", 
metadata.get(Office.MAPI_FROM_REPRESENTING_NAME));
+        assertEquals("Tests Chang@FT (張毓倫)", 
metadata.get(MAPI.FROM_REPRESENTING_NAME));
         assertEquals("/O=FT GROUP/OU=FT/CN=RECIPIENTS/CN=LYDIACHANG",
-                metadata.get(Office.MAPI_FROM_REPRESENTING_EMAIL));
+                metadata.get(MAPI.FROM_REPRESENTING_EMAIL));
+
+        assertEquals("c=TW;a= ;p=FT GROUP;l=FTM02-110329085248Z-89735\u0000",
+                metadata.get(MAPI.SUBMISSION_ID));
+        
assertEquals("<[email protected]>",
+                metadata.get(MAPI.INTERNET_MESSAGE_ID));
+        
assertTrue(metadata.get(MAPI.SUBMISSION_ACCEPTED_AT_TIME).startsWith("2011-03-29"));
+        
assertTrue(metadata.get("mapi:client-submit-time").startsWith("2011-03-29"));
+        
assertTrue(metadata.get("mapi:message-delivery-time").startsWith("2011-03-29"));
+        
assertTrue(metadata.get("mapi:last-modification-time").startsWith("2011-03-29"));
+        
assertTrue(metadata.get("mapi:creation-time").startsWith("2011-03-29"));
     }
 
     @Test
@@ -224,6 +234,11 @@ public class OutlookParserTest extends TikaTest {
         String content = sw.toString();
         assertEquals(2, content.split("<body>").length);
         assertEquals(2, content.split("<\\/body>").length);
+        assertEquals("01ccb5408a75b6cf3ad7837949b698499034202313ef000002a160", 
metadata.get(MAPI.CONVERSATION_INDEX));
+        
assertEquals("<c8508767c15dbf40a21693142739ea8d564d18f...@exvmbx018-1.exch018.msoutlookonline.net>",
+                metadata.get(MAPI.INTERNET_REFERENCES));
+        
assertEquals("<c8508767c15dbf40a21693142739ea8d564d18f...@exvmbx018-1.exch018.msoutlookonline.net>",
+                metadata.get(MAPI.IN_REPLY_TO_ID));
     }
 
     @Test
@@ -289,8 +304,8 @@ public class OutlookParserTest extends TikaTest {
 
     private void testMsgClass(String expected, Metadata metadata) {
         assertTrue(expected.equalsIgnoreCase(
-                                
metadata.get(Office.MAPI_MESSAGE_CLASS).replaceAll("_", "")),
-                expected + ", but got: " + 
metadata.get(Office.MAPI_MESSAGE_CLASS));
+                                
metadata.get(MAPI.MESSAGE_CLASS).replaceAll("_", "")),
+                expected + ", but got: " + metadata.get(MAPI.MESSAGE_CLASS));
     }
 
     @Test
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/libpst/TestLibPstParser.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/libpst/TestLibPstParser.java
index 8e6863596..73e623393 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/libpst/TestLibPstParser.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/libpst/TestLibPstParser.java
@@ -26,9 +26,9 @@ import org.junit.jupiter.api.Test;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.metadata.MAPI;
 import org.apache.tika.metadata.Message;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.PST;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.parser.AutoDetectParser;
@@ -78,7 +78,7 @@ public class TestLibPstParser extends TikaTest {
 
         assertEquals("NOTE", metadataList
                 .get(7)
-                .get(Office.MAPI_MESSAGE_CLASS));
+                .get(MAPI.MESSAGE_CLASS));
     }
 
     @Test
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
index 6e9a6d6d1..e73c6c9fe 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/pst/OutlookPSTParserTest.java
@@ -26,9 +26,9 @@ import java.util.List;
 import org.junit.jupiter.api.Test;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.MAPI;
 import org.apache.tika.metadata.Message;
 import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.Office;
 import org.apache.tika.metadata.PST;
 import org.apache.tika.metadata.TikaCoreProperties;
 import org.apache.tika.mime.MediaType;
@@ -74,9 +74,9 @@ public class OutlookPSTParserTest extends TikaTest {
         assertEquals("", m1.get(Message.MESSAGE_CC_DISPLAY_NAME));
         assertEquals("", m1.get(Message.MESSAGE_BCC_DISPLAY_NAME));
         assertEquals("[email protected]", m1.get(Message.MESSAGE_FROM_EMAIL));
-        assertEquals("Jörn Kottmann", 
m1.get(Office.MAPI_FROM_REPRESENTING_NAME));
-        assertEquals("[email protected]", 
m1.get(Office.MAPI_FROM_REPRESENTING_EMAIL));
-        assertEquals("NOTE", m1.get(Office.MAPI_MESSAGE_CLASS));
+        assertEquals("Jörn Kottmann", m1.get(MAPI.FROM_REPRESENTING_NAME));
+        assertEquals("[email protected]", 
m1.get(MAPI.FROM_REPRESENTING_EMAIL));
+        assertEquals("NOTE", m1.get(MAPI.MESSAGE_CLASS));
         assertEquals("/Début du fichier de données Outlook", 
m1.get(PST.PST_FOLDER_PATH));
         //test that subject is making it into the xhtml
         assertContains("<meta name=\"dc:subject\" content=\"Re: Feature 
Generators\"", m1.get(TikaCoreProperties.TIKA_CONTENT));
@@ -84,11 +84,11 @@ public class OutlookPSTParserTest extends TikaTest {
         Metadata m6 = metadataList.get(6);
         assertEquals("Couchbase", m6.get(Message.MESSAGE_FROM_NAME));
         assertEquals("[email protected]", 
m6.get(Message.MESSAGE_FROM_EMAIL));
-        assertEquals("Couchbase", m6.get(Office.MAPI_FROM_REPRESENTING_NAME));
-        assertEquals("[email protected]", 
m6.get(Office.MAPI_FROM_REPRESENTING_EMAIL));
-        assertEquals("NOTE", m1.get(Office.MAPI_MESSAGE_CLASS));
-        assertNull(m1.get(Office.MAPI_RECIPIENTS_STRING));
-        assertContains("2014-02-26", 
m1.get(Office.MAPI_MESSAGE_CLIENT_SUBMIT_TIME));
+        assertEquals("Couchbase", m6.get(MAPI.FROM_REPRESENTING_NAME));
+        assertEquals("[email protected]", 
m6.get(MAPI.FROM_REPRESENTING_EMAIL));
+        assertEquals("NOTE", m1.get(MAPI.MESSAGE_CLASS));
+        assertNull(m1.get(MAPI.RECIPIENTS_STRING));
+        assertContains("2014-02-26", m1.get(MAPI.SUBMISSION_ACCEPTED_AT_TIME));
 
         //test full EX email
         assertEquals(

Reply via email to