Repository: tika
Updated Branches:
refs/heads/master bfd1d9139 -> 8e819c3ca
TIKA-2122 : add all headers from MSG and RFC822 files
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/8e819c3c
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/8e819c3c
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/8e819c3c
Branch: refs/heads/master
Commit: 8e819c3caf3ff3b0492f600b4193d1b3ee74f51b
Parents: bfd1d91
Author: tballison
Authored: Mon Oct 17 14:10:46 2016 -0400
Committer: tballison
Committed: Mon Oct 17 14:10:46 2016 -0400
--
.../java/org/apache/tika/metadata/Message.java | 6 ++
.../src/test/java/org/apache/tika/TikaTest.java | 8 ++
.../tika/parser/mail/MailContentHandler.java| 5 ++
.../tika/parser/microsoft/OutlookExtractor.java | 87 +++-
.../tika/parser/mail/RFC822ParserTest.java | 1 +
.../parser/microsoft/OutlookParserTest.java | 15
6 files changed, 121 insertions(+), 1 deletion(-)
--
http://git-wip-us.apache.org/repos/asf/tika/blob/8e819c3c/tika-core/src/main/java/org/apache/tika/metadata/Message.java
--
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Message.java b/tika-core/src/main/java/org/apache/tika/metadata/Message.java
index ffb9413..dad3952 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Message.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Message.java
@@ -16,10 +16,16 @@
*/
package org.apache.tika.metadata;
+import org.apache.tika.Tika;
+
/**
* A collection of Message related property names.
*/
public interface Message {
+String MESSAGE_PREFIX = "Message"+ Metadata.NAMESPACE_PREFIX_DELIMITER;
+
+String MESSAGE_RAW_HEADER_PREFIX = MESSAGE_PREFIX+"Raw-Header"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+
String MESSAGE_RECIPIENT_ADDRESS = "Message-Recipient-Address";
String MESSAGE_FROM = "Message-From";
http://git-wip-us.apache.org/repos/asf/tika/blob/8e819c3c/tika-core/src/test/java/org/apache/tika/TikaTest.java
--
diff --git a/tika-core/src/test/java/org/apache/tika/TikaTest.java b/tika-core/src/test/java/org/apache/tika/TikaTest.java
index 690db33..0bc5a83 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaTest.java
@@ -296,4 +296,12 @@ public abstract class TikaTest {
i++;
}
}
+
+public static void debug(Metadata metadata) {
+for (String n : metadata.names()) {
+for (String v : metadata.getValues(n)) {
+System.out.println(n + " : "+v);
+}
+}
+}
}
http://git-wip-us.apache.org/repos/asf/tika/blob/8e819c3c/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
--
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 6a9bc1b..60170e6 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -51,6 +51,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Message;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
@@ -238,6 +239,7 @@ class MailContentHandler implements ContentHandler {
try {
String fieldname = field.getName();
+
ParsedField parsedField = LenientFieldParser.getParser().parse(
field, DecodeMonitor.SILENT);
if (fieldname.equalsIgnoreCase("From")) {
@@ -276,6 +278,9 @@ class MailContentHandler implements ContentHandler {
date = tryOtherDateFormats(field.getBody());
}
metadata.set(TikaCoreProperties.CREATED, date);
+} else {
+metadata.add(Metadata.MESSAGE_RAW_HEADER_PREFIX+parsedField.getName(),
+field.getBody());
}
} catch (RuntimeException me) {
if (strictParsing) {
http://git-wip-us.apache.org/repos/asf/tika/blob/8e819c3c/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
--
diff --git
a/tika-parsers/src/main/java/org/apache/t