Repository: tika
Updated Branches:
  refs/heads/2.x 6c4c9820f -> 396ca7f24


TIKA-1977: change "add" to "set" for title in RFC822Parser


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/396ca7f2
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/396ca7f2
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/396ca7f2

Branch: refs/heads/2.x
Commit: 396ca7f249a0730aab5d8131e3d51bb3b7d7efe4
Parents: 6c4c982
Author: tballison <[email protected]>
Authored: Wed May 18 08:52:51 2016 -0400
Committer: tballison <[email protected]>
Committed: Wed May 18 08:52:51 2016 -0400

----------------------------------------------------------------------
 .../org/apache/tika/parser/mail/MailContentHandler.java |  3 +--
 .../org/apache/tika/parser/mail/RFC822ParserTest.java   | 12 ++++++++++++
 2 files changed, 13 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/396ca7f2/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 8b00004..7cf8886 100644
--- a/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parser-modules/tika-parser-web-module/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -82,7 +82,6 @@ class MailContentHandler implements ContentHandler {
         return sdf;
     }
 
-
     private boolean strictParsing = false;
 
     private XHTMLContentHandler handler;
@@ -218,7 +217,7 @@ class MailContentHandler implements ContentHandler {
                     metadata.add(TikaCoreProperties.CREATOR, from);
                 }
             } else if (fieldname.equalsIgnoreCase("Subject")) {
-                metadata.add(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE,
+                metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE,
                         ((UnstructuredField) parsedField).getValue());
             } else if (fieldname.equalsIgnoreCase("To")) {
                 processAddressList(parsedField, "To:", Metadata.MESSAGE_TO);

http://git-wip-us.apache.org/repos/asf/tika/blob/396ca7f2/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index e598f59..dc59436 100644
--- a/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parser-modules/tika-parser-web-module/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -395,4 +395,16 @@ public class RFC822ParserTest extends TikaTest {
         r = getXML("testRFC822_eml");
         assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED));
     }
+
+    @Test
+    public void testMultipleSubjects() throws Exception {
+        //adapted from govdocs1 303710.txt
+        String s = "From: Shawn Jones [[email protected]]\n" +
+                "Subject: 2006N-3502\n" +
+                "Subject: I Urge You to Require Notice of Mercury";
+        Parser p = new RFC822Parser();
+        Metadata m = new Metadata();
+        p.parse(TikaInputStream.get(s.getBytes()), new DefaultHandler(), m, new ParseContext());
+        assertEquals("I Urge You to Require Notice of Mercury", m.get(TikaCoreProperties.TITLE));
+    }
 }
