Repository: tika Updated Branches: refs/heads/master 740853183 -> 89881ce20
TIKA-1977 set title vs add title -- also clean up of javadoc href and whitespace from TIKA-1970 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/89881ce2 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/89881ce2 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/89881ce2 Branch: refs/heads/master Commit: 89881ce207444a9d53f041bf9a14b5b7d623dca6 Parents: 7408531 Author: tballison <[email protected]> Authored: Wed May 18 08:50:13 2016 -0400 Committer: tballison <[email protected]> Committed: Wed May 18 08:50:13 2016 -0400 ---------------------------------------------------------------------- .../org/apache/tika/parser/mail/MailContentHandler.java | 10 +++------- .../org/apache/tika/parser/mail/RFC822ParserTest.java | 12 ++++++++++++ 2 files changed, 15 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/89881ce2/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java index 5a6984c..acdd28c 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java @@ -58,7 +58,6 @@ import org.apache.tika.parser.Parser; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.SAXException; -import static org.apache.tika.utils.DateUtils.MIDDAY; import static org.apache.tika.utils.DateUtils.UTC; /** @@ -86,9 +85,6 @@ class MailContentHandler implements ContentHandler { return sdf; } - - - private boolean strictParsing = false; private XHTMLContentHandler handler; @@ -188,7 +184,8 @@ class MailContentHandler implements ContentHandler { /** * Header for the whole message or its parts * - * @see http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/ + * @see <a href="http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/"> + * http://james.apache.org/mime4j/apidocs/org/apache/james/mime4j/parser/</a> * Field.html */ public void field(Field field) throws MimeException { @@ -223,7 +220,7 @@ class MailContentHandler implements ContentHandler { metadata.add(TikaCoreProperties.CREATOR, from); } } else if (fieldname.equalsIgnoreCase("Subject")) { - metadata.add(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, + metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, ((UnstructuredField) parsedField).getValue()); } else if (fieldname.equalsIgnoreCase("To")) { processAddressList(parsedField, "To:", Metadata.MESSAGE_TO); @@ -240,7 +237,6 @@ class MailContentHandler implements ContentHandler { metadata.set(TikaCoreProperties.CREATED, date); } } catch (RuntimeException me) { - me.printStackTrace(); if (strictParsing) { throw me; } http://git-wip-us.apache.org/repos/asf/tika/blob/89881ce2/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java index 6a69ea5..89993fb 100644 --- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java +++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java @@ -407,4 +407,16 @@ public class RFC822ParserTest extends TikaTest { assertEquals("2016-05-16T08:30:32Z", r.metadata.get(TikaCoreProperties.CREATED)); } + @Test + public void testMultipleSubjects() throws Exception { + //adapted from govdocs1 303710.txt + String s = "From: Shawn Jones [[email protected]]\n" + + "Subject: 2006N-3502\n" + + "Subject: I Urge You to Require Notice of Mercury"; + Parser p = new RFC822Parser(); + Metadata m = new Metadata(); + p.parse(TikaInputStream.get(s.getBytes()), new DefaultHandler(), m, new ParseContext()); + assertEquals("I Urge You to Require Notice of Mercury", m.get(TikaCoreProperties.TITLE)); + } + }
