This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4418
in repository https://gitbox.apache.org/repos/asf/tika.git

commit f2553d64edc18e2f95abd5164b4a1ae9b3d77c07
Author: tallison <[email protected]>
AuthorDate: Mon May 19 10:31:15 2025 -0400

    TIKA-4418 -- actually include title in body when injecting select metadata into the body for msg files
---
 .../main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java | 2 +-
 .../test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index b965f6f4c..912e8d258 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -870,7 +870,7 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
         if (! officeParserConfig.isWriteSelectHeadersInBody()) {
             return;
         }
-        String subject = metadata.get(TikaCoreProperties.SUBJECT);
+        String subject = metadata.get(TikaCoreProperties.TITLE);
         subject = (subject == null) ? "" : subject;
         xhtml.element("h1", subject);
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
index 382a1fd97..81809d485 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
@@ -54,7 +54,6 @@ public class OutlookParserTest extends TikaTest {
 
     @Test
     public void testOutlookParsing() throws Exception {
-
         ContentHandler handler = new BodyContentHandler();
         Metadata metadata = new Metadata();
 
@@ -84,7 +83,7 @@ public class OutlookParserTest extends TikaTest {
         assertEquals("2007-04-05T16:26:06Z", metadata.get(TikaCoreProperties.CREATED));
 
         String content = handler.toString();
-        assertContains("Microsoft Outlook Express 6", content);
+        assertTrue(content.startsWith("Microsoft Outlook Express 6"));
         assertContains("L'\u00C9quipe Microsoft Outlook Express", content);
         assertContains("Nouvel utilisateur de Outlook Express", content);
         assertContains("Messagerie et groupes de discussion", content);
