This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
     new e88be05ad TIKA-4219 -- clean up...do not include font names in main package
e88be05ad is described below

commit e88be05ad588a59916f199643f51673d693b0642
Author: tallison <talli...@apache.org>
AuthorDate: Tue Mar 26 09:10:01 2024 -0400

    TIKA-4219 -- clean up...do not include font names in main package
---
 .../src/main/java/org/apache/tika/parser/epub/EpubParser.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index 7c4168b0c..56ff532d9 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -451,9 +451,13 @@ public class EpubParser implements Parser {
                 xhtml.startElement("div", "class", "embedded");
                 try {
+                    boolean outputHtml = true;
+                    if (hRefMediaPair.media.contains("font") || hRefMediaPair.href.startsWith("fonts")) {
+                        outputHtml = false;
+                    }
                     embeddedDocumentExtractor
                             .parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
-                                    true);
+                                    outputHtml);
                 } finally {
                     IOUtils.closeQuietly(stream);