This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_2x in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_2x by this push:
     new 2d585b830 TIKA-4219 -- clean up...do not include font names in main package
2d585b830 is described below

commit 2d585b8306f61dbdf7df148f952378ccada866a8
Author: tallison <talli...@apache.org>
AuthorDate: Tue Mar 26 09:10:01 2024 -0400

    TIKA-4219 -- clean up...do not include font names in main package
---
 .../src/main/java/org/apache/tika/parser/epub/EpubParser.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
index a572ad2cc..b9f74cf3e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-miscoffice-module/src/main/java/org/apache/tika/parser/epub/EpubParser.java
@@ -452,9 +452,13 @@ public class EpubParser extends AbstractParser {
             xhtml.startElement("div", "class", "embedded");
             try {
+                boolean outputHtml = true;
+                if (hRefMediaPair.media.contains("font") || hRefMediaPair.href.startsWith("fonts")) {
+                    outputHtml = false;
+                }
                 embeddedDocumentExtractor
                         .parseEmbedded(stream, new EmbeddedContentHandler(xhtml), embeddedMetadata,
-                                true);
+                                outputHtml);
             } finally {
                 IOUtils.closeQuietly(stream);