This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4137 in repository https://gitbox.apache.org/repos/asf/tika.git
commit ca11da94c2316356fed721623387c0ba00a5b938
Author: tallison <[email protected]>
AuthorDate: Thu Sep 21 10:28:54 2023 -0400

    TIKA-4137 -- java 20 can't handle an empty char array in handler.characters() -- don't output the title element if it is empty.
---
 .../src/main/java/org/apache/tika/sax/XHTMLContentHandler.java | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java b/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
index 6ba423220..51f09ebbe 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
@@ -175,16 +175,13 @@ public class XHTMLContentHandler extends SafeContentHandler {
                 }
             }
 
-            super.startElement(XHTML, "title", "title", EMPTY_ATTRIBUTES);
             String title = metadata.get(TikaCoreProperties.TITLE);
             if (title != null && title.length() > 0) {
+                super.startElement(XHTML, "title", "title", EMPTY_ATTRIBUTES);
                 char[] titleChars = title.toCharArray();
                 super.characters(titleChars, 0, titleChars.length);
-            } else {
-                // TIKA-725: Prefer <title></title> over <title/>
-                super.characters(new char[0], 0, 0);
+                super.endElement(XHTML, "title", "title");
             }
-            super.endElement(XHTML, "title", "title");
             newline();
 
             super.endElement(XHTML, "head", "head");
