This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4710-rtf-attachments-in-html-decapsulation in repository https://gitbox.apache.org/repos/asf/tika.git
commit 57e18b6a8decc1f151b610c946583d19ff4f276d Author: tallison <[email protected]> AuthorDate: Mon Apr 6 09:27:57 2026 -0400 TIKA-4710 -- extract RTF attachments during html decapsulation in msgs --- .../apache/tika/parser/microsoft/rtf/jflex/RTFObjDataStreamParser.java | 2 +- .../main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFToken.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFObjDataStreamParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFObjDataStreamParser.java index c45b0a3817..c3313174ee 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFObjDataStreamParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFObjDataStreamParser.java @@ -453,7 +453,7 @@ public class RTFObjDataStreamParser implements Closeable { try { return new String(buf, 0, len, WIN_ASCII).trim(); } catch (java.io.UnsupportedEncodingException e) { - return new String(buf, 0, len).trim(); + return new String(buf, 0, len, java.nio.charset.StandardCharsets.US_ASCII).trim(); } } diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFToken.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFToken.java index ec287f5c7e..725e24d4d1 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFToken.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/rtf/jflex/RTFToken.java @@ -78,7 +78,7 @@ public class RTFToken { case CONTROL_SYMBOL: return "\\" + name; case HEX_ESCAPE: - return String.format("\\'%02x", parameter); + return String.format(java.util.Locale.ROOT, "\\'%02x", parameter); case UNICODE_ESCAPE: return "\\u" + parameter; case TEXT:
