ppkarwasz commented on code in PR #4079:
URL: https://github.com/apache/logging-log4j2/pull/4079#discussion_r2984260732
##########
log4j-api/src/main/java/org/apache/logging/log4j/util/StringBuilders.java:
##########
@@ -369,8 +392,32 @@ public static void escapeXml(final StringBuilder
toAppendTo, final int start) {
toAppendTo.setCharAt(lastPos--, '&');
break;
default:
- toAppendTo.setCharAt(lastPos--, c);
+ toAppendTo.setCharAt(lastPos--, isValidXml10(c) ? c :
REPLACEMENT_CHAR);
}
}
}
+
+ /**
+ * Checks if a code point is a valid XML 1.0 character.
+ *
+ * <p>This method is restricted to characters in the BMP, i.e. represented
by one UTF-16 code unit.</p>
+ *
+ * @param codePoint a code point
+ * @return {@code true} if it is a valid XML 1.0 code point
+ */
+ private static boolean isValidXml10(final char codePoint) {
+ // XML 1.0 valid characters (Fifth Edition):
+ // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]
+
+ // [#x20-#xD7FF] (placed early as a fast path for the most common case)
+ return (codePoint >= ' ' && codePoint < Character.MIN_SURROGATE)
+ // #x9
+ || codePoint == '\t'
+ // #xA
+ || codePoint == '\n'
+ // #xD
+ || codePoint == '\r'
+ // [#xE000-#xFFFD]
+ || (codePoint > Character.MAX_SURROGATE && codePoint <=
0xFFFD);
Review Comment:
Fixed in
https://github.com/apache/logging-log4j2/pull/4079/commits/e04526fd76c87a08f427bfb7014c0dce420c4fea
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]