This is an automated email from the ASF dual-hosted git repository. swebb2066 pushed a commit to branch xml_layout_unicode_support in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
commit 42f543a11fe1f6970d8ac669365a29e2b6f253e1 Author: Stephen Webb <[email protected]> AuthorDate: Thu Mar 19 17:20:25 2026 +1100 Restore support for multi-byte code points in XML and HTML output --- src/main/cpp/transform.cpp | 55 +++++++++++++++++--------------------- src/test/cpp/xml/xmllayouttest.cpp | 12 ++++++--- 2 files changed, 33 insertions(+), 34 deletions(-) diff --git a/src/main/cpp/transform.cpp b/src/main/cpp/transform.cpp index 9c56317b..be1812e8 100644 --- a/src/main/cpp/transform.cpp +++ b/src/main/cpp/transform.cpp @@ -36,10 +36,11 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor , 0x3E /* > */ , 0x00 }; - size_t start = 0; - for (size_t index = 0; index < input.size(); ++index) + auto start = input.begin(); + for (auto nextCodePoint = start; input.end() != nextCodePoint; ) { - int ch = input[index]; + auto lastCodePoint = nextCodePoint; + auto ch = Transcoder::decode(input, nextCodePoint); // Allowable XML 1.0 characters are: // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] if (0x20 <= ch && ch <= 0xD7FF) @@ -57,9 +58,9 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor continue; } - if (start < index) - buf.append(input, start, index - start); - start = index + 1; + if (start != lastCodePoint) + buf.append(start, lastCodePoint); + start = nextCodePoint; switch (ch) { case 0: // Do not output a NUL character @@ -86,11 +87,7 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor break; } } - - if (start < input.size()) - { - buf.append(input, start, input.size() - start); - } + buf.append(start, input.end()); } } // namespace @@ -101,48 +98,46 @@ void Transform::appendEscapingCDATA( static const LogString CDATA_END(LOG4CXX_STR("]]>")); const LogString::size_type CDATA_END_LEN = 3; static const LogString CDATA_EMBEDED_END(LOG4CXX_STR("]]><![CDATA[")); - size_t start = 0; - for (size_t index = 0; index < input.size(); ++index) + auto start = input.begin(); + for (auto nextCodePoint = start; input.end() != nextCodePoint; ) { - int ch = input[index]; + auto lastCodePoint = nextCodePoint; + auto ch = Transcoder::decode(input, nextCodePoint); bool cdataEnd = false; // Allowable XML 1.0 characters are: // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - if (0x20 <= ch && ch <= 0xD7FF) + if (CDATA_END[0] == ch) { - if (CDATA_END[0] == ch && - index + CDATA_END_LEN <= input.size() && - 0 == input.compare(index, CDATA_END_LEN, CDATA_END)) + if (CDATA_END[1] != Transcoder::decode(input, nextCodePoint)) { - index += CDATA_END_LEN; - cdataEnd = true; + --nextCodePoint; + continue; } - else + if (CDATA_END[2] != Transcoder::decode(input, nextCodePoint)) { + --nextCodePoint; + --nextCodePoint; continue; } + cdataEnd = true; } else if (0x9 == ch || 0xA == ch || 0xD == ch || + (0x20 <= ch && ch <= 0xD7FF) || (0xE000 <= ch && ch <= 0xFFFD) || (0x10000 <= ch && ch <= 0x10FFFF)) { continue; } - if (start < index) - buf.append(input, start, index - start); + if (start != lastCodePoint) + buf.append(start, lastCodePoint); if (cdataEnd) - { buf.append(CDATA_EMBEDED_END); - --index; - } else if (0 != ch) appendCharacterReference(buf, ch); - start = index + 1; + start = nextCodePoint; } - - if (start < input.size()) - buf.append(input, start, input.size() - start); + buf.append(start, input.end()); } void Transform::appendCharacterReference(LogString& buf, int ch) diff --git a/src/test/cpp/xml/xmllayouttest.cpp b/src/test/cpp/xml/xmllayouttest.cpp index 7ce31033..4cb3c575 100644 --- a/src/test/cpp/xml/xmllayouttest.cpp +++ b/src/test/cpp/xml/xmllayouttest.cpp @@ -373,18 +373,22 @@ public: */ void testProblemCharacters() { - std::string problemName = "'\"<com.example.bar>&\"'"; + // '\"<räksmörgås.josefsson.org>&\"' + std::string problemName = "'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'"; LOG4CXX_DECODE_CHAR(problemNameLS, problemName); + auto loggerNameLS = problemNameLS; + auto levelNameLS = problemNameLS; + Transcoder::encode(0xD822, problemNameLS); // Add an invalid character that should be stripped from attribute values std::string problemMessage = "'\001\"<Hello >\"\004'"; std::string expectedCdataValue = "'\"<Hello >\"'"; std::string expectedAttributeValue = "'\"<Hello >\"'"; // Invalid characters stripped LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage); - LevelPtr level = LevelPtr(new XLevel(6000, problemNameLS, 7)); + LevelPtr level = LevelPtr(new XLevel(6000, levelNameLS, 7)); NDC::push(problemName); MDC::clear(); - MDC::put(problemName, problemMessage); + MDC::put(problemNameLS, problemMessageLS); auto event = std::make_shared<LoggingEvent> - (problemNameLS, level, problemMessageLS, LOG4CXX_LOCATION); + (loggerNameLS, level, problemMessageLS, LOG4CXX_LOCATION); XMLLayout layout; layout.setProperties(true); Pool p;
