This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch xml_layout_unicode_support
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git

commit 42f543a11fe1f6970d8ac669365a29e2b6f253e1
Author: Stephen Webb <[email protected]>
AuthorDate: Thu Mar 19 17:20:25 2026 +1100

    Restore support for multi-byte code points in XML and HTML output
---
 src/main/cpp/transform.cpp         | 55 +++++++++++++++++---------------------
 src/test/cpp/xml/xmllayouttest.cpp | 12 ++++++---
 2 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/src/main/cpp/transform.cpp b/src/main/cpp/transform.cpp
index 9c56317b..be1812e8 100644
--- a/src/main/cpp/transform.cpp
+++ b/src/main/cpp/transform.cpp
@@ -36,10 +36,11 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor
                , 0x3E /* > */
                , 0x00
                };
-       size_t start = 0;
-       for (size_t index = 0; index < input.size(); ++index)
+       auto start = input.begin();
+       for (auto nextCodePoint = start; input.end() != nextCodePoint; )
        {
-               int ch = input[index];
+               auto lastCodePoint = nextCodePoint;
+               auto ch = Transcoder::decode(input, nextCodePoint);
                // Allowable XML 1.0 characters are:
                // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
                if (0x20 <= ch && ch <= 0xD7FF)
@@ -57,9 +58,9 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor
                        continue;
                }
 
-               if (start < index)
-                       buf.append(input, start, index - start);
-               start = index + 1;
+               if (start != lastCodePoint)
+                       buf.append(start, lastCodePoint);
+               start = nextCodePoint;
                switch (ch)
                {
                        case 0: // Do not output a NUL character
@@ -86,11 +87,7 @@ void appendValidCharacters(LogString& buf, const LogString& input, CharProcessor
                                break;
                }
        }
-
-       if (start < input.size())
-       {
-               buf.append(input, start, input.size() - start);
-       }
+       buf.append(start, input.end());
 }
 
 } // namespace
@@ -101,48 +98,46 @@ void Transform::appendEscapingCDATA(
        static const LogString CDATA_END(LOG4CXX_STR("]]>"));
        const LogString::size_type CDATA_END_LEN = 3;
        static const LogString 
CDATA_EMBEDED_END(LOG4CXX_STR("]]&gt;<![CDATA["));
-       size_t start = 0;
-       for (size_t index = 0; index < input.size(); ++index)
+       auto start = input.begin();
+       for (auto nextCodePoint = start; input.end() != nextCodePoint; )
        {
-               int ch = input[index];
+               auto lastCodePoint = nextCodePoint;
+               auto ch = Transcoder::decode(input, nextCodePoint);
                bool cdataEnd = false;
                // Allowable XML 1.0 characters are:
                // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
-               if (0x20 <= ch && ch <= 0xD7FF)
+               if (CDATA_END[0] == ch)
                {
-                       if (CDATA_END[0] == ch &&
-                               index + CDATA_END_LEN <= input.size() &&
-                               0 == input.compare(index, CDATA_END_LEN, 
CDATA_END))
+                       if (CDATA_END[1] != Transcoder::decode(input, 
nextCodePoint))
                        {
-                               index += CDATA_END_LEN;
-                               cdataEnd = true;
+                               --nextCodePoint;
+                               continue;
                        }
-                       else
+                       if (CDATA_END[2] != Transcoder::decode(input, 
nextCodePoint))
                        {
+                               --nextCodePoint;
+                               --nextCodePoint;
                                continue;
                        }
+                       cdataEnd = true;
                }
                else if (0x9 == ch || 0xA == ch || 0xD == ch ||
+                               (0x20 <= ch && ch <= 0xD7FF) ||
                                (0xE000 <= ch && ch <= 0xFFFD) ||
                                (0x10000 <= ch && ch <= 0x10FFFF))
                {
                        continue;
                }
 
-               if (start < index)
-                       buf.append(input, start, index - start);
+               if (start != lastCodePoint)
+                       buf.append(start, lastCodePoint);
                if (cdataEnd)
-               {
                        buf.append(CDATA_EMBEDED_END);
-                       --index;
-               }
                else if (0 != ch)
                        appendCharacterReference(buf, ch);
-               start = index + 1;
+               start = nextCodePoint;
        }
-
-       if (start < input.size())
-               buf.append(input, start, input.size() - start);
+       buf.append(start, input.end());
 }
 
 void Transform::appendCharacterReference(LogString& buf, int ch)
diff --git a/src/test/cpp/xml/xmllayouttest.cpp b/src/test/cpp/xml/xmllayouttest.cpp
index 7ce31033..4cb3c575 100644
--- a/src/test/cpp/xml/xmllayouttest.cpp
+++ b/src/test/cpp/xml/xmllayouttest.cpp
@@ -373,18 +373,22 @@ public:
         */
        void testProblemCharacters()
        {
-               std::string problemName = "'\"<com.example.bar>&\"'";
+               // '\"<räksmörgås.josefsson.org>&\"'
+               std::string problemName = 
"'\"\162\303\244\153\163\155\303\266\162\147\303\245\163\056\152\157\163\145\146\163\163\157\156\056\157\162\147>&\"'";
                LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
+               auto loggerNameLS = problemNameLS;
+               auto levelNameLS = problemNameLS;
+               Transcoder::encode(0xD822, problemNameLS); // Add an invalid 
character that should be stripped from attribute values
                std::string problemMessage = "'\001\"<Hello >\"\004'";
                std::string expectedCdataValue = "'&#x1;\"<Hello >\"&#x4;'";
                std::string expectedAttributeValue = "'\"<Hello >\"'"; // 
Invalid characters stripped
                LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage);
-               LevelPtr level = LevelPtr(new XLevel(6000, problemNameLS, 7));
+               LevelPtr level = LevelPtr(new XLevel(6000, levelNameLS, 7));
                NDC::push(problemName);
                MDC::clear();
-               MDC::put(problemName, problemMessage);
+               MDC::put(problemNameLS, problemMessageLS);
                auto event = std::make_shared<LoggingEvent>
-                               (problemNameLS, level, problemMessageLS, 
LOG4CXX_LOCATION);
+                               (loggerNameLS, level, problemMessageLS, 
LOG4CXX_LOCATION);
                XMLLayout layout;
                layout.setProperties(true);
                Pool p;

Reply via email to