This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a183728 Strip illegal characters in XML output and HTML attributes 
(#609)
8a183728 is described below

commit 8a183728f79c9710be66a52d8357ea34a84d3405
Author: Stephen Webb <[email protected]>
AuthorDate: Thu Mar 19 12:19:12 2026 +1100

    Strip illegal characters in XML output and HTML attributes (#609)
---
 src/main/cpp/htmllayout.cpp                  |   4 +-
 src/main/cpp/transform.cpp                   | 171 +++++++++++++++++----------
 src/main/cpp/xmllayout.cpp                   |  20 ++--
 src/main/include/log4cxx/helpers/transform.h |  40 +++++--
 src/test/cpp/xml/xmllayouttest.cpp           |  22 ++--
 src/test/cpp/xml/xmllayouttestcase.cpp       |   2 +-
 src/test/resources/witness/xmlLayout.3       |   2 +-
 7 files changed, 171 insertions(+), 90 deletions(-)

diff --git a/src/main/cpp/htmllayout.cpp b/src/main/cpp/htmllayout.cpp
index bcfce2f1..ac513075 100644
--- a/src/main/cpp/htmllayout.cpp
+++ b/src/main/cpp/htmllayout.cpp
@@ -99,7 +99,7 @@ void HTMLLayout::format(LogString& output,
 
        output.append(LOG4CXX_STR("<td title=\""));
        LogString threadName(event->getThreadName());
-       Transform::appendEscapingTags(output, threadName);
+       Transform::appendLegalCharacters(output, threadName);
        output.append(LOG4CXX_STR(" thread\">"));
        Transform::appendEscapingTags(output, threadName);
        output.append(LOG4CXX_STR("</td>"));
@@ -128,7 +128,7 @@ void HTMLLayout::format(LogString& output,
        output.append(LOG4CXX_EOL);
 
        output.append(LOG4CXX_STR("<td title=\""));
-       Transform::appendEscapingTags(output, event->getLoggerName());
+       Transform::appendLegalCharacters(output, event->getLoggerName());
        output.append(LOG4CXX_STR(" logger\">"));
        Transform::appendEscapingTags(output, event->getLoggerName());
        output.append(LOG4CXX_STR("</td>"));
diff --git a/src/main/cpp/transform.cpp b/src/main/cpp/transform.cpp
index 6cc7ca77..9c56317b 100644
--- a/src/main/cpp/transform.cpp
+++ b/src/main/cpp/transform.cpp
@@ -18,36 +18,52 @@
 #include <log4cxx/logstring.h>
 #include <log4cxx/helpers/transform.h>
 #include <log4cxx/helpers/widelife.h>
+#include <functional>
 
 using namespace LOG4CXX_NS;
 using namespace LOG4CXX_NS::helpers;
 
-
-
-void Transform::appendEscapingTags(
-       LogString& buf, const LogString& input)
+namespace
 {
-       //Check if the string is zero length -- if so, return
-       //what was sent in.
-
-       if (input.length() == 0 )
-       {
-               return;
-       }
+using CharProcessor = std::function<void(LogString&, int)>;
 
-       logchar specials[] = { 0x22 /* " */, 0x26 /* & */, 0x3C /* < */, 0x3E 
/* > */, 0x00 };
+void appendValidCharacters(LogString& buf, const LogString& input, 
CharProcessor handler = {})
+{
+       static const logchar specials[] =
+               { 0x22 /* " */
+               , 0x26 /* & */
+               , 0x3C /* < */
+               , 0x3E /* > */
+               , 0x00
+               };
        size_t start = 0;
-       size_t special = input.find_first_of(specials, start);
-
-       while (special != LogString::npos)
+       for (size_t index = 0; index < input.size(); ++index)
        {
-               if (special > start)
+               int ch = input[index];
+               // Allowable XML 1.0 characters are:
+               // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
[#x10000-#x10FFFF]
+               if (0x20 <= ch && ch <= 0xD7FF)
+               {
+                       auto pSpecial = &specials[0];
+                       while (*pSpecial && *pSpecial != ch)
+                               ++pSpecial;
+                       if (!*pSpecial)
+                               continue;
+               }
+               else if (0x9 == ch || 0xA == ch || 0xD == ch ||
+                               (0xE000 <= ch && ch <= 0xFFFD) ||
+                               (0x10000 <= ch && ch <= 0x10FFFF))
                {
-                       buf.append(input, start, special - start);
+                       continue;
                }
 
-               switch (input[special])
+               if (start < index)
+                       buf.append(input, start, index - start);
+               start = index + 1;
+               switch (ch)
                {
+                       case 0: // Do not output a NUL character
+                               break;
                        case 0x22:
                                buf.append(LOG4CXX_STR("&quot;"));
                                break;
@@ -65,20 +81,10 @@ void Transform::appendEscapingTags(
                                break;
 
                        default:
-                               buf.append(1, input[special]);
+                               if (handler)
+                                       handler(buf, ch);
                                break;
                }
-
-               start = special + 1;
-
-               if (special < input.size())
-               {
-                       special = input.find_first_of(specials, start);
-               }
-               else
-               {
-                       special = LogString::npos;
-               }
        }
 
        if (start < input.size())
@@ -87,46 +93,91 @@ void Transform::appendEscapingTags(
        }
 }
 
+} // namespace
+
 void Transform::appendEscapingCDATA(
        LogString& buf, const LogString& input)
 {
-       static const WideLife<LogString> CDATA_END(LOG4CXX_STR("]]>"));
-       static const WideLife<LogString> 
CDATA_EMBEDED_END(LOG4CXX_STR("]]>]]&gt;<![CDATA["));
-
+       static const LogString CDATA_END(LOG4CXX_STR("]]>"));
        const LogString::size_type CDATA_END_LEN = 3;
-
-
-       if (input.length() == 0 )
-       {
-               return;
-       }
-
-       LogString::size_type end = input.find(CDATA_END);
-
-       if (end == LogString::npos)
-       {
-               buf.append(input);
-               return;
-       }
-
-       LogString::size_type start = 0;
-
-       while (end != LogString::npos)
+       static const LogString 
CDATA_EMBEDED_END(LOG4CXX_STR("]]&gt;<![CDATA["));
+       size_t start = 0;
+       for (size_t index = 0; index < input.size(); ++index)
        {
-               buf.append(input, start, end - start);
-               buf.append(CDATA_EMBEDED_END);
-               start = end + CDATA_END_LEN;
-
-               if (start < input.length())
+               int ch = input[index];
+               bool cdataEnd = false;
+               // Allowable XML 1.0 characters are:
+               // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
[#x10000-#x10FFFF]
+               if (0x20 <= ch && ch <= 0xD7FF)
                {
-                       end = input.find(CDATA_END, start);
+                       if (CDATA_END[0] == ch &&
+                               index + CDATA_END_LEN <= input.size() &&
+                               0 == input.compare(index, CDATA_END_LEN, 
CDATA_END))
+                       {
+                               index += CDATA_END_LEN;
+                               cdataEnd = true;
+                       }
+                       else
+                       {
+                               continue;
+                       }
                }
-               else
+               else if (0x9 == ch || 0xA == ch || 0xD == ch ||
+                               (0xE000 <= ch && ch <= 0xFFFD) ||
+                               (0x10000 <= ch && ch <= 0x10FFFF))
+               {
+                       continue;
+               }
+
+               if (start < index)
+                       buf.append(input, start, index - start);
+               if (cdataEnd)
                {
-                       return;
+                       buf.append(CDATA_EMBEDED_END);
+                       --index;
                }
+               else if (0 != ch)
+                       appendCharacterReference(buf, ch);
+               start = index + 1;
        }
 
-       buf.append(input, start, input.length() - start);
+       if (start < input.size())
+               buf.append(input, start, input.size() - start);
+}
+
+void Transform::appendCharacterReference(LogString& buf, int ch)
+{
+       auto toHexDigit = [](int ch) -> int
+       {
+               return (10 <= ch ? (0x61 - 10) : 0x30) + ch;
+       };
+       buf.push_back('&');
+       buf.push_back('#');
+       buf.push_back('x');
+       if (0xFFFFFFF < ch)
+               buf.push_back(toHexDigit((ch & 0x70000000) >> 28));
+       if (0xFFFFFF < ch)
+               buf.push_back(toHexDigit((ch & 0xF000000) >> 24));
+       if (0xFFFFF < ch)
+               buf.push_back(toHexDigit((ch & 0xF00000) >> 20));
+       if (0xFFFF < ch)
+               buf.push_back(toHexDigit((ch & 0xF0000) >> 16));
+       if (0xFFF < ch)
+               buf.push_back(toHexDigit((ch & 0xF000) >> 12));
+       if (0xFF < ch)
+               buf.push_back(toHexDigit((ch & 0xF00) >> 8));
+       if (0xF < ch)
+               buf.push_back(toHexDigit((ch & 0xF0) >> 4));
+       buf.push_back(toHexDigit(ch & 0xF));
+       buf.push_back(';');
 }
 
+void Transform::appendEscapingTags(LogString& buf, const LogString& input)
+{
+       appendValidCharacters(buf, input, appendCharacterReference);
+}
+
+void Transform::appendLegalCharacters(LogString& buf, const LogString& input)
+{
+       appendValidCharacters(buf, input);
+}
diff --git a/src/main/cpp/xmllayout.cpp b/src/main/cpp/xmllayout.cpp
index e26be6c8..32badca6 100644
--- a/src/main/cpp/xmllayout.cpp
+++ b/src/main/cpp/xmllayout.cpp
@@ -81,13 +81,13 @@ void XMLLayout::format(LogString& output,
        auto& lsMsg = event->getRenderedMessage();
        output.reserve(m_priv->expectedPatternLength + lsMsg.size());
        output.append(LOG4CXX_STR("<log4j:event logger=\""));
-       Transform::appendEscapingTags(output, event->getLoggerName());
+       Transform::appendLegalCharacters(output, event->getLoggerName());
        output.append(LOG4CXX_STR("\" timestamp=\""));
        StringHelper::toString(event->getTimeStamp() / 1000L, p, output);
        output.append(LOG4CXX_STR("\" level=\""));
-       Transform::appendEscapingTags(output, event->getLevel()->toString());
+       Transform::appendLegalCharacters(output, event->getLevel()->toString());
        output.append(LOG4CXX_STR("\" thread=\""));
-       Transform::appendEscapingTags(output, event->getThreadName());
+       Transform::appendLegalCharacters(output, event->getThreadName());
        output.append(LOG4CXX_STR("\">"));
        output.append(LOG4CXX_EOL);
 
@@ -113,13 +113,13 @@ void XMLLayout::format(LogString& output,
                output.append(LOG4CXX_STR("<log4j:locationInfo class=\""));
                const LocationInfo& locInfo = event->getLocationInformation();
                LOG4CXX_DECODE_CHAR(className, locInfo.getClassName());
-               Transform::appendEscapingTags(output, className);
+               Transform::appendLegalCharacters(output, className);
                output.append(LOG4CXX_STR("\" method=\""));
                LOG4CXX_DECODE_CHAR(method, locInfo.getMethodName());
-               Transform::appendEscapingTags(output, method);
+               Transform::appendLegalCharacters(output, method);
                output.append(LOG4CXX_STR("\" file=\""));
                LOG4CXX_DECODE_CHAR(fileName, locInfo.getFileName());
-               Transform::appendEscapingTags(output, fileName);
+               Transform::appendLegalCharacters(output, fileName);
                output.append(LOG4CXX_STR("\" line=\""));
                StringHelper::toString(locInfo.getLineNumber(), p, output);
                output.append(LOG4CXX_STR("\"/>"));
@@ -143,9 +143,9 @@ void XMLLayout::format(LogString& output,
                                if (event->getMDC(key, value))
                                {
                                        output.append(LOG4CXX_STR("<log4j:data 
name=\""));
-                                       Transform::appendEscapingTags(output, 
key);
+                                       
Transform::appendLegalCharacters(output, key);
                                        output.append(LOG4CXX_STR("\" 
value=\""));
-                                       Transform::appendEscapingTags(output, 
value);
+                                       
Transform::appendLegalCharacters(output, value);
                                        output.append(LOG4CXX_STR("\"/>"));
                                        output.append(LOG4CXX_EOL);
                                }
@@ -158,9 +158,9 @@ void XMLLayout::format(LogString& output,
                                if (event->getProperty(key, value))
                                {
                                        output.append(LOG4CXX_STR("<log4j:data 
name=\""));
-                                       Transform::appendEscapingTags(output, 
key);
+                                       
Transform::appendLegalCharacters(output, key);
                                        output.append(LOG4CXX_STR("\" 
value=\""));
-                                       Transform::appendEscapingTags(output, 
value);
+                                       
Transform::appendLegalCharacters(output, value);
                                        output.append(LOG4CXX_STR("\"/>"));
                                        output.append(LOG4CXX_EOL);
                                }
diff --git a/src/main/include/log4cxx/helpers/transform.h 
b/src/main/include/log4cxx/helpers/transform.h
index 4ae67aac..70ee53d0 100644
--- a/src/main/include/log4cxx/helpers/transform.h
+++ b/src/main/include/log4cxx/helpers/transform.h
@@ -35,6 +35,9 @@ class LOG4CXX_EXPORT Transform
                * (ie, &lt;b&gt;, &lt;table&gt;, etc) to \c buf
                * while replacing any '<' and '>' characters
                * with respective predefined entity references.
+               * Any NUL character in \c input is not copied to \c buf.
+               * A character reference is used in place of a character
+               * whose value is not permitted by the XML 1.0 specification.
                *
                * @param buf output stream where to write the modified string.
                * @param input The text to be converted.
@@ -43,17 +46,40 @@ class LOG4CXX_EXPORT Transform
                        LogString& buf, const LogString& input);
 
                /**
-               * Add \c input to \c buf while ensuring embeded CDEnd strings 
(]]>)
-               * are handled properly within the message, NDC and throwable 
tag text.
+               * Add \c input to \c buf while ensuring embedded CDEnd strings 
(]]&gt;)
+               * are handled properly within the message.
+               * The initial CDStart (&lt;![CDATA[) and terminating CDEnd 
(]]&gt;)
+               * of the CDATA section must be added by the calling method.
+               * Any NUL character in \c input is not copied to \c buf.
+               * A character reference is used in place of a character
+               * whose value is not permitted by the XML 1.0 specification.
                *
-               * @param buf output stream holding the XML data to this point.  
The
-               * initial CDStart (<![CDATA[) and final CDEnd (]]>) of the CDATA
-               * section are the responsibility of the calling method.
-               * @param input The String that is inserted into an existing 
CDATA
-               * Section within buf.
+               * @param buf Transformed \c input text is added to this.
+               * @param input The text to be appended to \c buf
                */
                static void appendEscapingCDATA(
                        LogString& buf, const LogString& input);
+
+               /**
+               * Add \c ch to \c buf as an XML character reference.
+               *
+               * @param buf output stream holding the XML data to this point.
+               * @param ch the value to encode as a XML character reference
+               */
+               static void appendCharacterReference(LogString& buf, int ch);
+
+               /**
+               * Append a transformation of \c input onto \c buf.
+               * Only the valid XML 1.0 specification characters
+               * (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | 
[#x10000-#x10FFFF])
+               * are copied to \c buf.
+               * Any special character (&lt;, &gt;, &amp; and &quot;)
+               * is replaced with an entity reference.
+               *
+               * @param buf Transformed \c input text is added to this.
+               * @param input The text to be transformed.
+               * */
+               static void appendLegalCharacters(LogString& buf, const 
LogString& input);
 }; // class Transform
 }  // namespace helpers
 } //namespace log4cxx
diff --git a/src/test/cpp/xml/xmllayouttest.cpp 
b/src/test/cpp/xml/xmllayouttest.cpp
index 710e9fca..7ce31033 100644
--- a/src/test/cpp/xml/xmllayouttest.cpp
+++ b/src/test/cpp/xml/xmllayouttest.cpp
@@ -309,7 +309,7 @@ public:
        void testFormatWithNDC()
        {
                LogString logger = 
LOG4CXX_STR("org.apache.log4j.xml.XMLLayoutTest");
-               NDC::push("NDC goes here");
+               NDC::push("\001NDC goes here\004");
 
                LoggingEventPtr event = LoggingEventPtr(
                                new LoggingEvent(
@@ -337,7 +337,7 @@ public:
                        }
                        else
                        {
-                               checkNDCElement(node, "NDC goes here");
+                               checkNDCElement(node, "&#x1;NDC goes 
here&#x4;");
                        }
                }
 
@@ -373,14 +373,18 @@ public:
         */
        void testProblemCharacters()
        {
-               std::string problemName = "com.example.bar<>&\"'";
-               LogString problemNameLS = LOG4CXX_STR("com.example.bar<>&\"'");
+               std::string problemName = "'\"<com.example.bar>&\"'";
+               LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
+               std::string problemMessage = "'\001\"<Hello >\"\004'";
+               std::string expectedCdataValue = "'&#x1;\"<Hello >\"&#x4;'";
+               std::string expectedAttributeValue = "'\"<Hello >\"'"; // 
Invalid characters stripped
+               LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage);
                LevelPtr level = LevelPtr(new XLevel(6000, problemNameLS, 7));
                NDC::push(problemName);
                MDC::clear();
-               MDC::put(problemName, problemName);
-               LoggingEventPtr event = LoggingEventPtr(
-                               new LoggingEvent(problemNameLS, level, 
problemNameLS, LOG4CXX_LOCATION));
+               MDC::put(problemName, problemMessage);
+               auto event = std::make_shared<LoggingEvent>
+                               (problemNameLS, level, problemMessageLS, 
LOG4CXX_LOCATION);
                XMLLayout layout;
                layout.setProperties(true);
                Pool p;
@@ -402,7 +406,7 @@ public:
                        switch (childElementCount)
                        {
                                case 1:
-                                       checkMessageElement(node, problemName);
+                                       checkMessageElement(node, 
expectedCdataValue);
                                        break;
 
                                case 2:
@@ -410,7 +414,7 @@ public:
                                        break;
 
                                case 3:
-                                       checkPropertiesElement(node, 
problemName.c_str(), problemName.c_str());
+                                       checkPropertiesElement(node, 
problemName.c_str(), expectedAttributeValue.c_str());
                                        break;
 
                                default:
diff --git a/src/test/cpp/xml/xmllayouttestcase.cpp 
b/src/test/cpp/xml/xmllayouttestcase.cpp
index 05711daa..2a0138ca 100644
--- a/src/test/cpp/xml/xmllayouttestcase.cpp
+++ b/src/test/cpp/xml/xmllayouttestcase.cpp
@@ -176,7 +176,7 @@ public:
                LOG4CXX_TRACE(logger,
                        LOG4CXX_TEST_STR("Message with embedded 
<![CDATA[<hello>hi</hello>]]>."));
                LOG4CXX_DEBUG(logger,
-                       LOG4CXX_TEST_STR("Message with embedded 
<![CDATA[<hello>hi</hello>]]>."));
+                       LOG4CXX_TEST_STR("Message with terminating 
<![CDATA[<hello>hi</hello>]]>"));
 
                XMLTimestampFilter xmlTimestampFilter;
                XMLThreadFilter xmlThreadFilter;
diff --git a/src/test/resources/witness/xmlLayout.3 
b/src/test/resources/witness/xmlLayout.3
index 7ca78096..6e995ec4 100644
--- a/src/test/resources/witness/xmlLayout.3
+++ b/src/test/resources/witness/xmlLayout.3
@@ -4,7 +4,7 @@
 </log4j:event>
 
 <log4j:event logger="org.apache.log4j.xml.XMLLayoutTestCase" timestamp="XXX" 
level="DEBUG" thread="main">
-<log4j:message><![CDATA[Message with embedded 
<![CDATA[<hello>hi</hello>]]>]]&gt;<![CDATA[.]]></log4j:message>
+<log4j:message><![CDATA[Message with terminating 
<![CDATA[<hello>hi</hello>]]>]]&gt;<![CDATA[]]></log4j:message>
 <log4j:locationInfo class="XMLLayoutTestCase" method="testCDATA" 
file="XMLLayoutTestCase.java" line="X"/>
 </log4j:event>
 

Reply via email to