This is an automated email from the ASF dual-hosted git repository.
swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
The following commit(s) were added to refs/heads/master by this push:
new 8a183728 Strip illegal characters in XML output and HTML attributes
(#609)
8a183728 is described below
commit 8a183728f79c9710be66a52d8357ea34a84d3405
Author: Stephen Webb <[email protected]>
AuthorDate: Thu Mar 19 12:19:12 2026 +1100
Strip illegal characters in XML output and HTML attributes (#609)
---
src/main/cpp/htmllayout.cpp | 4 +-
src/main/cpp/transform.cpp | 171 +++++++++++++++++----------
src/main/cpp/xmllayout.cpp | 20 ++--
src/main/include/log4cxx/helpers/transform.h | 40 +++++--
src/test/cpp/xml/xmllayouttest.cpp | 22 ++--
src/test/cpp/xml/xmllayouttestcase.cpp | 2 +-
src/test/resources/witness/xmlLayout.3 | 2 +-
7 files changed, 171 insertions(+), 90 deletions(-)
diff --git a/src/main/cpp/htmllayout.cpp b/src/main/cpp/htmllayout.cpp
index bcfce2f1..ac513075 100644
--- a/src/main/cpp/htmllayout.cpp
+++ b/src/main/cpp/htmllayout.cpp
@@ -99,7 +99,7 @@ void HTMLLayout::format(LogString& output,
output.append(LOG4CXX_STR("<td title=\""));
LogString threadName(event->getThreadName());
- Transform::appendEscapingTags(output, threadName);
+ Transform::appendLegalCharacters(output, threadName);
output.append(LOG4CXX_STR(" thread\">"));
Transform::appendEscapingTags(output, threadName);
output.append(LOG4CXX_STR("</td>"));
@@ -128,7 +128,7 @@ void HTMLLayout::format(LogString& output,
output.append(LOG4CXX_EOL);
output.append(LOG4CXX_STR("<td title=\""));
- Transform::appendEscapingTags(output, event->getLoggerName());
+ Transform::appendLegalCharacters(output, event->getLoggerName());
output.append(LOG4CXX_STR(" logger\">"));
Transform::appendEscapingTags(output, event->getLoggerName());
output.append(LOG4CXX_STR("</td>"));
diff --git a/src/main/cpp/transform.cpp b/src/main/cpp/transform.cpp
index 6cc7ca77..9c56317b 100644
--- a/src/main/cpp/transform.cpp
+++ b/src/main/cpp/transform.cpp
@@ -18,36 +18,52 @@
#include <log4cxx/logstring.h>
#include <log4cxx/helpers/transform.h>
#include <log4cxx/helpers/widelife.h>
+#include <functional>
using namespace LOG4CXX_NS;
using namespace LOG4CXX_NS::helpers;
-
-
-void Transform::appendEscapingTags(
- LogString& buf, const LogString& input)
+namespace
{
- //Check if the string is zero length -- if so, return
- //what was sent in.
-
- if (input.length() == 0 )
- {
- return;
- }
+using CharProcessor = std::function<void(LogString&, int)>;
- logchar specials[] = { 0x22 /* " */, 0x26 /* & */, 0x3C /* < */, 0x3E
/* > */, 0x00 };
+void appendValidCharacters(LogString& buf, const LogString& input,
CharProcessor handler = {})
+{
+ static const logchar specials[] =
+ { 0x22 /* " */
+ , 0x26 /* & */
+ , 0x3C /* < */
+ , 0x3E /* > */
+ , 0x00
+ };
size_t start = 0;
- size_t special = input.find_first_of(specials, start);
-
- while (special != LogString::npos)
+ for (size_t index = 0; index < input.size(); ++index)
{
- if (special > start)
+ int ch = input[index];
+ // Allowable XML 1.0 characters are:
+ // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]
+ if (0x20 <= ch && ch <= 0xD7FF)
+ {
+ auto pSpecial = &specials[0];
+ while (*pSpecial && *pSpecial != ch)
+ ++pSpecial;
+ if (!*pSpecial)
+ continue;
+ }
+ else if (0x9 == ch || 0xA == ch || 0xD == ch ||
+ (0xE000 <= ch && ch <= 0xFFFD) ||
+ (0x10000 <= ch && ch <= 0x10FFFF))
{
- buf.append(input, start, special - start);
+ continue;
}
- switch (input[special])
+ if (start < index)
+ buf.append(input, start, index - start);
+ start = index + 1;
+ switch (ch)
{
+ case 0: // Do not output a NUL character
+ break;
case 0x22:
buf.append(LOG4CXX_STR("""));
break;
@@ -65,20 +81,10 @@ void Transform::appendEscapingTags(
break;
default:
- buf.append(1, input[special]);
+ if (handler)
+ handler(buf, ch);
break;
}
-
- start = special + 1;
-
- if (special < input.size())
- {
- special = input.find_first_of(specials, start);
- }
- else
- {
- special = LogString::npos;
- }
}
if (start < input.size())
@@ -87,46 +93,91 @@ void Transform::appendEscapingTags(
}
}
+} // namespace
+
void Transform::appendEscapingCDATA(
LogString& buf, const LogString& input)
{
- static const WideLife<LogString> CDATA_END(LOG4CXX_STR("]]>"));
- static const WideLife<LogString>
CDATA_EMBEDED_END(LOG4CXX_STR("]]>]]><![CDATA["));
-
+ static const LogString CDATA_END(LOG4CXX_STR("]]>"));
const LogString::size_type CDATA_END_LEN = 3;
-
-
- if (input.length() == 0 )
- {
- return;
- }
-
- LogString::size_type end = input.find(CDATA_END);
-
- if (end == LogString::npos)
- {
- buf.append(input);
- return;
- }
-
- LogString::size_type start = 0;
-
- while (end != LogString::npos)
+ static const LogString
CDATA_EMBEDED_END(LOG4CXX_STR("]]><![CDATA["));
+ size_t start = 0;
+ for (size_t index = 0; index < input.size(); ++index)
{
- buf.append(input, start, end - start);
- buf.append(CDATA_EMBEDED_END);
- start = end + CDATA_END_LEN;
-
- if (start < input.length())
+ int ch = input[index];
+ bool cdataEnd = false;
+ // Allowable XML 1.0 characters are:
+ // #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF]
+ if (0x20 <= ch && ch <= 0xD7FF)
{
- end = input.find(CDATA_END, start);
+ if (CDATA_END[0] == ch &&
+ index + CDATA_END_LEN <= input.size() &&
+ 0 == input.compare(index, CDATA_END_LEN,
CDATA_END))
+ {
+ index += CDATA_END_LEN;
+ cdataEnd = true;
+ }
+ else
+ {
+ continue;
+ }
}
- else
+ else if (0x9 == ch || 0xA == ch || 0xD == ch ||
+ (0xE000 <= ch && ch <= 0xFFFD) ||
+ (0x10000 <= ch && ch <= 0x10FFFF))
+ {
+ continue;
+ }
+
+ if (start < index)
+ buf.append(input, start, index - start);
+ if (cdataEnd)
{
- return;
+ buf.append(CDATA_EMBEDED_END);
+ --index;
}
+ else if (0 != ch)
+ appendCharacterReference(buf, ch);
+ start = index + 1;
}
- buf.append(input, start, input.length() - start);
+ if (start < input.size())
+ buf.append(input, start, input.size() - start);
+}
+
+void Transform::appendCharacterReference(LogString& buf, int ch)
+{
+ auto toHexDigit = [](int ch) -> int
+ {
+ return (10 <= ch ? (0x61 - 10) : 0x30) + ch;
+ };
+ buf.push_back('&');
+ buf.push_back('#');
+ buf.push_back('x');
+ if (0xFFFFFFF < ch)
+ buf.push_back(toHexDigit((ch & 0x70000000) >> 28));
+ if (0xFFFFFF < ch)
+ buf.push_back(toHexDigit((ch & 0xF000000) >> 24));
+ if (0xFFFFF < ch)
+ buf.push_back(toHexDigit((ch & 0xF00000) >> 20));
+ if (0xFFFF < ch)
+ buf.push_back(toHexDigit((ch & 0xF0000) >> 16));
+ if (0xFFF < ch)
+ buf.push_back(toHexDigit((ch & 0xF000) >> 12));
+ if (0xFF < ch)
+ buf.push_back(toHexDigit((ch & 0xF00) >> 8));
+ if (0xF < ch)
+ buf.push_back(toHexDigit((ch & 0xF0) >> 4));
+ buf.push_back(toHexDigit(ch & 0xF));
+ buf.push_back(';');
}
+void Transform::appendEscapingTags(LogString& buf, const LogString& input)
+{
+ appendValidCharacters(buf, input, appendCharacterReference);
+}
+
+void Transform::appendLegalCharacters(LogString& buf, const LogString& input)
+{
+ appendValidCharacters(buf, input);
+}
diff --git a/src/main/cpp/xmllayout.cpp b/src/main/cpp/xmllayout.cpp
index e26be6c8..32badca6 100644
--- a/src/main/cpp/xmllayout.cpp
+++ b/src/main/cpp/xmllayout.cpp
@@ -81,13 +81,13 @@ void XMLLayout::format(LogString& output,
auto& lsMsg = event->getRenderedMessage();
output.reserve(m_priv->expectedPatternLength + lsMsg.size());
output.append(LOG4CXX_STR("<log4j:event logger=\""));
- Transform::appendEscapingTags(output, event->getLoggerName());
+ Transform::appendLegalCharacters(output, event->getLoggerName());
output.append(LOG4CXX_STR("\" timestamp=\""));
StringHelper::toString(event->getTimeStamp() / 1000L, p, output);
output.append(LOG4CXX_STR("\" level=\""));
- Transform::appendEscapingTags(output, event->getLevel()->toString());
+ Transform::appendLegalCharacters(output, event->getLevel()->toString());
output.append(LOG4CXX_STR("\" thread=\""));
- Transform::appendEscapingTags(output, event->getThreadName());
+ Transform::appendLegalCharacters(output, event->getThreadName());
output.append(LOG4CXX_STR("\">"));
output.append(LOG4CXX_EOL);
@@ -113,13 +113,13 @@ void XMLLayout::format(LogString& output,
output.append(LOG4CXX_STR("<log4j:locationInfo class=\""));
const LocationInfo& locInfo = event->getLocationInformation();
LOG4CXX_DECODE_CHAR(className, locInfo.getClassName());
- Transform::appendEscapingTags(output, className);
+ Transform::appendLegalCharacters(output, className);
output.append(LOG4CXX_STR("\" method=\""));
LOG4CXX_DECODE_CHAR(method, locInfo.getMethodName());
- Transform::appendEscapingTags(output, method);
+ Transform::appendLegalCharacters(output, method);
output.append(LOG4CXX_STR("\" file=\""));
LOG4CXX_DECODE_CHAR(fileName, locInfo.getFileName());
- Transform::appendEscapingTags(output, fileName);
+ Transform::appendLegalCharacters(output, fileName);
output.append(LOG4CXX_STR("\" line=\""));
StringHelper::toString(locInfo.getLineNumber(), p, output);
output.append(LOG4CXX_STR("\"/>"));
@@ -143,9 +143,9 @@ void XMLLayout::format(LogString& output,
if (event->getMDC(key, value))
{
output.append(LOG4CXX_STR("<log4j:data
name=\""));
- Transform::appendEscapingTags(output,
key);
+
Transform::appendLegalCharacters(output, key);
output.append(LOG4CXX_STR("\"
value=\""));
- Transform::appendEscapingTags(output,
value);
+
Transform::appendLegalCharacters(output, value);
output.append(LOG4CXX_STR("\"/>"));
output.append(LOG4CXX_EOL);
}
@@ -158,9 +158,9 @@ void XMLLayout::format(LogString& output,
if (event->getProperty(key, value))
{
output.append(LOG4CXX_STR("<log4j:data
name=\""));
- Transform::appendEscapingTags(output,
key);
+
Transform::appendLegalCharacters(output, key);
output.append(LOG4CXX_STR("\"
value=\""));
- Transform::appendEscapingTags(output,
value);
+
Transform::appendLegalCharacters(output, value);
output.append(LOG4CXX_STR("\"/>"));
output.append(LOG4CXX_EOL);
}
diff --git a/src/main/include/log4cxx/helpers/transform.h
b/src/main/include/log4cxx/helpers/transform.h
index 4ae67aac..70ee53d0 100644
--- a/src/main/include/log4cxx/helpers/transform.h
+++ b/src/main/include/log4cxx/helpers/transform.h
@@ -35,6 +35,9 @@ class LOG4CXX_EXPORT Transform
* (ie, <b>, <table>, etc) to \c buf
* while replacing any '<' and '>' characters
* with respective predefined entity references.
+ * Any NUL character in \c input is not copied to \c buf.
+ * A character reference is used in place of a character
+ * whose value is not permitted by the XML 1.0 specification.
*
* @param buf output stream where to write the modified string.
* @param input The text to be converted.
@@ -43,17 +46,40 @@ class LOG4CXX_EXPORT Transform
LogString& buf, const LogString& input);
/**
- * Add \c input to \c buf while ensuring embeded CDEnd strings
(]]>)
- * are handled properly within the message, NDC and throwable
tag text.
+ * Add \c input to \c buf while ensuring embedded CDEnd strings
(]]>)
+ * are handled properly within the message.
+ * The initial CDStart (<![CDATA[) and terminating CDEnd
(]]>)
+ * of the CDATA section must be added by the calling method.
+ * Any NUL character in \c input is not copied to \c buf.
+ * A character reference is used in place of a character
+ * whose value is not permitted by the XML 1.0 specification.
*
- * @param buf output stream holding the XML data to this point.
The
- * initial CDStart (<![CDATA[) and final CDEnd (]]>) of the CDATA
- * section are the responsibility of the calling method.
- * @param input The String that is inserted into an existing
CDATA
- * Section within buf.
+ * @param buf Transformed \c input text is added to this.
+ * @param input The text to be appended to \c buf
*/
static void appendEscapingCDATA(
LogString& buf, const LogString& input);
+
+ /**
+ * Add \c ch to \c buf as an XML character reference.
+ *
+ * @param buf output stream holding the XML data to this point.
+ * @param ch the value to encode as a XML character reference
+ */
+ static void appendCharacterReference(LogString& buf, int ch);
+
+ /**
+ * Append a transformation of \c input onto \c buf.
+ * Only the valid XML 1.0 specification characters
+ * (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
[#x10000-#x10FFFF])
+ * are copied to \c buf.
+ * Any special character (<, >, & and ")
+ * is replaced with an entity reference.
+ *
+ * @param buf Transformed \c input text is added to this.
+ * @param input The text to be transformed.
+ * */
+ static void appendLegalCharacters(LogString& buf, const
LogString& input);
}; // class Transform
} // namespace helpers
} //namespace log4cxx
diff --git a/src/test/cpp/xml/xmllayouttest.cpp
b/src/test/cpp/xml/xmllayouttest.cpp
index 710e9fca..7ce31033 100644
--- a/src/test/cpp/xml/xmllayouttest.cpp
+++ b/src/test/cpp/xml/xmllayouttest.cpp
@@ -309,7 +309,7 @@ public:
void testFormatWithNDC()
{
LogString logger =
LOG4CXX_STR("org.apache.log4j.xml.XMLLayoutTest");
- NDC::push("NDC goes here");
+ NDC::push("\001NDC goes here\004");
LoggingEventPtr event = LoggingEventPtr(
new LoggingEvent(
@@ -337,7 +337,7 @@ public:
}
else
{
- checkNDCElement(node, "NDC goes here");
+ checkNDCElement(node, "NDC goes
here");
}
}
@@ -373,14 +373,18 @@ public:
*/
void testProblemCharacters()
{
- std::string problemName = "com.example.bar<>&\"'";
- LogString problemNameLS = LOG4CXX_STR("com.example.bar<>&\"'");
+ std::string problemName = "'\"<com.example.bar>&\"'";
+ LOG4CXX_DECODE_CHAR(problemNameLS, problemName);
+ std::string problemMessage = "'\001\"<Hello >\"\004'";
+ std::string expectedCdataValue = "'\"<Hello >\"'";
+ std::string expectedAttributeValue = "'\"<Hello >\"'"; //
Invalid characters stripped
+ LOG4CXX_DECODE_CHAR(problemMessageLS, problemMessage);
LevelPtr level = LevelPtr(new XLevel(6000, problemNameLS, 7));
NDC::push(problemName);
MDC::clear();
- MDC::put(problemName, problemName);
- LoggingEventPtr event = LoggingEventPtr(
- new LoggingEvent(problemNameLS, level,
problemNameLS, LOG4CXX_LOCATION));
+ MDC::put(problemName, problemMessage);
+ auto event = std::make_shared<LoggingEvent>
+ (problemNameLS, level, problemMessageLS,
LOG4CXX_LOCATION);
XMLLayout layout;
layout.setProperties(true);
Pool p;
@@ -402,7 +406,7 @@ public:
switch (childElementCount)
{
case 1:
- checkMessageElement(node, problemName);
+ checkMessageElement(node,
expectedCdataValue);
break;
case 2:
@@ -410,7 +414,7 @@ public:
break;
case 3:
- checkPropertiesElement(node,
problemName.c_str(), problemName.c_str());
+ checkPropertiesElement(node,
problemName.c_str(), expectedAttributeValue.c_str());
break;
default:
diff --git a/src/test/cpp/xml/xmllayouttestcase.cpp
b/src/test/cpp/xml/xmllayouttestcase.cpp
index 05711daa..2a0138ca 100644
--- a/src/test/cpp/xml/xmllayouttestcase.cpp
+++ b/src/test/cpp/xml/xmllayouttestcase.cpp
@@ -176,7 +176,7 @@ public:
LOG4CXX_TRACE(logger,
LOG4CXX_TEST_STR("Message with embedded
<![CDATA[<hello>hi</hello>]]>."));
LOG4CXX_DEBUG(logger,
- LOG4CXX_TEST_STR("Message with embedded
<![CDATA[<hello>hi</hello>]]>."));
+ LOG4CXX_TEST_STR("Message with terminating
<![CDATA[<hello>hi</hello>]]>"));
XMLTimestampFilter xmlTimestampFilter;
XMLThreadFilter xmlThreadFilter;
diff --git a/src/test/resources/witness/xmlLayout.3
b/src/test/resources/witness/xmlLayout.3
index 7ca78096..6e995ec4 100644
--- a/src/test/resources/witness/xmlLayout.3
+++ b/src/test/resources/witness/xmlLayout.3
@@ -4,7 +4,7 @@
</log4j:event>
<log4j:event logger="org.apache.log4j.xml.XMLLayoutTestCase" timestamp="XXX"
level="DEBUG" thread="main">
-<log4j:message><![CDATA[Message with embedded
<![CDATA[<hello>hi</hello>]]>]]><![CDATA[.]]></log4j:message>
+<log4j:message><![CDATA[Message with terminating
<![CDATA[<hello>hi</hello>]]>]]><![CDATA[]]></log4j:message>
<log4j:locationInfo class="XMLLayoutTestCase" method="testCDATA"
file="XMLLayoutTestCase.java" line="X"/>
</log4j:event>