This is an automated email from the ASF dual-hosted git repository.
swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
The following commit(s) were added to refs/heads/master by this push:
new a4ca4d8f Fix application hang in Transcoder when processing malformed
character sequences (#711)
a4ca4d8f is described below
commit a4ca4d8f54a7abae0f87b1a1bd787b0564092f0a
Author: sahvx655-wq <[email protected]>
AuthorDate: Sat Jun 6 17:56:18 2026 +0530
Fix application hang in Transcoder when processing malformed character
sequences (#711)
---
src/main/cpp/transcoder.cpp | 56 ++++++++++++++++++++++++++---
src/site/markdown/change-report-gh.md | 1 +
src/test/cpp/helpers/transcodertestcase.cpp | 43 ++++++++++++++++++++++
3 files changed, 95 insertions(+), 5 deletions(-)
diff --git a/src/main/cpp/transcoder.cpp b/src/main/cpp/transcoder.cpp
index b5a9d3d0..ddc7e1c0 100644
--- a/src/main/cpp/transcoder.cpp
+++ b/src/main/cpp/transcoder.cpp
@@ -512,6 +512,7 @@ void Transcoder::decode(const std::wstring& src, LogString& dst)
while (i != src.end())
{
+ std::wstring::const_iterator start = i;
unsigned int cp = decode(src, i);
if (cp != 0xFFFF)
@@ -521,7 +522,10 @@ void Transcoder::decode(const std::wstring& src, LogString& dst)
else
{
dst.append(1, LOSSCHAR);
- i++;
+ if (i == start)
+ {
+ i++;
+ }
}
}
@@ -536,6 +540,7 @@ void Transcoder::encode(const LogString& src, std::wstring& dst)
for (LogString::const_iterator i = src.begin(); i != src.end();)
{
+ LogString::const_iterator start = i;
unsigned int cp = Transcoder::decode(src, i);
if (cp != 0xFFFF)
@@ -545,7 +550,10 @@ void Transcoder::encode(const LogString& src, std::wstring& dst)
else
{
dst.append(1, LOSSCHAR);
- i++;
+ if (i == start)
+ {
+ i++;
+ }
}
}
@@ -610,8 +618,21 @@ void Transcoder::decode(const std::basic_string<UniChar>& src, LogString& dst)
for (std::basic_string<UniChar>::const_iterator i = src.begin();
i != src.end();)
{
+ std::basic_string<UniChar>::const_iterator start = i;
unsigned int cp = decode(src, i);
- encode(cp, dst);
+
+ if (cp != 0xFFFF)
+ {
+ encode(cp, dst);
+ }
+ else
+ {
+ dst.append(1, LOSSCHAR);
+ if (i == start)
+ {
+ i++;
+ }
+ }
}
#endif
@@ -626,8 +647,21 @@ void Transcoder::encode(const LogString& src, std::basic_string<UniChar>& dst)
for (LogString::const_iterator i = src.begin();
i != src.end();)
{
+ LogString::const_iterator start = i;
unsigned int cp = decode(src, i);
- encode(cp, dst);
+
+ if (cp != 0xFFFF)
+ {
+ encode(cp, dst);
+ }
+ else
+ {
+ encode(LOSSCHAR, dst);
+ if (i == start)
+ {
+ i++;
+ }
+ }
}
#endif
@@ -666,8 +700,20 @@ void Transcoder::decode(const CFStringRef& src, LogString& dst)
CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]);
for (auto i = tmp.begin(); i != tmp.end(); )
{
+ auto start = i;
unsigned int cp = decodeUTF16(tmp, i);
- encode(cp, dst);
+ if (cp != 0xFFFF)
+ {
+ encode(cp, dst);
+ }
+ else
+ {
+ dst.append(1, LOSSCHAR);
+ if (i == start)
+ {
+ i++;
+ }
+ }
}
}
}
diff --git a/src/site/markdown/change-report-gh.md b/src/site/markdown/change-report-gh.md
index 892d98fb..0c5f114c 100644
--- a/src/site/markdown/change-report-gh.md
+++ b/src/site/markdown/change-report-gh.md
@@ -74,6 +74,7 @@ The following issues have been addressed:
, [#664](https://github.com/apache/logging-log4cxx/pull/664)
, [#669](https://github.com/apache/logging-log4cxx/pull/669)
, [#670](https://github.com/apache/logging-log4cxx/pull/670)
+ , [#711](https://github.com/apache/logging-log4cxx/pull/711)
* A lack of robustness dealing with values near numeric limits
* [#633](https://github.com/apache/logging-log4cxx/pull/633)
diff --git a/src/test/cpp/helpers/transcodertestcase.cpp b/src/test/cpp/helpers/transcodertestcase.cpp
index f405bb06..18e7070c 100644
--- a/src/test/cpp/helpers/transcodertestcase.cpp
+++ b/src/test/cpp/helpers/transcodertestcase.cpp
@@ -74,12 +74,17 @@ LOGUNIT_CLASS(TranscoderTestCase)
LOGUNIT_TEST(testEncodeUTF16BE_BMP);
LOGUNIT_TEST(testEncodeUTF16BE_Supplementary);
LOGUNIT_TEST(testEncodeUTF16LE_Supplementary);
+#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR || defined(WIN32) || defined(_WIN32)
+ LOGUNIT_TEST(testEncodeWString_FFFF_KeepsFollowingByte);
+#endif
#if LOG4CXX_UNICHAR_API
LOGUNIT_TEST(udecode2);
LOGUNIT_TEST(udecode4);
LOGUNIT_TEST(uencode1);
LOGUNIT_TEST(uencode3);
LOGUNIT_TEST(uencode5);
+ LOGUNIT_TEST(testDecodeUniChar_Malformed_DoesNotHang);
+ LOGUNIT_TEST(testEncodeUniChar_Malformed_DoesNotHang);
#endif
#if LOG4CXX_LOGCHAR_IS_UTF8
LOGUNIT_TEST(encodeCharsetName1);
@@ -520,6 +525,22 @@ public:
LOGUNIT_ASSERT_EQUAL((unsigned char) 0xDE, (unsigned char) raw[3]);
}
+#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR || defined(WIN32) || defined(_WIN32)
+ void testEncodeWString_FFFF_KeepsFollowingByte()
+ {
+ LogString src;
+#if LOG4CXX_LOGCHAR_IS_UTF8
+ src = "\xEF\xBF\xBF\x41"; // U+FFFF then 'A'
+#else
+ src.append(1, 0xFFFF);
+ src.append(1, 0x41);
+#endif
+ std::wstring out;
+ Transcoder::encode(src, out);
+ LOGUNIT_ASSERT(out.find(L"A") != std::wstring::npos);
+ }
+#endif
+
#if LOG4CXX_UNICHAR_API
void udecode2()
@@ -579,6 +600,28 @@ public:
//
LOGUNIT_ASSERT_EQUAL(std::basic_string<UniChar>(greeting), encoded);
}
+
+ void testDecodeUniChar_Malformed_DoesNotHang()
+ {
+ const UniChar malformed[] = { 0xD800, 'A', 0 };
+ LogString decoded;
+ Transcoder::decode(malformed, decoded);
+ LOGUNIT_ASSERT(decoded.find(LOG4CXX_STR("A")) != LogString::npos);
+ }
+
+ void testEncodeUniChar_Malformed_DoesNotHang()
+ {
+ LogString src;
+#if LOG4CXX_LOGCHAR_IS_UTF8
+ src = "\xED\xA0\x80\x41"; // U+D800 then 'A'
+#else
+ src.append(1, 0xD800);
+ src.append(1, 0x41);
+#endif
+ std::basic_string<UniChar> out;
+ Transcoder::encode(src, out);
+ LOGUNIT_ASSERT(out.find('A') != std::basic_string<UniChar>::npos);
+ }
#endif
#if LOG4CXX_LOGCHAR_IS_UTF8