This is an automated email from the ASF dual-hosted git repository.
swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git
The following commit(s) were added to refs/heads/master by this push:
new 0e24ad2c Fix ISO Latin-1 decoder sign extension (#660)
0e24ad2c is described below
commit 0e24ad2c9a15c30d828446650790d95610b32b43
Author: metsw24-max <[email protected]>
AuthorDate: Tue May 12 10:26:06 2026 +0530
Fix ISO Latin-1 decoder sign extension (#660)
---
src/main/cpp/charsetdecoder.cpp | 2 +-
src/test/cpp/helpers/charsetdecodertestcase.cpp | 32 +++++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/src/main/cpp/charsetdecoder.cpp b/src/main/cpp/charsetdecoder.cpp
index b7f852a7..569e7c5c 100644
--- a/src/main/cpp/charsetdecoder.cpp
+++ b/src/main/cpp/charsetdecoder.cpp
@@ -355,7 +355,7 @@ class ISOLatinCharsetDecoder : public CharsetDecoder
while (src < srcEnd)
{
- auto sv = static_cast<unsigned int>(*src++);
+ auto sv = static_cast<unsigned int>(static_cast<unsigned char>(*src++));
Transcoder::encode(sv, out);
}
in.increment_position(availableByteCount);
diff --git a/src/test/cpp/helpers/charsetdecodertestcase.cpp b/src/test/cpp/helpers/charsetdecodertestcase.cpp
index ad357232..12b04a19 100644
--- a/src/test/cpp/helpers/charsetdecodertestcase.cpp
+++ b/src/test/cpp/helpers/charsetdecodertestcase.cpp
@@ -40,6 +40,7 @@ LOGUNIT_CLASS(CharsetDecoderTestCase)
LOGUNIT_TEST(decode2);
LOGUNIT_TEST(decode3);
LOGUNIT_TEST(decode4);
+ LOGUNIT_TEST(testISOLatinHighBytes);
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
LOGUNIT_TEST(testMbstowcsInfiniteLoop);
#endif
@@ -152,6 +153,37 @@ public:
}
}
+ /**
+ * Decoding ISO-8859-1 must map every byte 0x80..0xFF to the
+ * code point of the same numeric value. On platforms where plain
+ * char is signed (default on MSVC/GCC/Clang for x86/x64), a
+ * static_cast<unsigned int>(*src) sign-extends bytes >= 0x80 into
+ * 0xFFFFFFxx, which Transcoder::encode then treats as out-of-range
+ * Unicode and replaces with U+FFFD (or appends garbage on wchar_t
+ * builds). The .properties configuration loader uses this decoder
+ * per the Java spec, so the bug silently corrupts any non-ASCII
+ * Latin-1 byte that appears in a log4cxx configuration file.
+ */
+ void testISOLatinHighBytes()
+ {
+ char buf[1];
+ auto dec = CharsetDecoder::getISOLatinDecoder();
+ for (unsigned int b = 0x80; b <= 0xFF; ++b)
+ {
+ buf[0] = static_cast<char>(b);
+ ByteBuffer in(buf, 1);
+ LogString out;
+ log4cxx_status_t stat = dec->decode(in, out);
+ LOGUNIT_ASSERT_EQUAL(APR_SUCCESS, stat);
+
+ // Build the expected LogString by encoding code point b
+ // through the same Transcoder path the decoder uses.
+ LogString expected;
+ Transcoder::encode(b, expected);
+ LOGUNIT_ASSERT_EQUAL(expected, out);
+ }
+ }
+
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
/**
* Tests that we don't loop infinitely when mbsrtowcs refuses to consume