This is an automated email from the ASF dual-hosted git repository.

swebb2066 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/logging-log4cxx.git


The following commit(s) were added to refs/heads/master by this push:
     new 46037d01 Fix UTF-16 supplementary character encoding (#659)
46037d01 is described below

commit 46037d0143682257de2d8095cd79dfdd8a16b19a
Author: metsw24-max <[email protected]>
AuthorDate: Tue May 12 10:22:45 2026 +0530

    Fix UTF-16 supplementary character encoding (#659)
---
 src/main/cpp/transcoder.cpp                 |  4 +--
 src/test/cpp/helpers/transcodertestcase.cpp | 42 +++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/src/main/cpp/transcoder.cpp b/src/main/cpp/transcoder.cpp
index 82f20c19..02eb520b 100644
--- a/src/main/cpp/transcoder.cpp
+++ b/src/main/cpp/transcoder.cpp
@@ -165,7 +165,7 @@ size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst)
                unsigned char w = (unsigned char) ((ch >> 16) - 1);
                dst[0] = (char) (0xD8 + (w >> 2));
                dst[1] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F));
-               dst[2] = (char) (0xDC + ((ch & 0x30) >> 4));
+               dst[2] = (char) (0xDC + ((ch >> 8) & 0x03));
                dst[3] = (char) (ch & 0xFF);
                return 4;
        }
@@ -194,7 +194,7 @@ size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst)
                unsigned char w = (unsigned char) ((ch >> 16) - 1);
                dst[1] = (char) (0xD8 + (w >> 2));
                dst[0] = (char) (((w & 0x03) << 6) + ((ch >> 10) & 0x3F));
-               dst[3] = (char) (0xDC + ((ch & 0x30) >> 4));
+               dst[3] = (char) (0xDC + ((ch >> 8) & 0x03));
                dst[2] = (char) (ch & 0xFF);
                return 4;
        }
diff --git a/src/test/cpp/helpers/transcodertestcase.cpp b/src/test/cpp/helpers/transcodertestcase.cpp
index 23a5caa5..47904456 100644
--- a/src/test/cpp/helpers/transcodertestcase.cpp
+++ b/src/test/cpp/helpers/transcodertestcase.cpp
@@ -16,6 +16,7 @@
  */
 
 #include <log4cxx/helpers/transcoder.h>
+#include <log4cxx/helpers/bytebuffer.h>
 #include "../insertwide.h"
 #include "../logunit.h"
 
@@ -63,6 +64,9 @@ LOGUNIT_CLASS(TranscoderTestCase)
        LOGUNIT_TEST(testDecodeUTF8_2);
        LOGUNIT_TEST(testDecodeUTF8_3);
        LOGUNIT_TEST(testDecodeUTF8_4);
+       LOGUNIT_TEST(testEncodeUTF16BE_BMP);
+       LOGUNIT_TEST(testEncodeUTF16BE_Supplementary);
+       LOGUNIT_TEST(testEncodeUTF16LE_Supplementary);
 #if LOG4CXX_UNICHAR_API
        LOGUNIT_TEST(udecode2);
        LOGUNIT_TEST(udecode4);
@@ -312,6 +316,44 @@ public:
                LOGUNIT_ASSERT_EQUAL(true, iter == out.end());
        }
 
+       void testEncodeUTF16BE_BMP()
+       {
+               char raw[4] = { 0, 0, 0, 0 };
+               ByteBuffer buf(raw, sizeof(raw));
+               Transcoder::encodeUTF16BE(0x4E03, buf); // CJK 七
+               LOGUNIT_ASSERT_EQUAL((size_t) 2, buf.position());
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x4E, (unsigned char) raw[0]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x03, (unsigned char) raw[1]);
+       }
+
+       // U+1F600 (GRINNING FACE) encodes to UTF-16BE as D8 3D DE 00.
+       // Before the fix the low surrogate's high byte was derived from bits 4-5
+       // of the code point instead of bits 8-9, yielding 0xDC here instead of
+       // 0xDE, so the pair D83D DC00 decoded to the wrong code point (U+1F400).
+       void testEncodeUTF16BE_Supplementary()
+       {
+               char raw[4] = { 0, 0, 0, 0 };
+               ByteBuffer buf(raw, sizeof(raw));
+               Transcoder::encodeUTF16BE(0x1F600, buf);
+               LOGUNIT_ASSERT_EQUAL((size_t) 4, buf.position());
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0xD8, (unsigned char) raw[0]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x3D, (unsigned char) raw[1]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0xDE, (unsigned char) raw[2]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x00, (unsigned char) raw[3]);
+       }
+
+       void testEncodeUTF16LE_Supplementary()
+       {
+               char raw[4] = { 0, 0, 0, 0 };
+               ByteBuffer buf(raw, sizeof(raw));
+               Transcoder::encodeUTF16LE(0x1F600, buf);
+               LOGUNIT_ASSERT_EQUAL((size_t) 4, buf.position());
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x3D, (unsigned char) raw[0]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0xD8, (unsigned char) raw[1]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0x00, (unsigned char) raw[2]);
+               LOGUNIT_ASSERT_EQUAL((unsigned char) 0xDE, (unsigned char) raw[3]);
+       }
+
 
 #if LOG4CXX_UNICHAR_API
        void udecode2()

Reply via email to