I believe the issue is addressed by the attached patch, which has already been applied in the upstream repository.
-- Rinat
From 58596f4745190654cff4a5ad6a2bd4ac37b74800 Mon Sep 17 00:00:00 2001 From: Rinat Ibragimov <ibragimovrinat@mail.ru> Date: Tue, 25 Dec 2018 22:22:41 +0300 Subject: [PATCH] tests: use uint16_t for UTF-16 code points in charset test Charset-related APIs are using 16-bit uint16_t when referring to UTF-16, and are not obligated to have any particular byte layout in memory. Those are different for little- and big-endian machines, which caused test to fail when compiling on mips and s390x. --- tests/test_ppb_char_set.c | 40 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/tests/test_ppb_char_set.c b/tests/test_ppb_char_set.c index 8163b2dc..bb25d3f6 100644 --- a/tests/test_ppb_char_set.c +++ b/tests/test_ppb_char_set.c @@ -43,8 +43,8 @@ TEST(ppb_char_set, extract_relevant_part_from_locale_name) TEST(ppb_char_set, to_utf16_all_ASCII) { const char *in = "Hello, world!"; - const uint8_t out[] = {'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0, - 'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0}; + const uint16_t out[] = {'H', 'e', 'l', 'l', 'o', ',', ' ', + 'w', 'o', 'r', 'l', 'd', '!'}; uint32_t res_len = 7777; uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8", PP_CHARSET_CONVERSIONERROR_FAIL, &res_len); @@ -56,9 +56,8 @@ TEST(ppb_char_set, to_utf16_all_ASCII) TEST(ppb_char_set, to_utf16_basic_UTF_8) { const char *in = "Привет, мир!"; - const uint8_t out[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, - 0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04, - 0x40, 0x04, 0x21, 0x00}; + const uint16_t out[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, + 0x2c, 0x20, 0x43c, 0x438, 0x440, 0x21}; uint32_t res_len = 7777; uint16_t *res = ppb_char_set_char_set_to_utf16(0, in, strlen(in), "UTF-8", PP_CHARSET_CONVERSIONERROR_FAIL, &res_len); @@ -83,8 +82,7 @@ TEST(ppb_char_set, to_utf16_wrong_UTF_8_with_error) TEST(ppb_char_set, from_utf16_all_ASCII) { - const uint8_t in[] = 
{'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, ',', 0, ' ', 0, 'w', 0, - 'o', 0, 'r', 0, 'l', 0, 'd', 0, '!', 0}; + const uint16_t in[] = {'H', 'e', 'l', 'l', 'o', ',', ' ', 'w', 'o', 'r', 'l', 'd', '!'}; const char *out = "Hello, world!"; uint32_t res_len = 7777; char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in, @@ -97,9 +95,8 @@ TEST(ppb_char_set, from_utf16_all_ASCII) TEST(ppb_char_set, to_utf16_non_ASCII_all_correct) { - const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, - 0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x3c, 0x04, 0x38, 0x04, - 0x40, 0x04, 0x21, 0x00}; // "Привет, мир!" + const uint16_t in[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c, + 0x20, 0x43c, 0x438, 0x440, 0x21}; // "Привет, мир!" const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!" uint32_t res_len = 7777; char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in, @@ -112,9 +109,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_all_correct) TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL) { - const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, - 0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04, - 0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!" + const uint16_t in[] = {0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c, + 0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21}; + // "♫" in "Привет, ♫мир!" cannot be represented in cp1251. // const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!" 
uint32_t res_len = 7777; char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in, @@ -127,9 +124,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_FAIL) TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP) { - const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, - 0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04, - 0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!" + const uint16_t in[] = { + 0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c, + 0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21}; // "Привет, ♫мир!" const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\xec\xe8\xf0\x21"; // "Привет, мир!" uint32_t res_len = 7777; char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in, @@ -142,9 +139,9 @@ TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SKIP) TEST(ppb_char_set, to_utf16_non_ASCII_PP_CHARSET_CONVERSIONERROR_SUBSTITUTE) { - const uint8_t in[] = {0x1f, 0x04, 0x40, 0x04, 0x38, 0x04, 0x32, 0x04, 0x35, 0x04, - 0x42, 0x04, 0x2c, 0x00, 0x20, 0x00, 0x6b, 0x26, 0x3c, 0x04, - 0x38, 0x04, 0x40, 0x04, 0x21, 0x00}; // "Привет, ♫мир!" + const uint16_t in[] = { + 0x41f, 0x440, 0x438, 0x432, 0x435, 0x442, 0x2c, + 0x20, 0x266b, 0x43c, 0x438, 0x440, 0x21}; // "Привет, ♫мир!" const char *out = "\xcf\xf0\xe8\xe2\xe5\xf2\x2c\x20\x3f\xec\xe8\xf0\x21";// "Привет, ?мир!" uint32_t res_len = 7777; char *res = ppb_char_set_utf16_to_char_set(0, (const uint16_t *)in, @@ -164,9 +161,8 @@ TEST(ppb_char_set, gb2312_ASCII_to_utf16) 0x6c, 0x64, 0x21}; // "Hello, world!" in UTF16-LE - const uint8_t out[] = {0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, - 0x2c, 0x00, 0x20, 0x00, 0x77, 0x00, 0x6f, 0x00, 0x72, 0x00, - 0x6c, 0x00, 0x64, 0x00, 0x21, 0x00}; + const uint16_t out[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20, + 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21}; uint32_t res_len = 7777; uint16_t *res = ppb_char_set_char_set_to_utf16(0, (const char *)in, sizeof(in), "gb2312",