poppler/UTF.cc | 2 +- qt5/tests/check_utf_conversion.cpp | 36 ++++++++++++++++++++---------- qt6/tests/check_utf_conversion.cpp | 44 ++++++++++++++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 14 deletions(-)
New commits: commit 969562d387b3791c7bc192a213e74049e08c9395 Author: Albert Astals Cid <[email protected]> Date: Sat Jul 11 00:41:13 2020 +0200 Fix UTF16LE support in TextStringToUCS4 Make test a bit more complex by using a nice checkbox Also copy the text to the qt6 folder diff --git a/poppler/UTF.cc b/poppler/UTF.cc index ee0314f8..9097b312 100644 --- a/poppler/UTF.cc +++ b/poppler/UTF.cc @@ -119,7 +119,7 @@ int TextStringToUCS4(const GooString *textStr, Unicode **ucs4) if (isUnicode) utf16[i] = (s[2 + i * 2] & 0xff) << 8 | (s[3 + i * 2] & 0xff); else // UnicodeLE - utf16[i] = (s[2 + i * 2] & 0xff) | (s[3 + i * 2] & 0xff) >> 8; + utf16[i] = (s[3 + i * 2] & 0xff) << 8 | (s[2 + i * 2] & 0xff); } len = UTF16toUCS4(utf16, len, &u); delete[] utf16; diff --git a/qt5/tests/check_utf_conversion.cpp b/qt5/tests/check_utf_conversion.cpp index 1f04c2a5..b153ae5b 100644 --- a/qt5/tests/check_utf_conversion.cpp +++ b/qt5/tests/check_utf_conversion.cpp @@ -43,7 +43,17 @@ static bool compare(const Unicode *a, const char *b, int len) return false; } - return *a == (Unicode)*b; + return true; +} + +static bool compare(const Unicode *a, const uint16_t *b, int len) +{ + for (int i = 0; i < len; i++) { + if (a[i] != b[i]) + return false; + } + + return true; } void TestUTFConversion::testUTF_data() @@ -147,32 +157,34 @@ void TestUTFConversion::testUnicodeToAscii7() void TestUTFConversion::testUnicodeLittleEndian() { - uint16_t UTF16LE_hi[4] { 0xFFFE, 0x4800, 0x4900, 0x2100 }; // UTF16-LE "HI!" - GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), 4 * 2); + uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑" + GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi)); - uint16_t UTF16BE_hi[4] { 0xFEFF, 0x0048, 0x0049, 0x0021 }; // UTF16-BE "HI!" 
- GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), 4 * 2); + uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑" + GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi)); // Let's assert both GooString's are different - Q_ASSERT(GooUTF16LE.cmp(&GooUTF16BE) != 0); + QVERIFY(GooUTF16LE.cmp(&GooUTF16BE)); Unicode *UCS4fromLE, *UCS4fromBE; const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE); const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE); - // 3 as TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points - Q_ASSERT(len1 == len2); - Q_ASSERT(len1 == 3); + // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points + QCOMPARE(len1, len2); + QCOMPARE(len1, 4); // Check that now after conversion, UCS4fromLE and UCS4fromBE are now the same for (int i = 0; i < len1; i++) { - Q_ASSERT(UCS4fromLE[i] == UCS4fromBE[i]); + QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]); } + const QString expected = QStringLiteral("HI!☑"); + // Do some final verifications, checking the strings to be "HI!" 
QVERIFY(*UCS4fromLE == *UCS4fromBE); - QVERIFY(compare(UCS4fromLE, "HI!", 3)); - QVERIFY(compare(UCS4fromBE, "HI!", 3)); + QVERIFY(compare(UCS4fromLE, expected.utf16(), len1)); + QVERIFY(compare(UCS4fromBE, expected.utf16(), len1)); } QTEST_GUILESS_MAIN(TestUTFConversion) diff --git a/qt6/tests/check_utf_conversion.cpp b/qt6/tests/check_utf_conversion.cpp index f28829f4..f2a66096 100644 --- a/qt6/tests/check_utf_conversion.cpp +++ b/qt6/tests/check_utf_conversion.cpp @@ -18,6 +18,7 @@ private slots: void testUTF_data(); void testUTF(); void testUnicodeToAscii7(); + void testUnicodeLittleEndian(); }; static bool compare(const char *a, const char *b) @@ -41,9 +42,18 @@ static bool compare(const Unicode *a, const char *b, int len) return false; } - return *a == (Unicode)*b; + return true; } +static bool compare(const Unicode *a, const uint16_t *b, int len) +{ + for (int i = 0; i < len; i++) { + if (a[i] != b[i]) + return false; + } + + return true; +} void TestUTFConversion::testUTF_data() { QTest::addColumn<QString>("s"); @@ -143,5 +153,37 @@ void TestUTFConversion::testUnicodeToAscii7() free(out_ascii_idx); } +void TestUTFConversion::testUnicodeLittleEndian() +{ + uint16_t UTF16LE_hi[5] { 0xFFFE, 0x4800, 0x4900, 0x2100, 0x1126 }; // UTF16-LE "HI!☑" + GooString GooUTF16LE(reinterpret_cast<const char *>(UTF16LE_hi), sizeof(UTF16LE_hi)); + + uint16_t UTF16BE_hi[5] { 0xFEFF, 0x0048, 0x0049, 0x0021, 0x2611 }; // UTF16-BE "HI!☑" + GooString GooUTF16BE(reinterpret_cast<const char *>(UTF16BE_hi), sizeof(UTF16BE_hi)); + + // Let's assert both GooString's are different + QVERIFY(GooUTF16LE.cmp(&GooUTF16BE)); + + Unicode *UCS4fromLE, *UCS4fromBE; + const int len1 = TextStringToUCS4(&GooUTF16LE, &UCS4fromLE); + const int len2 = TextStringToUCS4(&GooUTF16BE, &UCS4fromBE); + + // len is 4 because TextStringToUCS4() removes the two leading Byte Order Mark (BOM) code points + QCOMPARE(len1, len2); + QCOMPARE(len1, 4); + + // Check that now after conversion, UCS4fromLE and 
UCS4fromBE are now the same + for (int i = 0; i < len1; i++) { + QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]); + } + + const QString expected = QStringLiteral("HI!☑"); + + // Do some final verifications, checking the strings to be "HI!" + QVERIFY(*UCS4fromLE == *UCS4fromBE); + QVERIFY(compare(UCS4fromLE, expected.utf16(), len1)); + QVERIFY(compare(UCS4fromBE, expected.utf16(), len1)); +} + QTEST_GUILESS_MAIN(TestUTFConversion) #include "check_utf_conversion.moc" _______________________________________________ poppler mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/poppler
