 tools/source/misc/json_writer.cxx | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

New commits:
commit 5aee16cf09f9d4ba50feaf804b2a7a649af276bc
Author: Tor Lillqvist <t...@collabora.com>
AuthorDate: Fri Jan 15 03:03:41 2021 +0200
Commit: Tor Lillqvist <t...@collabora.com>
CommitDate: Fri Jan 15 08:54:12 2021 +0100

    Make JsonWriter::writeEscapedOUString() handle surrogate pairs properly

    It is wrong to iterate over UTF-16 code units one by one. We have
    OUString::iterateCodePoints() to iterate over Unicode code points.

    The two UTF-16 code units of a surrogate pair (for a non-BMP code
    point) should not be encoded separately to UTF-8 bytes. It is the code
    point that should be encoded (to four bytes).

    Change-Id: Ica4341308deb6618c9c2da8dcee8a11ef4e8238d
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109318
    Tested-by: Jenkins
    Reviewed-by: Noel Grandin <noel.gran...@collabora.co.uk>
    Reviewed-by: Tor Lillqvist <t...@collabora.com>

diff --git a/tools/source/misc/json_writer.cxx b/tools/source/misc/json_writer.cxx
index a50e2ada967f..c326201eb9e5 100644
--- a/tools/source/misc/json_writer.cxx
+++ b/tools/source/misc/json_writer.cxx
@@ -123,9 +123,10 @@ void JsonWriter::endStruct()
 void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
 {
     // Convert from UTF-16 to UTF-8 and perform escaping
-    for (int i = 0; i < rPropVal.getLength(); ++i)
+    sal_Int32 i = 0;
+    while (i < rPropVal.getLength())
     {
-        sal_Unicode ch = rPropVal[i];
+        sal_uInt32 ch = rPropVal.iterateCodePoints(&i);
         if (ch == '\\')
         {
             *mPos = static_cast<char>(ch);
@@ -173,7 +174,7 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
-        else
+        else if (ch <= 0xFFFF)
         {
             *mPos = 0xE0 | (ch >> 12); /* 1110xxxx */
             ++mPos;
@@ -182,6 +183,17 @@ void JsonWriter::writeEscapedOUString(const OUString& rPropVal)
             *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
             ++mPos;
         }
+        else
+        {
+            *mPos = 0xF0 | (ch >> 18); /* 11110xxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 12) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | ((ch >> 6) & 0x3F); /* 10xxxxxx */
+            ++mPos;
+            *mPos = 0x80 | (ch & 0x3F); /* 10xxxxxx */
+            ++mPos;
+        }
     }
 }
 
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits