PROTON-834: further UTF-8 encoder fixes. After commit c65e897 it turned out there were still some issues with strings containing a code point > 0xDBFF, which was being incorrectly treated as the start of a surrogate pair in the calculateUTF8Length method.
Fixed this up and added some more test coverage. Closes #13 (cherry picked from commit 7b9b516d445ab9e86a0313709c77218d901435b1) Project: http://git-wip-us.apache.org/repos/asf/qpid-proton/repo Commit: http://git-wip-us.apache.org/repos/asf/qpid-proton/commit/810088b1 Tree: http://git-wip-us.apache.org/repos/asf/qpid-proton/tree/810088b1 Diff: http://git-wip-us.apache.org/repos/asf/qpid-proton/diff/810088b1 Branch: refs/heads/0.9.x Commit: 810088b14dedcd12a9474687ba9cd05fc8297188 Parents: c2042d7 Author: Dominic Evans <[email protected]> Authored: Mon Mar 16 12:18:20 2015 +0000 Committer: Dominic Evans <[email protected]> Committed: Mon Mar 16 16:07:51 2015 +0000 ---------------------------------------------------------------------- .../main/java/org/apache/qpid/proton/codec/StringType.java | 2 +- .../java/org/apache/qpid/proton/codec/StringTypeTest.java | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/810088b1/proton-j/src/main/java/org/apache/qpid/proton/codec/StringType.java ---------------------------------------------------------------------- diff --git a/proton-j/src/main/java/org/apache/qpid/proton/codec/StringType.java b/proton-j/src/main/java/org/apache/qpid/proton/codec/StringType.java index 092894d..a035e94 100644 --- a/proton-j/src/main/java/org/apache/qpid/proton/codec/StringType.java +++ b/proton-j/src/main/java/org/apache/qpid/proton/codec/StringType.java @@ -97,7 +97,7 @@ public class StringType extends AbstractPrimitiveType<String> { len++; // surrogate pairs should always combine to create a code point with a 4 octet representation - if ((c & 0xD800) == 0xD800) + if ((c & 0xD800) == 0xD800 && c < 0xDC00) { i++; } http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/810088b1/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java 
---------------------------------------------------------------------- diff --git a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java index 7d78f65..7a44063 100644 --- a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java +++ b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java @@ -140,9 +140,17 @@ public class StringTypeTest UnicodeBlock.MUSICAL_SYMBOLS, /*UnicodeBlock.EMOTICONS,*/ /*UnicodeBlock.PLAYING_CARDS,*/ + UnicodeBlock.BOX_DRAWING, UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B)); + // some additional combinations of characters that could cause problems to the encoder + String[] boxDrawing = getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]); + String[] halfFullWidthForms = getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]); + for (int i = 0; i < halfFullWidthForms.length; i++) + { + add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]); + } } }; } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
