NO-JIRA: additional fix to proton-j UTF-8 encoding. The encoder still wasn't 100% compliant in 0.9. Also fix up the unit test to run a touch faster.
(cherry picked from commit 836cf278a1c2aa6d8fafe90b4b253549782bcefb) Project: http://git-wip-us.apache.org/repos/asf/qpid-proton/repo Commit: http://git-wip-us.apache.org/repos/asf/qpid-proton/commit/073a9c19 Tree: http://git-wip-us.apache.org/repos/asf/qpid-proton/tree/073a9c19 Diff: http://git-wip-us.apache.org/repos/asf/qpid-proton/diff/073a9c19 Branch: refs/heads/0.9.x Commit: 073a9c19e025394a884ffb6cbcb7e595111b0e21 Parents: b59ed4c Author: Dominic Evans <[email protected]> Authored: Thu Apr 2 20:35:33 2015 +0100 Committer: Robert Gemmell <[email protected]> Committed: Sat Apr 25 20:24:17 2015 +0100 ---------------------------------------------------------------------- .../apache/qpid/proton/codec/EncoderImpl.java | 2 +- .../qpid/proton/codec/StringTypeTest.java | 63 +++++++++++--------- 2 files changed, 35 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/073a9c19/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java ---------------------------------------------------------------------- diff --git a/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java b/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java index 77f0efc..fd0be07 100644 --- a/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java +++ b/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java @@ -788,7 +788,7 @@ public final class EncoderImpl implements ByteBufferEncoder _buffer.put((byte)(0xC0 | ((c >> 6) & 0x1F))); _buffer.put((byte)(0x80 | (c & 0x3F))); } - else if ((c & 0xD800) != 0xD800 || (c & 0xDC00) == 0xDC00) /* U+0800..U+FFFF - excluding surrogate pairs */ + else if ((c & 0xD800) != 0xD800 || (c > 0xDBFF)) /* U+0800..U+FFFF - excluding surrogate pairs */ { _buffer.put((byte)(0xE0 | ((c >> 12) & 0x0F))); _buffer.put((byte)(0x80 | ((c >> 6) & 0x3F))); 
http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/073a9c19/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java ---------------------------------------------------------------------- diff --git a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java index 7a44063..90cfe26 100644 --- a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java +++ b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java @@ -27,6 +27,8 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.Arrays; import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; import java.util.Set; import org.apache.qpid.proton.amqp.messaging.AmqpValue; @@ -39,6 +41,8 @@ public class StringTypeTest { private static final Charset CHARSET_UTF8 = Charset.forName("UTF-8"); + private static final List<String> TEST_DATA = generateTestData(); + /** * Loop over all the chars in given {@link UnicodeBlock}s and return a * {@link Set <String>} containing all the possible values as their @@ -91,7 +95,7 @@ public class StringTypeTest @Test public void calculateUTF8Length() { - for (final String input : generateTestData()) + for (final String input : TEST_DATA) { assertEquals("Incorrect string length calculated for string '"+input+"'",input.getBytes(CHARSET_UTF8).length, StringType.calculateUTF8Length(input)); } @@ -108,7 +112,7 @@ public class StringTypeTest AMQPDefinedTypes.registerAllTypes(decoder, encoder); final ByteBuffer bb = ByteBuffer.allocate(16); - for (final String input : generateTestData()) + for (final String input : TEST_DATA) { bb.clear(); final AmqpValue inputValue = new AmqpValue(input); @@ -122,36 +126,37 @@ public class StringTypeTest } // build up some test data with a set of suitable Unicode characters - private Set<String> generateTestData() + private static List<String> generateTestData() { - return new 
HashSet<String>() + return new LinkedList<String>() + { + private static final long serialVersionUID = 7331717267070233454L; { - private static final long serialVersionUID = 7331717267070233454L; - + // non-surrogate pair blocks + addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN, + UnicodeBlock.LATIN_1_SUPPLEMENT, + UnicodeBlock.GREEK, + UnicodeBlock.LETTERLIKE_SYMBOLS)); + // blocks with surrogate pairs + //TODO: restore others when Java 7 is baseline + addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.LINEAR_B_SYLLABARY, + /*UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,*/ + UnicodeBlock.MUSICAL_SYMBOLS, + /*UnicodeBlock.EMOTICONS,*/ + /*UnicodeBlock.PLAYING_CARDS,*/ + UnicodeBlock.BOX_DRAWING, + UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, + UnicodeBlock.PRIVATE_USE_AREA, + UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, + UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B)); + // some additional combinations of characters that could cause problems to the encoder + String[] boxDrawing = getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]); + String[] halfFullWidthForms = getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]); + for (int i = 0; i < halfFullWidthForms.length; i++) { - // non-surrogate pair blocks - addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN, - UnicodeBlock.LATIN_1_SUPPLEMENT, - UnicodeBlock.GREEK, - UnicodeBlock.LETTERLIKE_SYMBOLS)); - // blocks with surrogate pairs - //TODO: restore others when Java 7 is baseline - addAll(getAllStringsFromUnicodeBlocks(/*UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,*/ - UnicodeBlock.MUSICAL_SYMBOLS, - /*UnicodeBlock.EMOTICONS,*/ - /*UnicodeBlock.PLAYING_CARDS,*/ - UnicodeBlock.BOX_DRAWING, - UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, - UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, - UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B)); - // some additional combinations of characters that could cause problems 
to the encoder - String[] boxDrawing = getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]); - String[] halfFullWidthForms = getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]); - for (int i = 0; i < halfFullWidthForms.length; i++) - { - add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]); - } + add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]); } - }; + } + }; } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
