NO-JIRA: additional fix to proton-j UTF-8

The encoder still wasn't 100% compliant in 0.9. Also fix up the unit test
so that it runs a touch faster.

(cherry picked from commit 836cf278a1c2aa6d8fafe90b4b253549782bcefb)


Project: http://git-wip-us.apache.org/repos/asf/qpid-proton/repo
Commit: http://git-wip-us.apache.org/repos/asf/qpid-proton/commit/073a9c19
Tree: http://git-wip-us.apache.org/repos/asf/qpid-proton/tree/073a9c19
Diff: http://git-wip-us.apache.org/repos/asf/qpid-proton/diff/073a9c19

Branch: refs/heads/0.9.x
Commit: 073a9c19e025394a884ffb6cbcb7e595111b0e21
Parents: b59ed4c
Author: Dominic Evans <[email protected]>
Authored: Thu Apr 2 20:35:33 2015 +0100
Committer: Robert Gemmell <[email protected]>
Committed: Sat Apr 25 20:24:17 2015 +0100

----------------------------------------------------------------------
 .../apache/qpid/proton/codec/EncoderImpl.java   |  2 +-
 .../qpid/proton/codec/StringTypeTest.java       | 63 +++++++++++---------
 2 files changed, 35 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/073a9c19/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java
----------------------------------------------------------------------
diff --git 
a/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java 
b/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java
index 77f0efc..fd0be07 100644
--- a/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java
+++ b/proton-j/src/main/java/org/apache/qpid/proton/codec/EncoderImpl.java
@@ -788,7 +788,7 @@ public final class EncoderImpl implements ByteBufferEncoder
                 _buffer.put((byte)(0xC0 | ((c >> 6) & 0x1F)));
                 _buffer.put((byte)(0x80 | (c & 0x3F)));
             }
-            else if ((c & 0xD800) != 0xD800 || (c & 0xDC00) == 0xDC00)     /* 
U+0800..U+FFFF - excluding surrogate pairs */
+            else if ((c & 0xD800) != 0xD800 || (c > 0xDBFF))     /* 
U+0800..U+FFFF - excluding surrogate pairs */
             {
                 _buffer.put((byte)(0xE0 | ((c >> 12) & 0x0F)));
                 _buffer.put((byte)(0x80 | ((c >> 6) & 0x3F)));

http://git-wip-us.apache.org/repos/asf/qpid-proton/blob/073a9c19/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java
----------------------------------------------------------------------
diff --git 
a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java 
b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java
index 7a44063..90cfe26 100644
--- a/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java
+++ b/proton-j/src/test/java/org/apache/qpid/proton/codec/StringTypeTest.java
@@ -27,6 +27,8 @@ import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Set;
 
 import org.apache.qpid.proton.amqp.messaging.AmqpValue;
@@ -39,6 +41,8 @@ public class StringTypeTest
 {
     private static final Charset CHARSET_UTF8 = Charset.forName("UTF-8");
 
+    private static final List<String> TEST_DATA = generateTestData();
+
     /**
      * Loop over all the chars in given {@link UnicodeBlock}s and return a
      * {@link Set <String>} containing all the possible values as their
@@ -91,7 +95,7 @@ public class StringTypeTest
     @Test
     public void calculateUTF8Length()
     {
-        for (final String input : generateTestData())
+        for (final String input : TEST_DATA)
         {
             assertEquals("Incorrect string length calculated for string 
'"+input+"'",input.getBytes(CHARSET_UTF8).length, 
StringType.calculateUTF8Length(input));
         }
@@ -108,7 +112,7 @@ public class StringTypeTest
         AMQPDefinedTypes.registerAllTypes(decoder, encoder);
         final ByteBuffer bb = ByteBuffer.allocate(16);
 
-        for (final String input : generateTestData())
+        for (final String input : TEST_DATA)
         {
             bb.clear();
             final AmqpValue inputValue = new AmqpValue(input);
@@ -122,36 +126,37 @@ public class StringTypeTest
     }
 
     // build up some test data with a set of suitable Unicode characters
-    private Set<String> generateTestData()
+    private static List<String> generateTestData()
     {
-        return new HashSet<String>()
+        return new LinkedList<String>()
+        {
+            private static final long serialVersionUID = 7331717267070233454L;
             {
-                private static final long serialVersionUID = 
7331717267070233454L;
-
+                // non-surrogate pair blocks
+                addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN,
+                                                     
UnicodeBlock.LATIN_1_SUPPLEMENT,
+                                                     UnicodeBlock.GREEK,
+                                                     
UnicodeBlock.LETTERLIKE_SYMBOLS));
+                // blocks with surrogate pairs
+                //TODO: restore others when Java 7 is baseline
+                
addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.LINEAR_B_SYLLABARY,
+                                                     
/*UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,*/
+                                                     
UnicodeBlock.MUSICAL_SYMBOLS,
+                                                     
/*UnicodeBlock.EMOTICONS,*/
+                                                     
/*UnicodeBlock.PLAYING_CARDS,*/
+                                                     UnicodeBlock.BOX_DRAWING,
+                                                     
UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
+                                                     
UnicodeBlock.PRIVATE_USE_AREA,
+                                                     
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
+                                                     
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B));
+                // some additional combinations of characters that could cause 
problems to the encoder
+                String[] boxDrawing = 
getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]);
+                String[] halfFullWidthForms = 
getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new
 String[0]);
+                for (int i = 0; i < halfFullWidthForms.length; i++)
                 {
-                    // non-surrogate pair blocks
-                    
addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN,
-                                                         
UnicodeBlock.LATIN_1_SUPPLEMENT,
-                                                         UnicodeBlock.GREEK,
-                                                         
UnicodeBlock.LETTERLIKE_SYMBOLS));
-                    // blocks with surrogate pairs
-                    //TODO: restore others when Java 7 is baseline
-                    
addAll(getAllStringsFromUnicodeBlocks(/*UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,*/
-                                                         
UnicodeBlock.MUSICAL_SYMBOLS,
-                                                         
/*UnicodeBlock.EMOTICONS,*/
-                                                         
/*UnicodeBlock.PLAYING_CARDS,*/
-                                                         
UnicodeBlock.BOX_DRAWING,
-                                                         
UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
-                                                         
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
-                                                         
UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B));
-                    // some additional combinations of characters that could 
cause problems to the encoder
-                    String[] boxDrawing = 
getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]);
-                    String[] halfFullWidthForms = 
getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new
 String[0]);
-                    for (int i = 0; i < halfFullWidthForms.length; i++)
-                    {
-                        add(halfFullWidthForms[i] + boxDrawing[i % 
boxDrawing.length]);
-                    }
+                    add(halfFullWidthForms[i] + boxDrawing[i % 
boxDrawing.length]);
                 }
-            };
+            }
+        };
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to