Repository: thrift
Updated Branches:
  refs/heads/master 11b515cd2 -> 54beb80de


THRIFT-2409 UTF-8 sent by PHP as JSON is not understood by TJsonProtocol
Client: Java
Patch: Phongphan Phuttha <[email protected]>

This closes #667


Project: http://git-wip-us.apache.org/repos/asf/thrift/repo
Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/54beb80d
Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/54beb80d
Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/54beb80d

Branch: refs/heads/master
Commit: 54beb80ded9286aeda7da345cbd8303e010b45e8
Parents: 11b515c
Author: Phongphan Phuttha <[email protected]>
Authored: Fri Oct 30 00:18:54 2015 +0700
Committer: Jens Geyer <[email protected]>
Committed: Thu Oct 29 22:18:42 2015 +0200

----------------------------------------------------------------------
 .../apache/thrift/protocol/TJSONProtocol.java   | 44 ++++++++++++++++++--
 .../thrift/protocol/TestTJSONProtocol.java      | 17 ++++++++
 2 files changed, 57 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/thrift/blob/54beb80d/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
----------------------------------------------------------------------
diff --git a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
index 9876e13..12341ab 100644
--- a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
+++ b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java
@@ -19,8 +19,10 @@
 
 package org.apache.thrift.protocol;
 
+import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Stack;
 
 import org.apache.thrift.TByteArrayOutputStream;
@@ -640,6 +642,7 @@ public class TJSONProtocol extends TProtocol {
   private TByteArrayOutputStream readJSONString(boolean skipContext)
     throws TException {
     TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE);
+    ArrayList<Character> codeunits = new ArrayList<Character>();
     if (!skipContext) {
       context_.read();
     }
@@ -652,10 +655,43 @@ public class TJSONProtocol extends TProtocol {
       if (ch == ESCSEQ[0]) {
         ch = reader_.read();
         if (ch == ESCSEQ[1]) {
-          readJSONSyntaxChar(ZERO);
-          readJSONSyntaxChar(ZERO);
-          trans_.readAll(tmpbuf_, 0, 2);
-          ch = (byte)((hexVal((byte)tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1]));
+          trans_.readAll(tmpbuf_, 0, 4);
+          short cu = (short)(
+              ((short)hexVal(tmpbuf_[0]) << 12) +
+              ((short)hexVal(tmpbuf_[1]) << 8) +
+              ((short)hexVal(tmpbuf_[2]) << 4) +
+              (short)hexVal(tmpbuf_[3]));
+          try {
+            if (Character.isHighSurrogate((char)cu)) {
+              if (codeunits.size() > 0) {
+                throw new TProtocolException(TProtocolException.INVALID_DATA,
+                    "Expected low surrogate char");
+              }
+              codeunits.add((char)cu);
+            }
+            else if (Character.isLowSurrogate((char)cu)) {
+              if (codeunits.size() == 0) {
+                throw new TProtocolException(TProtocolException.INVALID_DATA,
+                    "Expected high surrogate char");
+              }
+
+              codeunits.add((char)cu);
+              arr.write((new String(new int[] { codeunits.get(0), codeunits.get(1) }, 0, 2)).getBytes("UTF-8"));
+              codeunits.clear();
+            }
+            else {
+              arr.write((new String(new int[] { cu }, 0, 1)).getBytes("UTF-8"));
+            }
+            continue;
+          }
+          catch (UnsupportedEncodingException ex) {
+            throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED,
+                "JVM does not support UTF-8");
+          }
+          catch (IOException ex) {
+            throw new TProtocolException(TProtocolException.INVALID_DATA,
+                "Invalid unicode sequence");
+          }
         }
         else {
           int off = ESCAPE_CHARS.indexOf(ch);

http://git-wip-us.apache.org/repos/asf/thrift/blob/54beb80d/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
----------------------------------------------------------------------
diff --git a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
index d7376ac..1320749 100644
--- a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
+++ b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java
@@ -18,6 +18,12 @@
  */
 package org.apache.thrift.protocol;
 
+import java.io.IOException;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TJSONProtocol;
+import org.apache.thrift.transport.TMemoryBuffer;
+
 public class TestTJSONProtocol extends ProtocolTestBase {
   @Override
   protected TProtocolFactory getFactory() {
@@ -28,4 +34,15 @@ public class TestTJSONProtocol extends ProtocolTestBase {
   protected boolean canBeUsedNaked() {
     return false;
   }
+
+  public void testEscapedUnicode() throws TException, IOException {
+    String jsonString = "\"hello unicode \\u0e01\\ud834\\udd1e world\"";
+    String expectedString = "hello unicode \u0e01\ud834\udd1e world";
+
+    TMemoryBuffer buffer = new TMemoryBuffer(1000);
+    TJSONProtocol protocol = new TJSONProtocol(buffer);
+    buffer.write(jsonString.getBytes("UTF-8"));
+
+    assertEquals(expectedString, protocol.readString());
+  }
 }

Reply via email to