Repository: thrift Updated Branches: refs/heads/master 11b515cd2 -> 54beb80de
THRIFT-2409 UTF-8 sent by PHP as JSON is not understood by TJsonProtocol Client: Java Patch: Phongphan Phuttha <[email protected]> This closes #667 Project: http://git-wip-us.apache.org/repos/asf/thrift/repo Commit: http://git-wip-us.apache.org/repos/asf/thrift/commit/54beb80d Tree: http://git-wip-us.apache.org/repos/asf/thrift/tree/54beb80d Diff: http://git-wip-us.apache.org/repos/asf/thrift/diff/54beb80d Branch: refs/heads/master Commit: 54beb80ded9286aeda7da345cbd8303e010b45e8 Parents: 11b515c Author: Phongphan Phuttha <[email protected]> Authored: Fri Oct 30 00:18:54 2015 +0700 Committer: Jens Geyer <[email protected]> Committed: Thu Oct 29 22:18:42 2015 +0200 ---------------------------------------------------------------------- .../apache/thrift/protocol/TJSONProtocol.java | 44 ++++++++++++++++++-- .../thrift/protocol/TestTJSONProtocol.java | 17 ++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/thrift/blob/54beb80d/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java ---------------------------------------------------------------------- diff --git a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java index 9876e13..12341ab 100644 --- a/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java +++ b/lib/java/src/org/apache/thrift/protocol/TJSONProtocol.java @@ -19,8 +19,10 @@ package org.apache.thrift.protocol; +import java.io.IOException; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Stack; import org.apache.thrift.TByteArrayOutputStream; @@ -640,6 +642,7 @@ public class TJSONProtocol extends TProtocol { private TByteArrayOutputStream readJSONString(boolean skipContext) throws TException { TByteArrayOutputStream arr = new TByteArrayOutputStream(DEF_STRING_SIZE); + ArrayList<Character> codeunits = new ArrayList<Character>(); if (!skipContext) { context_.read(); } @@ -652,10 +655,43 @@ public class TJSONProtocol extends TProtocol { if (ch == ESCSEQ[0]) { ch = reader_.read(); if (ch == ESCSEQ[1]) { - readJSONSyntaxChar(ZERO); - readJSONSyntaxChar(ZERO); - trans_.readAll(tmpbuf_, 0, 2); - ch = (byte)((hexVal((byte)tmpbuf_[0]) << 4) + hexVal(tmpbuf_[1])); + trans_.readAll(tmpbuf_, 0, 4); + short cu = (short)( + ((short)hexVal(tmpbuf_[0]) << 12) + + ((short)hexVal(tmpbuf_[1]) << 8) + + ((short)hexVal(tmpbuf_[2]) << 4) + + (short)hexVal(tmpbuf_[3])); + try { + if (Character.isHighSurrogate((char)cu)) { + if (codeunits.size() > 0) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected low surrogate char"); + } + codeunits.add((char)cu); + } + else if (Character.isLowSurrogate((char)cu)) { + if (codeunits.size() == 0) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Expected high surrogate char"); + } + + codeunits.add((char)cu); + arr.write((new String(new int[] { codeunits.get(0), codeunits.get(1) }, 0, 2)).getBytes("UTF-8")); + codeunits.clear(); + } + else { + arr.write((new String(new int[] { cu }, 0, 1)).getBytes("UTF-8")); + } + continue; + } + catch (UnsupportedEncodingException ex) { + throw new TProtocolException(TProtocolException.NOT_IMPLEMENTED, + "JVM does not support UTF-8"); + } + catch (IOException ex) { + throw new TProtocolException(TProtocolException.INVALID_DATA, + "Invalid unicode sequence"); + } } else { int off = ESCAPE_CHARS.indexOf(ch); http://git-wip-us.apache.org/repos/asf/thrift/blob/54beb80d/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java ---------------------------------------------------------------------- diff --git a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java index d7376ac..1320749 100644 --- a/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java +++ b/lib/java/test/org/apache/thrift/protocol/TestTJSONProtocol.java @@ -18,6 +18,12 @@ */ package org.apache.thrift.protocol; +import java.io.IOException; + +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TJSONProtocol; +import org.apache.thrift.transport.TMemoryBuffer; + public class TestTJSONProtocol extends ProtocolTestBase { @Override protected TProtocolFactory getFactory() { @@ -28,4 +34,15 @@ public class TestTJSONProtocol extends ProtocolTestBase { protected boolean canBeUsedNaked() { return false; } + + public void testEscapedUnicode() throws TException, IOException { + String jsonString = "\"hello unicode \\u0e01\\ud834\\udd1e world\""; + String expectedString = "hello unicode \u0e01\ud834\udd1e world"; + + TMemoryBuffer buffer = new TMemoryBuffer(1000); + TJSONProtocol protocol = new TJSONProtocol(buffer); + buffer.write(jsonString.getBytes("UTF-8")); + + assertEquals(expectedString, protocol.readString()); + } }
