abdullah alamoudi has submitted this change and it was merged.

Change subject: Provide a Way to Create UTF8 Serde with Reader and Writer
......................................................................
Provide a Way to Create UTF8 Serde with Reader and Writer Change-Id: Ia1a0a2f240ab780e21e7170e03767d8e9981899c Reviewed-on: https://asterix-gerrit.ics.uci.edu/757 Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Murtadha Hubail <hubail...@gmail.com> --- M hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java M hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java 2 files changed, 25 insertions(+), 9 deletions(-) Approvals: Murtadha Hubail: Looks good to me, approved Jenkins: Verified diff --git a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java index b36ee5a..8dccf70 100644 --- a/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java +++ b/hyracks/hyracks-dataflow-common/src/main/java/org/apache/hyracks/dataflow/common/data/marshalling/UTF8StringSerializerDeserializer.java @@ -24,19 +24,30 @@ import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer; import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.util.string.UTF8StringReader; import org.apache.hyracks.util.string.UTF8StringUtil; +import org.apache.hyracks.util.string.UTF8StringWriter; public class UTF8StringSerializerDeserializer implements ISerializerDeserializer<String> { private static final long serialVersionUID = 1L; + private final UTF8StringWriter utf8StringWriter; + private final UTF8StringReader utf8StringReader; public UTF8StringSerializerDeserializer() { + this.utf8StringWriter = null; + this.utf8StringReader = null; + } + + public UTF8StringSerializerDeserializer(UTF8StringWriter utf8StringWriter, UTF8StringReader 
utf8StringReader) { + this.utf8StringWriter = utf8StringWriter; + this.utf8StringReader = utf8StringReader; } @Override public String deserialize(DataInput in) throws HyracksDataException { try { - return UTF8StringUtil.readUTF8(in); + return UTF8StringUtil.readUTF8(in, utf8StringReader); } catch (IOException e) { throw new HyracksDataException(e); } @@ -45,7 +56,7 @@ @Override public void serialize(String instance, DataOutput out) throws HyracksDataException { try { - UTF8StringUtil.writeUTF8(instance, out); + UTF8StringUtil.writeUTF8(instance, out, utf8StringWriter); } catch (IOException e) { throw new HyracksDataException(e); } diff --git a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java index ee05b03..e867ecc 100644 --- a/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java +++ b/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java @@ -448,7 +448,7 @@ return readUTF8(in, null); } - static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException { + public static String readUTF8(DataInput in, UTF8StringReader reader) throws IOException { int utflen = VarLenIntEncoderDecoder.decode(in); byte[] bytearr; char[] chararr; @@ -473,8 +473,9 @@ while (count < utflen) { c = bytearr[count] & 0xff; - if (c > 127) + if (c > 127) { break; + } count++; chararr[chararr_count++] = (char) c; } @@ -498,22 +499,26 @@ case 13: /* 110x xxxx 10xx xxxx*/ count += 2; - if (count > utflen) + if (count > utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); + } char2 = bytearr[count - 1]; - if ((char2 & 0xC0) != 0x80) + if ((char2 & 0xC0) != 0x80) { throw new UTFDataFormatException("malformed input around byte " + count); + } chararr[chararr_count++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14: /* 1110 xxxx 10xx xxxx 10xx xxxx */ 
count += 3; - if (count > utflen) + if (count > utflen) { throw new UTFDataFormatException("malformed input: partial character at end"); + } char2 = bytearr[count - 2]; char3 = bytearr[count - 1]; - if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) + if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80)) { throw new UTFDataFormatException("malformed input around byte " + (count - 1)); + } chararr[chararr_count++] = (char) (((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0)); break; @@ -539,7 +544,7 @@ writeUTF8(str, out, null); } - static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException { + public static void writeUTF8(CharSequence str, DataOutput out, UTF8StringWriter writer) throws IOException { int strlen = str.length(); int utflen = 0; char c; -- To view, visit https://asterix-gerrit.ics.uci.edu/757 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia1a0a2f240ab780e21e7170e03767d8e9981899c Gerrit-PatchSet: 2 Gerrit-Project: hyracks Gerrit-Branch: master Gerrit-Owner: abdullah alamoudi <bamou...@gmail.com> Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Gerrit-Reviewer: Murtadha Hubail <hubail...@gmail.com> Gerrit-Reviewer: abdullah alamoudi <bamou...@gmail.com>