Repository: spark Updated Branches: refs/heads/master 71a3af8a9 -> 7c1e56825
[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM The BYTE_ARRAY_OFFSET could be different in JVM with different configurations (for example, different heap size, 24 if heap > 32G, otherwise 16), so offset of UTF8String is not portable, we should handler that during serialization. Author: Davies Liu <[email protected]> Closes #8210 from davies/serialize_utf8string. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7c1e5682 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7c1e5682 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7c1e5682 Branch: refs/heads/master Commit: 7c1e56825b716a7d703dff38254b4739755ac0c4 Parents: 71a3af8 Author: Davies Liu <[email protected]> Authored: Fri Aug 14 22:30:35 2015 -0700 Committer: Davies Liu <[email protected]> Committed: Fri Aug 14 22:30:35 2015 -0700 ---------------------------------------------------------------------- .../apache/spark/unsafe/types/UTF8String.java | 31 ++++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7c1e5682/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java ---------------------------------------------------------------------- diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 667c009..cbcab95 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -18,8 +18,7 @@ package org.apache.spark.unsafe.types; import javax.annotation.Nonnull; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.nio.ByteOrder; import java.util.Arrays; import java.util.Map; @@ -38,12 +37,13 @@ import static org.apache.spark.unsafe.Platform.*; * <p> * Note: This is not designed for general use cases, should not be used outside SQL. */ -public final class UTF8String implements Comparable<UTF8String>, Serializable { +public final class UTF8String implements Comparable<UTF8String>, Externalizable { + // These are only updated by readExternal() @Nonnull - private final Object base; - private final long offset; - private final int numBytes; + private Object base; + private long offset; + private int numBytes; public Object getBaseObject() { return base; } public long getBaseOffset() { return offset; } @@ -127,6 +127,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { this.numBytes = numBytes; } + // for serialization + public UTF8String() { + this(null, 0, 0); + } + /** * Writes the content of this string into a memory address, identified by an object and an offset. * The target memory address must already been allocated, and have enough space to hold all the @@ -978,4 +983,18 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable { } return UTF8String.fromBytes(sx); } + + public void writeExternal(ObjectOutput out) throws IOException { + byte[] bytes = getBytes(); + out.writeInt(bytes.length); + out.write(bytes); + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { + offset = BYTE_ARRAY_OFFSET; + numBytes = in.readInt(); + base = new byte[numBytes]; + in.readFully((byte[]) base); + } + } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
