Repository: spark
Updated Branches:
  refs/heads/branch-1.5 33015009f -> d97af68af


[SPARK-9725] [SQL] fix serialization of UTF8String across different JVM

The BYTE_ARRAY_OFFSET could be different in JVM with different configurations 
(for example, different heap size, 24 if heap > 32G, otherwise 16), so offset 
of UTF8String is not portable, we should handle that during serialization.

Author: Davies Liu <[email protected]>

Closes #8210 from davies/serialize_utf8string.

(cherry picked from commit 7c1e56825b716a7d703dff38254b4739755ac0c4)
Signed-off-by: Davies Liu <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d97af68a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d97af68a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d97af68a

Branch: refs/heads/branch-1.5
Commit: d97af68af3e910eb7247e9832615758385d642b9
Parents: 3301500
Author: Davies Liu <[email protected]>
Authored: Fri Aug 14 22:30:35 2015 -0700
Committer: Davies Liu <[email protected]>
Committed: Fri Aug 14 22:31:34 2015 -0700

----------------------------------------------------------------------
 .../apache/spark/unsafe/types/UTF8String.java   | 31 ++++++++++++++++----
 1 file changed, 25 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/d97af68a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
----------------------------------------------------------------------
diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index 667c009..cbcab95 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -18,8 +18,7 @@
 package org.apache.spark.unsafe.types;
 
 import javax.annotation.Nonnull;
-import java.io.Serializable;
-import java.io.UnsupportedEncodingException;
+import java.io.*;
 import java.nio.ByteOrder;
 import java.util.Arrays;
 import java.util.Map;
@@ -38,12 +37,13 @@ import static org.apache.spark.unsafe.Platform.*;
  * <p>
  * Note: This is not designed for general use cases, should not be used outside SQL.
  */
-public final class UTF8String implements Comparable<UTF8String>, Serializable {
+public final class UTF8String implements Comparable<UTF8String>, Externalizable {
 
+  // These are only updated by readExternal()
   @Nonnull
-  private final Object base;
-  private final long offset;
-  private final int numBytes;
+  private Object base;
+  private long offset;
+  private int numBytes;
 
   public Object getBaseObject() { return base; }
   public long getBaseOffset() { return offset; }
@@ -127,6 +127,11 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     this.numBytes = numBytes;
   }
 
+  // for serialization
+  public UTF8String() {
+    this(null, 0, 0);
+  }
+
   /**
    * Writes the content of this string into a memory address, identified by an object and an offset.
    * The target memory address must already been allocated, and have enough space to hold all the
@@ -978,4 +983,18 @@ public final class UTF8String implements Comparable<UTF8String>, Serializable {
     }
     return UTF8String.fromBytes(sx);
   }
+
+  public void writeExternal(ObjectOutput out) throws IOException {
+    byte[] bytes = getBytes();
+    out.writeInt(bytes.length);
+    out.write(bytes);
+  }
+
+  public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+    offset = BYTE_ARRAY_OFFSET;
+    numBytes = in.readInt();
+    base = new byte[numBytes];
+    in.readFully((byte[]) base);
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to