Repository: spark
Updated Branches:
  refs/heads/branch-1.6 add4e6311 -> 9b99b2b46


[SPARK-12060][CORE] Avoid memory copy in JavaSerializerInstance.serialize

`JavaSerializerInstance.serialize` uses `ByteArrayOutputStream.toByteArray` to
get the serialized data, and `ByteArrayOutputStream.toByteArray` copies the
contents of its internal array into a new array. However, since that array is
immediately wrapped in a `ByteBuffer`, we can avoid the memory copy.

This PR adds `ByteBufferOutputStream`, a `ByteArrayOutputStream` subclass that
accesses the protected `buf` and wraps it in a `ByteBuffer` directly.

Author: Shixiong Zhu <shixi...@databricks.com>

Closes #10051 from zsxwing/SPARK-12060.

(cherry picked from commit 1401166576c7018c5f9c31e0a6703d5fb16ea339)
Signed-off-by: Shixiong Zhu <shixi...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b99b2b4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b99b2b4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b99b2b4

Branch: refs/heads/branch-1.6
Commit: 9b99b2b46c452ba396e922db5fc7eec02c45b158
Parents: add4e63
Author: Shixiong Zhu <shixi...@databricks.com>
Authored: Tue Dec 1 09:45:55 2015 -0800
Committer: Shixiong Zhu <shixi...@databricks.com>
Committed: Tue Dec 1 09:46:07 2015 -0800

----------------------------------------------------------------------
 .../spark/serializer/JavaSerializer.scala       |  7 ++---
 .../spark/util/ByteBufferOutputStream.scala     | 31 ++++++++++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9b99b2b4/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
index b463a71..ea718a0 100644
--- a/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala
@@ -24,8 +24,7 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.SparkConf
 import org.apache.spark.annotation.DeveloperApi
-import org.apache.spark.util.ByteBufferInputStream
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{ByteBufferInputStream, ByteBufferOutputStream, Utils}
 
 private[spark] class JavaSerializationStream(
     out: OutputStream, counterReset: Int, extraDebugInfo: Boolean)
@@ -96,11 +95,11 @@ private[spark] class JavaSerializerInstance(
   extends SerializerInstance {
 
   override def serialize[T: ClassTag](t: T): ByteBuffer = {
-    val bos = new ByteArrayOutputStream()
+    val bos = new ByteBufferOutputStream()
     val out = serializeStream(bos)
     out.writeObject(t)
     out.close()
-    ByteBuffer.wrap(bos.toByteArray)
+    bos.toByteBuffer
   }
 
   override def deserialize[T: ClassTag](bytes: ByteBuffer): T = {

http://git-wip-us.apache.org/repos/asf/spark/blob/9b99b2b4/core/src/main/scala/org/apache/spark/util/ByteBufferOutputStream.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/ByteBufferOutputStream.scala b/core/src/main/scala/org/apache/spark/util/ByteBufferOutputStream.scala
new file mode 100644
index 0000000..92e4522
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/util/ByteBufferOutputStream.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util
+
+import java.io.ByteArrayOutputStream
+import java.nio.ByteBuffer
+
+/**
+ * Provide a zero-copy way to convert data in ByteArrayOutputStream to ByteBuffer
+ */
+private[spark] class ByteBufferOutputStream extends ByteArrayOutputStream {
+
+  def toByteBuffer: ByteBuffer = {
+    return ByteBuffer.wrap(buf, 0, count)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to