Repository: spark Updated Branches: refs/heads/master 411cf29ff -> 90f73fcc4
[SPARK-3889] Attempt to avoid SIGBUS by not mmapping files in ConnectionManager In general, individual shuffle blocks are frequently small, so mmapping them often creates a lot of waste. It may not be bad to mmap the larger ones, but it is pretty inconvenient to get configuration into ManagedBuffer, and besides it is unlikely to help all that much. Author: Aaron Davidson <[email protected]> Closes #2742 from aarondav/mmap and squashes the following commits: a152065 [Aaron Davidson] Add other pathway back 52b6cd2 [Aaron Davidson] [SPARK-3889] Attempt to avoid SIGBUS by not mmapping files in ConnectionManager Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90f73fcc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90f73fcc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90f73fcc Branch: refs/heads/master Commit: 90f73fcc47c7bf881f808653d46a9936f37c3c31 Parents: 411cf29 Author: Aaron Davidson <[email protected]> Authored: Fri Oct 10 01:44:36 2014 -0700 Committer: Reynold Xin <[email protected]> Committed: Fri Oct 10 01:44:36 2014 -0700 ---------------------------------------------------------------------- .../org/apache/spark/network/ManagedBuffer.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/90f73fcc/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala ---------------------------------------------------------------------- diff --git a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala index a440918..4c9ca97 100644 --- a/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala +++ b/core/src/main/scala/org/apache/spark/network/ManagedBuffer.scala @@ -66,13 +66,27 @@ sealed abstract class ManagedBuffer { final class FileSegmentManagedBuffer(val file: File, val offset: Long, val length: Long) extends ManagedBuffer { + /** + * Memory mapping is expensive and can destabilize the JVM (SPARK-1145, SPARK-3889). + * Avoid unless there's a good reason not to. + */ + private val MIN_MEMORY_MAP_BYTES = 2 * 1024 * 1024; + override def size: Long = length override def nioByteBuffer(): ByteBuffer = { var channel: FileChannel = null try { channel = new RandomAccessFile(file, "r").getChannel - channel.map(MapMode.READ_ONLY, offset, length) + // Just copy the buffer if it's sufficiently small, as memory mapping has a high overhead. + if (length < MIN_MEMORY_MAP_BYTES) { + val buf = ByteBuffer.allocate(length.toInt) + channel.read(buf, offset) + buf.flip() + buf + } else { + channel.map(MapMode.READ_ONLY, offset, length) + } } catch { case e: IOException => Try(channel.size).toOption match { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
