Repository: incubator-systemml Updated Branches: refs/heads/master a21d8c6ed -> 8eee978ed
[SYSTEMML-383] Performance Spark CSR conversion (avoid redundant copies) There are two sources of redundant deep copies of sparse matrix blocks: (1) before CSR conversion we unnecessarily created an MCSR deep copy on read from binary inputs, and (2) for checkpoints following a read, we created an unnecessary CSR copy if the block was already in CSR. This patch avoids these unnecessary deep copies by (1) applying the CSR conversion in place of the required deep copy, and (2) using shallow copies if blocks are already in CSR format. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8eee978e Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8eee978e Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8eee978e Branch: refs/heads/master Commit: 8eee978ed4fd01ec085d6526ebdcf102271a356b Parents: a21d8c6 Author: Matthias Boehm <[email protected]> Authored: Sun Feb 28 19:17:56 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Mon Feb 29 12:33:14 2016 -0800 ---------------------------------------------------------------------- .../spark/functions/CopyBlockPairFunction.java | 11 +++++++++-- .../spark/functions/CreateSparseBlockFunction.java | 4 +++- .../apache/sysml/runtime/matrix/data/MatrixBlock.java | 4 ++-- 3 files changed, 14 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java index d23dcfc..301ca4d 100644 --- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java @@ -22,8 +22,10 @@ import org.apache.spark.api.java.function.PairFunction; import scala.Tuple2; +import org.apache.sysml.lops.Checkpoint; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.MatrixIndexes; +import org.apache.sysml.runtime.matrix.data.SparseBlock; /** * General purpose copy function for binary block rdds. This function can be used in @@ -51,8 +53,13 @@ public class CopyBlockPairFunction implements PairFunction<Tuple2<MatrixIndexes, { if( _deepCopy ) { MatrixIndexes ix = new MatrixIndexes(arg0._1()); - MatrixBlock block = new MatrixBlock(); - block.copy(arg0._2()); + MatrixBlock block = null; + //always create deep copies in more memory-efficient CSR representation + //if block is already in sparse format + if( Checkpoint.CHECKPOINT_SPARSE_CSR && arg0._2.isInSparseFormat() ) + block = new MatrixBlock(arg0._2, SparseBlock.Type.CSR, true); + else + block = new MatrixBlock(arg0._2()); return new Tuple2<MatrixIndexes,MatrixBlock>(ix,block); } else { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java index 51f3217..7cf6e8c 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java @@ -42,8 +42,10 @@ public class CreateSparseBlockFunction implements 
Function<MatrixBlock,MatrixBlo public MatrixBlock call(MatrixBlock arg0) throws Exception { + //convert given block to CSR representation if in sparse format + //but allow shallow pass-through if already in CSR representation. if( arg0.isInSparseFormat() ) - return new MatrixBlock(arg0, _stype); + return new MatrixBlock(arg0, _stype, false); else //pass through dense return arg0; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 8f47bc3..ddafe99 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -175,7 +175,7 @@ public class MatrixBlock extends MatrixValue implements Externalizable this.copy(that); } - public MatrixBlock(MatrixBlock that, SparseBlock.Type stype) + public MatrixBlock(MatrixBlock that, SparseBlock.Type stype, boolean deep) { //sanity check sparse matrix block if( !that.isInSparseFormat() ) @@ -188,7 +188,7 @@ public class MatrixBlock extends MatrixValue implements Externalizable nonZeros = that.nonZeros; estimatedNNzsPerRow = that.estimatedNNzsPerRow; sparseBlock = SparseBlockFactory - .copySparseBlock(stype, that.sparseBlock, true); + .copySparseBlock(stype, that.sparseBlock, deep); } ////////
