Repository: incubator-systemml
Updated Branches:
  refs/heads/master a21d8c6ed -> 8eee978ed


[SYSTEMML-383] Performance Spark CSR conversion (avoid redundant copies)

There were two sources of redundant deep copies of sparse matrix blocks:
(1) on read from binary inputs, we unnecessarily created an MCSR deep
copy before the CSR conversion, and (2) for checkpoints following a
read, we created an unnecessary CSR copy even if the block was already
in CSR. This patch avoids both: (1) the required deep copy is now
created directly in CSR format (the conversion replaces the copy), and
(2) blocks that are already in CSR format are passed through as shallow
copies.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/8eee978e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/8eee978e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/8eee978e

Branch: refs/heads/master
Commit: 8eee978ed4fd01ec085d6526ebdcf102271a356b
Parents: a21d8c6
Author: Matthias Boehm <[email protected]>
Authored: Sun Feb 28 19:17:56 2016 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Mon Feb 29 12:33:14 2016 -0800

----------------------------------------------------------------------
 .../spark/functions/CopyBlockPairFunction.java           | 11 +++++++++--
 .../spark/functions/CreateSparseBlockFunction.java       |  4 +++-
 .../apache/sysml/runtime/matrix/data/MatrixBlock.java    |  4 ++--
 3 files changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
index d23dcfc..301ca4d 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CopyBlockPairFunction.java
@@ -22,8 +22,10 @@ import org.apache.spark.api.java.function.PairFunction;
 
 import scala.Tuple2;
 
+import org.apache.sysml.lops.Checkpoint;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
+import org.apache.sysml.runtime.matrix.data.SparseBlock;
 
 /**
  * General purpose copy function for binary block rdds. This function can be 
used in
@@ -51,8 +53,13 @@ public class CopyBlockPairFunction implements 
PairFunction<Tuple2<MatrixIndexes,
        {       
                if( _deepCopy ) {
                        MatrixIndexes ix = new MatrixIndexes(arg0._1());
-                       MatrixBlock block = new MatrixBlock();
-                       block.copy(arg0._2());
+                       MatrixBlock block = null;
+                       //always create deep copies in more memory-efficient 
CSR representation 
+                       //if block is already in sparse format                  
+                       if( Checkpoint.CHECKPOINT_SPARSE_CSR && 
arg0._2.isInSparseFormat() )
+                               block = new MatrixBlock(arg0._2, 
SparseBlock.Type.CSR, true);
+                       else
+                               block = new MatrixBlock(arg0._2());
                        return new Tuple2<MatrixIndexes,MatrixBlock>(ix,block);
                }
                else {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
index 51f3217..7cf6e8c 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/functions/CreateSparseBlockFunction.java
@@ -42,8 +42,10 @@ public class CreateSparseBlockFunction implements 
Function<MatrixBlock,MatrixBlo
        public MatrixBlock call(MatrixBlock arg0)
                throws Exception 
        {
+               //convert given block to CSR representation if in sparse format
+               //but allow shallow pass-through if already in CSR 
representation. 
                if( arg0.isInSparseFormat() )
-                       return new MatrixBlock(arg0, _stype);
+                       return new MatrixBlock(arg0, _stype, false);
                else //pass through dense
                        return arg0;    
        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/8eee978e/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8f47bc3..ddafe99 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -175,7 +175,7 @@ public class MatrixBlock extends MatrixValue implements 
Externalizable
                this.copy(that);
        }
        
-       public MatrixBlock(MatrixBlock that, SparseBlock.Type stype)
+       public MatrixBlock(MatrixBlock that, SparseBlock.Type stype, boolean 
deep)
        {
                //sanity check sparse matrix block
                if( !that.isInSparseFormat() )
@@ -188,7 +188,7 @@ public class MatrixBlock extends MatrixValue implements 
Externalizable
                nonZeros = that.nonZeros;
                estimatedNNzsPerRow = that.estimatedNNzsPerRow;
                sparseBlock = SparseBlockFactory
-                               .copySparseBlock(stype, that.sparseBlock, true);
+                               .copySparseBlock(stype, that.sparseBlock, deep);
        }
        
        ////////

Reply via email to