Repository: incubator-systemml
Updated Branches:
  refs/heads/master 44d7a8857 -> 0f8b19703


[SYSTEMML-1548] Minor performance improvements for ultra-sparse matrix reads

This patch makes the following minor performance improvements for
reading ultra-sparse matrices:

(1) Avoid unnecessary CSR matrix block conversion before persisting
ultra-sparse matrices into a serialized storage level.

(2) Exploit sparse row scalars in the context of row allocation with
estimated number of non-zeros.

(3) Fix the synchronization point allocation for sparse/ultra-sparse matrix
reads (corrupted number of non-zeros).

(4) Avoid row copies on sparse block append if the right-hand-side block
is not in MCSR format (unnecessary temporary allocation).


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/0f8b1970
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/0f8b1970
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/0f8b1970

Branch: refs/heads/master
Commit: 0f8b19703d446ed6f987167975a2deb506e0fd92
Parents: 44d7a88
Author: Matthias Boehm <[email protected]>
Authored: Sat May 6 20:32:59 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat May 6 20:48:09 2017 -0700

----------------------------------------------------------------------
 src/main/java/org/apache/sysml/parser/DMLTranslator.java       | 1 -
 .../runtime/instructions/spark/CheckpointSPInstruction.java    | 6 ++++--
 src/main/java/org/apache/sysml/runtime/io/MatrixReader.java    | 5 +++--
 .../java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java | 3 ++-
 .../org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java  | 6 ++++--
 5 files changed, 13 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java 
b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 9f63038..daf00b7 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -25,7 +25,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 
-import org.antlr.v4.parse.ANTLRParser.option_return;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.conf.ConfigurationManager;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
index 1fa30b6..cddfd12 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/spark/CheckpointSPInstruction.java
@@ -22,6 +22,7 @@ package org.apache.sysml.runtime.instructions.spark;
 import org.apache.spark.api.java.JavaPairRDD;
 import org.apache.spark.storage.StorageLevel;
 import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.Checkpoint;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
@@ -121,10 +122,11 @@ public class CheckpointSPInstruction extends 
UnarySPInstruction
                                        out = ((JavaPairRDD<Long,FrameBlock>)in)
                                                .mapValues(new 
CopyFrameBlockFunction(false));  
                        }
-               
+                       
                        //convert mcsr into memory-efficient csr if potentially 
sparse
                        if( input1.getDataType()==DataType.MATRIX 
-                               && 
OptimizerUtils.checkSparseBlockCSRConversion(mcIn) ) 
+                               && 
OptimizerUtils.checkSparseBlockCSRConversion(mcIn)
+                               && !_level.equals(Checkpoint.SER_STORAGE_LEVEL) 
) 
                        {                               
                                out = 
((JavaPairRDD<MatrixIndexes,MatrixBlock>)out)
                                        .mapValues(new 
CreateSparseBlockFunction(SparseBlock.Type.CSR));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java 
b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
index ffe290e..11e306e 100644
--- a/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
+++ b/src/main/java/org/apache/sysml/runtime/io/MatrixReader.java
@@ -95,7 +95,8 @@ public abstract class MatrixReader
         * @throws IOException if IOException occurs
         * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       protected static MatrixBlock createOutputMatrixBlock( long rlen, long 
clen, int bclen, int brlen, long estnnz, boolean mallocDense, boolean 
mallocSparse ) 
+       protected static MatrixBlock createOutputMatrixBlock( long rlen, long 
clen, 
+                       int bclen, int brlen, long estnnz, boolean mallocDense, 
boolean mallocSparse ) 
                throws IOException, DMLRuntimeException
        {
                //check input dimension
@@ -116,7 +117,7 @@ public abstract class MatrixReader
                        if( sblock instanceof SparseBlockMCSR && clen > bclen   
   //multiple col blocks 
                                && clen > 0 && bclen > 0 && rlen > 0 && brlen > 
0 ) {  //all dims known
                                for( int i=0; i<rlen; i+=brlen )
-                                       ret.getSparseBlock().allocate(i, 
Math.min((int)(estnnz/rlen),1), (int)clen);
+                                       sblock.allocate(i, 
Math.max((int)(estnnz/rlen),1), (int)clen);
                        }
                }
                

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 4172e44..ff6a007 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -723,7 +723,8 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                                int aix = rowoffset+i;
                                        
                                //single block append (avoid re-allocations)
-                               if( sparseBlock.isEmpty(aix) && coloffset==0 ) 
{ 
+                               if( sparseBlock.isEmpty(aix) && coloffset==0
+                                       && b instanceof SparseBlockMCSR ) { 
                                        sparseBlock.set(aix, b.get(i), deep);
                                }
                                else { //general case

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/0f8b1970/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
index 9952fab..2c04865 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java
@@ -134,8 +134,10 @@ public class SparseBlockMCSR extends SparseBlock
        
        @Override
        public void allocate(int r, int ennz, int maxnnz) {
-               if( _rows[r] == null )
-                       _rows[r] = new SparseRowVector(ennz, maxnnz);
+               if( _rows[r] == null ) {
+                       _rows[r] = (ennz == 1) ? new SparseRowScalar() :
+                               new SparseRowVector(ennz, maxnnz);
+               }
        }
        
        @Override

Reply via email to