Repository: incubator-systemml
Updated Branches:
  refs/heads/master e9aa58414 -> 5ac32d6be


[SYSTEMML-824] Performance dense-sparse block conversion (row pre-alloc)

This patch improves the performance of the core matrix block primitive
denseToSparse as triggered in examSparsity(). Similar to sparse
transpose, we now pre-allocate sparse rows with exact nnz information,
in order to avoid repeated reallocations and internal copies. In a
scenario with a 10k x 10k matrix and sparsity 0.25, the runtime improved
from 1.7s to 490ms.

Furthermore, this patch also includes some minor cleanups (e.g., missing
imports and unused variables). 

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5ac32d6b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5ac32d6b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5ac32d6b

Branch: refs/heads/master
Commit: 5ac32d6be15ce5ab212cc2d040546825195e41e6
Parents: e9aa584
Author: Matthias Boehm <[email protected]>
Authored: Sun Aug 14 19:15:11 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sun Aug 14 19:15:11 2016 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/hops/ConvolutionOp.java    |  3 --
 .../runtime/matrix/data/LibMatrixCUDA.java      |  1 -
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 33 ++++++++++++++------
 3 files changed, 23 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java 
b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
index 8c38a48..c010de6 100644
--- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
+++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java
@@ -31,7 +31,6 @@ import org.apache.sysml.lops.LopProperties.ExecType;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.parser.Expression.ValueType;
 import org.apache.sysml.runtime.DMLRuntimeException;
-import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters;
 
 public class ConvolutionOp extends Hop  implements MultiThreadedHop
@@ -231,9 +230,7 @@ public class ConvolutionOp extends Hop  implements 
MultiThreadedHop
                // [numRows, numCols, NNZ] 
                long[] ret = null;
        
-               Hop input1 = getInput().get(0);
                ConvolutionParameters params;
-               MatrixCharacteristics mc = memo.getAllInputStats(input1);
                try {
                        params = parseInput();
                } catch (DMLRuntimeException e) {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 52272a0..6a25b49 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -49,7 +49,6 @@ import static jcuda.runtime.JCuda.cudaFree;
 import jcuda.Pointer;
 import jcuda.Sizeof;
 import jcuda.jcublas.JCublas;
-import jcuda.jcublas.JCublas2;
 import jcuda.jcublas.cublasHandle;
 import jcuda.jcudnn.cudnnConvolutionDescriptor;
 import jcuda.jcudnn.cudnnFilterDescriptor;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 1316ad8..8bd7b79 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -1203,20 +1203,33 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                allocateSparseRowsBlock();
                reset();
                
-               //copy dense to sparse
+               //copy dense to sparse with (1) row pre-allocation to avoid repeated 
+               //allocation on append, and (2) nnz re-computation 
                double[] a = denseBlock;
                SparseBlock c = sparseBlock;
+               final int m = rlen;
+               final int n = clen;
                
-               for( int i=0, aix=0; i<rlen; i++ )
-                       for(int j=0; j<clen; j++, aix++)
-                               if( a[aix] != 0 ) {
-                                       //create sparse row only if required
-                                       c.allocate(i, estimatedNNzsPerRow, clen);
-                                       c.append(i, j, a[aix]);
-                                       nonZeros++;
-                               }
+               long nnz = 0;
+               for( int i=0, aix=0; i<m; i++, aix+=n ) {
+                       //recompute nnz per row (not via recomputeNonZeros as sparse allocated)
+                       int lnnz = 0;
+                       for(int j=0; j<n; j++)
+                               lnnz += (a[aix+j]!=0) ? 1 : 0;
+                       if( lnnz <= 0 ) continue;
+                       
+                       //allocate sparse row and append non-zero values
+                       c.allocate(i, lnnz); 
+                       for(int j=0; j<n; j++) {
+                               double val = a[aix+j];
+                               if( val != 0 )
+                                       c.append(i, j, val);
+                       }
+                       nnz += lnnz;
+               }
                                
-               //cleanup dense block
+               //update nnz and cleanup dense block
+               nonZeros = nnz;
                denseBlock = null;
        }
        

Reply via email to