Repository: incubator-systemml Updated Branches: refs/heads/master e9aa58414 -> 5ac32d6be
[SYSTEMML-824] Performance dense-sparse block conversion (row pre-alloc) This patch improves the performance of the core matrix block primitive denseToSparse as triggered in examSparsity(). Similar to sparse transpose, we now pre-allocate sparse rows with exact nnz information, in order to avoid repeated reallocations and internal copies. On a scenario with a 10k x 10k matrix and sparsity 0.25, the runtime improved from 1.7s to 490ms. Furthermore, this patch also includes some minor cleanups (e.g., unused imports and unused variables). Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5ac32d6b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5ac32d6b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5ac32d6b Branch: refs/heads/master Commit: 5ac32d6be15ce5ab212cc2d040546825195e41e6 Parents: e9aa584 Author: Matthias Boehm <[email protected]> Authored: Sun Aug 14 19:15:11 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Aug 14 19:15:11 2016 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/hops/ConvolutionOp.java | 3 -- .../runtime/matrix/data/LibMatrixCUDA.java | 1 - .../sysml/runtime/matrix/data/MatrixBlock.java | 33 ++++++++++++++------ 3 files changed, 23 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java index 8c38a48..c010de6 100644 --- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java +++ b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java @@ -31,7 +31,6 @@ import 
org.apache.sysml.lops.LopProperties.ExecType; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.ConvolutionParameters; public class ConvolutionOp extends Hop implements MultiThreadedHop @@ -231,9 +230,7 @@ public class ConvolutionOp extends Hop implements MultiThreadedHop // [numRows, numCols, NNZ] long[] ret = null; - Hop input1 = getInput().get(0); ConvolutionParameters params; - MatrixCharacteristics mc = memo.getAllInputStats(input1); try { params = parseInput(); } catch (DMLRuntimeException e) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index 52272a0..6a25b49 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -49,7 +49,6 @@ import static jcuda.runtime.JCuda.cudaFree; import jcuda.Pointer; import jcuda.Sizeof; import jcuda.jcublas.JCublas; -import jcuda.jcublas.JCublas2; import jcuda.jcublas.cublasHandle; import jcuda.jcudnn.cudnnConvolutionDescriptor; import jcuda.jcudnn.cudnnFilterDescriptor; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5ac32d6b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 1316ad8..8bd7b79 100644 --- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -1203,20 +1203,33 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab allocateSparseRowsBlock(); reset(); - //copy dense to sparse + //copy dense to sparse with (1) row pre-allocation to avoid repeated + //allocation on append, and (2) nnz re-computation double[] a = denseBlock; SparseBlock c = sparseBlock; + final int m = rlen; + final int n = clen; - for( int i=0, aix=0; i<rlen; i++ ) - for(int j=0; j<clen; j++, aix++) - if( a[aix] != 0 ) { - //create sparse row only if required - c.allocate(i, estimatedNNzsPerRow, clen); - c.append(i, j, a[aix]); - nonZeros++; - } + long nnz = 0; + for( int i=0, aix=0; i<m; i++, aix+=n ) { + //recompute nnz per row (not via recomputeNonZeros as sparse allocated) + int lnnz = 0; + for(int j=0; j<n; j++) + lnnz += (a[aix+j]!=0) ? 1 : 0; + if( lnnz <= 0 ) continue; + + //allocate sparse row and append non-zero values + c.allocate(i, lnnz); + for(int j=0; j<n; j++) { + double val = a[aix+j]; + if( val != 0 ) + c.append(i, j, val); + } + nnz += lnnz; + } - //cleanup dense block + //update nnz and cleanup dense block + nonZeros = nnz; denseBlock = null; }
