Repository: systemml
Updated Branches:
  refs/heads/master 772d9302d -> 137fbf18a
[SYSTEMML-1893] Performance sparse-dense binary cell operations

This patch improves performance of common sparse-dense binary cellwise
operations such as +, -, *, or axpy +- by avoiding two unnecessary scans
over the output matrix (for memset zero and recompute non-zeros).

For example, on a scenario of 100 iterations X + Y, where X is 10Kx10K,
sparsity=0.1 and Y is 10Kx10K dense, this patch improved performance
from 70.6s to 54.3s.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6b1c6f06
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6b1c6f06
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6b1c6f06

Branch: refs/heads/master
Commit: 6b1c6f065782190c84ee9461192b063b6ad19cf3
Parents: 772d930
Author: Matthias Boehm <[email protected]>
Authored: Wed Sep 6 14:48:21 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu Sep 7 13:02:43 2017 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixBincell.java | 27 ++++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/systemml/blob/6b1c6f06/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index 9489225..1e2649d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -321,8 +321,6 @@ public class LibMatrixBincell
 			
 			if( m1.sparse ) //SPARSE left
 			{
-				Arrays.fill(ret.denseBlock, 0, ret.denseBlock.length, 0);
-				
 				if( m1.sparseBlock != null )
 				{
 					SparseBlock a = m1.sparseBlock;
@@ -349,6 +347,7 @@ public class LibMatrixBincell
 			}
 			
 			//2) process right input: op.fn (+,-,*), * only if dense
+			long lnnz = 0;
 			if( m2.sparse ) //SPARSE right
 			{
 				if(m2.sparseBlock!=null)
@@ -364,20 +363,26 @@ public class LibMatrixBincell
 							for(int k = apos; k < apos+alen; k++)
 								c[ix+aix[k]] = op.fn.execute(c[ix+aix[k]], avals[k]);
 						}
-					}
+						//exploit temporal locality of rows
+						lnnz += ret.recomputeNonZeros(i, i, 0, clen-1);
+					}
 				}
 			}
 			else //DENSE right
 			{
-				if( !m2.isEmptyBlock(false) )
-					for( int i=0; i<m*n; i++ )
-						c[i] = op.fn.execute(c[i], m2.denseBlock[i]);
+				if( !m2.isEmptyBlock(false) ) {
+					double[] a = m2.denseBlock;
+					for( int i=0; i<m*n; i++ ) {
+						c[i] = op.fn.execute(c[i], a[i]);
+						lnnz += (c[i]!=0) ? 1 : 0;
+					}
+				}
 				else if(op.fn instanceof Multiply)
 					Arrays.fill(ret.denseBlock, 0, m*n, 0);
 			}
-	
+			
 			//3) recompute nnz
-			ret.recomputeNonZeros();
+			ret.setNonZeros(lnnz);
 		}
 		else if( !ret.sparse && !m1.sparse && !m2.sparse
 				&& m1.denseBlock!=null && m2.denseBlock!=null )
@@ -391,12 +396,12 @@ public class LibMatrixBincell
 			ValueFunction fn = op.fn;
 			
 			//compute dense-dense binary, maintain nnz on-the-fly
-			int nnz = 0;
+			int lnnz = 0;
 			for( int i=0; i<m*n; i++ ) {
 				c[i] = fn.execute(a[i], b[i]);
-				nnz += (c[i]!=0)? 1 : 0;
+				lnnz += (c[i]!=0)? 1 : 0;
 			}
-			ret.nonZeros = nnz;
+			ret.setNonZeros(lnnz);
 		}
 		else if( skipEmpty && (m1.sparse || m2.sparse) )
 		{
