Repository: incubator-systemml Updated Branches: refs/heads/master 6009e88d6 -> 4f8648593
[SYSTEMML-1140] Performance binary sparse-dense mult/div (pre-allocate) This patch improves the performance of sparse-dense element-wise multiply and divide by pre-allocating the sparse output rows once with the size of the sparse input rows, which likely comes close to the output size given the dense right-hand-side. Note that this patch does not fix all issues of [SYSTEMML-1140] but is a related backlog item that we've encountered during the SPOOF experiments. The performance improvements are moderate at around 25% but tend to increase with skew of sparse row sizes. On mnist60k, div sparse-dense, the improvement was from 391ms to 296ms. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4f864859 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4f864859 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4f864859 Branch: refs/heads/master Commit: 4f86485939d4777d2799a697b2cbc23ea93ee7e4 Parents: 6009e88 Author: Matthias Boehm <[email protected]> Authored: Wed Feb 1 00:25:15 2017 +0100 Committer: Matthias Boehm <[email protected]> Committed: Wed Feb 1 00:25:15 2017 +0100 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixBincell.java | 34 ++++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4f864859/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java index 139d6fd..15d6e70 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java @@ -401,20 +401,26 @@ public class LibMatrixBincell else if( skipEmpty && (m1.sparse || m2.sparse) ) { SparseBlock a = m1.sparse ? m1.sparseBlock : m2.sparseBlock; - if( a != null ) { - MatrixBlock b = m1.sparse ? m2 : m1; - for( int i=0; i<a.numRows(); i++ ) { - if( a.isEmpty(i) ) continue; - int apos = a.pos(i); - int alen = a.size(i); - int[] aix = a.indexes(i); - double[] avals = a.values(i); - for(int k = apos; k < apos+alen; k++) { - double in2 = b.quickGetValue(i, aix[k]); - if( in2==0 ) continue; - double val = op.fn.execute(avals[k], in2); - ret.appendValue(i, aix[k], val); - } + if( a == null ) + return; + + //prepare second input and allocate output + MatrixBlock b = m1.sparse ? m2 : m1; + ret.allocateDenseOrSparseBlock(); + + for( int i=0; i<a.numRows(); i++ ) { + if( a.isEmpty(i) ) continue; + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + if( ret.sparse && !b.sparse ) + ret.sparseBlock.allocate(i, alen); + for(int k = apos; k < apos+alen; k++) { + double in2 = b.quickGetValue(i, aix[k]); + if( in2==0 ) continue; + double val = op.fn.execute(avals[k], in2); + ret.appendValue(i, aix[k], val); } } }
