[SYSTEMML-2099] Performance sparse result merge accum w/o compare

This patch improves the performance of dense-sparse binary in-place operations for plus and minus (as used for accumulating result merge without compare). On a scenario of merging 12 sparse results of size 10K x 10K, sp=0.2, this patch improved performance from 53.3s to 4.9s.
Furthermore, this also includes a minor fix to disable parfor update in place for accumulating result variables. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/68b93c75 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/68b93c75 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/68b93c75 Branch: refs/heads/master Commit: 68b93c75ff80f8ba3be367498e801d5877e173c9 Parents: e63eb2e Author: Matthias Boehm <[email protected]> Authored: Sun Jan 28 16:00:43 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sun Jan 28 16:00:43 2018 -0800 ---------------------------------------------------------------------- .../parfor/opt/OptimizerRuleBased.java | 3 +- .../runtime/matrix/data/LibMatrixBincell.java | 30 ++++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/68b93c75/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java index 4ce7466..d551d58 100644 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java +++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java @@ -1734,7 +1734,8 @@ public class OptimizerRuleBased extends Optimizer } else if( n.getNodeType()== NodeType.HOP) { Hop h = OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID()); - if( h instanceof LeftIndexingOp && ResultVar.contains(retVars, h.getInput().get(0).getName() ) ) + if( h instanceof LeftIndexingOp && ResultVar.contains(retVars, h.getInput().get(0).getName() ) + && 
!retVars.stream().anyMatch(rvar -> rvar._isAccum) ) ret &= (h.getParent().size()==1 && h.getParent().get(0).getName().equals(h.getInput().get(0).getName())); } http://git-wip-us.apache.org/repos/asf/systemml/blob/68b93c75/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java index 582a0cd..1c9ddc8 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java @@ -1208,13 +1208,31 @@ public class LibMatrixBincell final int rlen = m1ret.rlen; final int clen = m1ret.clen; - for(int r=0; r<rlen; r++) - for(int c=0; c<clen; c++) { - double thisvalue = m1ret.quickGetValue(r, c); - double thatvalue = m2.quickGetValue(r, c); - double resultvalue = op.fn.execute(thisvalue, thatvalue); - m1ret.quickSetValue(r, c, resultvalue); + if( m2.sparse && (op.fn instanceof Plus || op.fn instanceof Minus) ) { + if( m2.isEmptyBlock(false) ) + return; + SparseBlock b = m2.sparseBlock; + for(int r=0; r<rlen; r++) { + int bpos = b.pos(r); + int blen = b.size(r); + int[] bix = b.indexes(r); + double[] bvals = b.values(r); + for(int k = bpos; k<bpos+blen; k++) { + double vold = m1ret.quickGetValue(r, bix[k]); + double vnew = op.fn.execute(vold, bvals[k]); + m1ret.quickSetValue(r, bix[k], vnew); + } } + } + else { + for(int r=0; r<rlen; r++) + for(int c=0; c<clen; c++) { + double thisvalue = m1ret.quickGetValue(r, c); + double thatvalue = m2.quickGetValue(r, c); + double resultvalue = op.fn.execute(thisvalue, thatvalue); + m1ret.quickSetValue(r, c, resultvalue); + } + } } private static void unsafeBinaryInPlace(MatrixBlock m1ret, MatrixBlock m2, BinaryOperator op) throws DMLRuntimeException
