[SYSTEMML-2099] Performance sparse result merge accum w/o compare

This patch improves the performance of dense-sparse binary in-place
operations for plus and minus (as used for accumulating result merge
without compare). In a scenario that merges 12 sparse results of size
10K x 10K with sparsity 0.2, this patch reduced the runtime from 53.3s
to 4.9s.

Furthermore, this patch includes a minor fix that disables parfor
update-in-place for accumulating result variables.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/68b93c75
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/68b93c75
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/68b93c75

Branch: refs/heads/master
Commit: 68b93c75ff80f8ba3be367498e801d5877e173c9
Parents: e63eb2e
Author: Matthias Boehm <[email protected]>
Authored: Sun Jan 28 16:00:43 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sun Jan 28 16:00:43 2018 -0800

----------------------------------------------------------------------
 .../parfor/opt/OptimizerRuleBased.java          |  3 +-
 .../runtime/matrix/data/LibMatrixBincell.java   | 30 ++++++++++++++++----
 2 files changed, 26 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/68b93c75/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index 4ce7466..d551d58 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -1734,7 +1734,8 @@ public class OptimizerRuleBased extends Optimizer
                }
                else if( n.getNodeType()== NodeType.HOP) {
                        Hop h = 
OptTreeConverter.getAbstractPlanMapping().getMappedHop(n.getID());
-                       if( h instanceof LeftIndexingOp && 
ResultVar.contains(retVars, h.getInput().get(0).getName() ) )
+                       if( h instanceof LeftIndexingOp && 
ResultVar.contains(retVars, h.getInput().get(0).getName() )
+                               && !retVars.stream().anyMatch(rvar -> 
rvar._isAccum) )
                                ret &= (h.getParent().size()==1 
                                        && 
h.getParent().get(0).getName().equals(h.getInput().get(0).getName()));
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/68b93c75/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index 582a0cd..1c9ddc8 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -1208,13 +1208,31 @@ public class LibMatrixBincell
                final int rlen = m1ret.rlen;
                final int clen = m1ret.clen;
                
-               for(int r=0; r<rlen; r++)
-                       for(int c=0; c<clen; c++) {
-                               double thisvalue = m1ret.quickGetValue(r, c);
-                               double thatvalue = m2.quickGetValue(r, c);
-                               double resultvalue = op.fn.execute(thisvalue, 
thatvalue);
-                               m1ret.quickSetValue(r, c, resultvalue);
+               if( m2.sparse && (op.fn instanceof Plus || op.fn instanceof 
Minus) ) {
+                       if( m2.isEmptyBlock(false) )
+                               return;
+                       SparseBlock b = m2.sparseBlock;
+                       for(int r=0; r<rlen; r++) {
+                               int bpos = b.pos(r);
+                               int blen = b.size(r);
+                               int[] bix = b.indexes(r);
+                               double[] bvals = b.values(r);
+                               for(int k = bpos; k<bpos+blen; k++) {
+                                       double vold = m1ret.quickGetValue(r, 
bix[k]);
+                                       double vnew = op.fn.execute(vold, 
bvals[k]);
+                                       m1ret.quickSetValue(r, bix[k], vnew);
+                               }
                        }
+               }
+               else {
+                       for(int r=0; r<rlen; r++)
+                               for(int c=0; c<clen; c++) {
+                                       double thisvalue = 
m1ret.quickGetValue(r, c);
+                                       double thatvalue = m2.quickGetValue(r, 
c);
+                                       double resultvalue = 
op.fn.execute(thisvalue, thatvalue);
+                                       m1ret.quickSetValue(r, c, resultvalue);
+                               }
+               }
        }
        
        private static void unsafeBinaryInPlace(MatrixBlock m1ret, MatrixBlock 
m2, BinaryOperator op) throws DMLRuntimeException 

Reply via email to