[SYSTEMML-510] Fix performance of sparse wdivmm with scalar epsilon

The newly added wdivmm with scalar epsilon (eps) always selected the
generic fallback implementation, which led to severe performance issues
for the common case of sparse-dense-dense inputs. In addition to fixing
the selection of primitives, this patch also fixes the sparse-dense-dense
implementation with scalars (which ran into null pointer exceptions).

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/79ae5415
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/79ae5415
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/79ae5415

Branch: refs/heads/master
Commit: 79ae5415272b04707d8f121f7dd35294431b8612
Parents: 8cea30c
Author: Matthias Boehm <[email protected]>
Authored: Sat Mar 5 21:40:16 2016 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Mon Mar 7 12:23:14 2016 -0800

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/LibMatrixMult.java | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/79ae5415/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 14024b3..9b2fb78 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -727,9 +727,10 @@ public class LibMatrixMult
                ret.allocateDenseOrSparseBlock();
                
                //core weighted div mm computation
-               if( !mW.sparse && !mU.sparse && !mV.sparse && (mX==null || 
!mX.sparse) && !mU.isEmptyBlock() && !mV.isEmptyBlock() )
+               boolean scalarX = wt.hasScalar();
+               if( !mW.sparse && !mU.sparse && !mV.sparse && (mX==null || 
!mX.sparse || scalarX) && !mU.isEmptyBlock() && !mV.isEmptyBlock() )
                        matrixMultWDivMMDense(mW, mU, mV, mX, ret, wt, 0, 
mW.rlen, 0, mW.clen);
-               else if( mW.sparse && !mU.sparse && !mV.sparse && (mX==null || 
mX.sparse) && !mU.isEmptyBlock() && !mV.isEmptyBlock())
+               else if( mW.sparse && !mU.sparse && !mV.sparse && (mX==null || 
mX.sparse || scalarX) && !mU.isEmptyBlock() && !mV.isEmptyBlock())
                        matrixMultWDivMMSparseDense(mW, mU, mV, mX, ret, wt, 0, 
mW.rlen, 0, mW.clen);
                else
                        matrixMultWDivMMGeneric(mW, mU, mV, mX, ret, wt, 0, 
mW.rlen, 0, mW.clen);
@@ -2663,17 +2664,14 @@ public class LibMatrixMult
                                        k = (k>=0) ? k : wpos+wlen;
                                        //checking alignment per row is ok 
because early abort if false, 
                                        //row nnz likely fit in L1/L2 cache, 
and asymptotically better if aligned
-                                       if( w.isAligned(i, x) ) {
+                                       if( !scalar && w.isAligned(i, x) ) {
                                                //O(n) where n is nnz in w/x 
                                                double[] xvals = x.values(i);
                                                for( ; k<wpos+wlen && 
wix[k]<cu; k++ )
-                                                       if (scalar)
-                                                               wdivmm(wval[k], 
eps, u, v, c, uix, wix[k]*cd, left, scalar, cd);
-                                                       else
-                                                               wdivmm(wval[k], 
xvals[k], u, v, c, uix, wix[k]*cd, left, scalar, cd);
+                                                       wdivmm(wval[k], 
xvals[k], u, v, c, uix, wix[k]*cd, left, scalar, cd);
                                        }
                                        else {
-                                               //O(n log m) where n/m are nnz 
in w/x
+                                               //scalar or O(n log m) where 
n/m are nnz in w/x
                                                for( ; k<wpos+wlen && 
wix[k]<cu; k++ )
                                                        if (scalar)
                                                                wdivmm(wval[k], 
eps, u, v, c, uix, wix[k]*cd, left, scalar, cd);
@@ -4333,9 +4331,10 @@ public class LibMatrixMult
                public Object call() throws DMLRuntimeException
                {
                        //core weighted div mm computation
-                       if( !_mW.sparse && !_mU.sparse && !_mV.sparse && 
(_mX==null || !_mX.sparse) && !_mU.isEmptyBlock() && !_mV.isEmptyBlock() )
+                       boolean scalarX = _wt.hasScalar();
+                       if( !_mW.sparse && !_mU.sparse && !_mV.sparse && 
(_mX==null || !_mX.sparse || scalarX) && !_mU.isEmptyBlock() && 
!_mV.isEmptyBlock() )
                                matrixMultWDivMMDense(_mW, _mU, _mV, _mX, _ret, 
_wt, _rl, _ru, _cl, _cu);
-                       else if( _mW.sparse && !_mU.sparse && !_mV.sparse && 
(_mX==null || _mX.sparse) && !_mU.isEmptyBlock() && !_mV.isEmptyBlock())
+                       else if( _mW.sparse && !_mU.sparse && !_mV.sparse && 
(_mX==null || _mX.sparse || scalarX) && !_mU.isEmptyBlock() && 
!_mV.isEmptyBlock())
                                matrixMultWDivMMSparseDense(_mW, _mU, _mV, _mX, 
_ret, _wt, _rl, _ru, _cl, _cu);
                        else
                                matrixMultWDivMMGeneric(_mW, _mU, _mV, _mX, 
_ret, _wt, _rl, _ru, _cl, _cu);

Reply via email to