Repository: incubator-systemml
Updated Branches:
  refs/heads/master 7f3327658 -> 11a85775f


[SYSTEMML-824] Performance scalar/unary/binary ops (nnz, sparse, skip)

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a528f5e4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a528f5e4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a528f5e4

Branch: refs/heads/master
Commit: a528f5e4ee3e4717ff0dfe27850533b94043c8fe
Parents: 7f33276
Author: Matthias Boehm <[email protected]>
Authored: Fri Jul 29 23:22:39 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sat Jul 30 16:23:17 2016 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixBincell.java   | 140 ++++++++++---------
 .../sysml/runtime/matrix/data/MatrixBlock.java  |  56 ++++++--
 .../matrix/operators/ScalarOperator.java        |  32 ++---
 .../runtime/matrix/operators/UnaryOperator.java |  10 +-
 4 files changed, 126 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a528f5e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
index 6ad08d8..f9269e8 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixBincell.java
@@ -29,11 +29,13 @@ import 
org.apache.sysml.runtime.functionobjects.GreaterThanEquals;
 import org.apache.sysml.runtime.functionobjects.LessThan;
 import org.apache.sysml.runtime.functionobjects.LessThanEquals;
 import org.apache.sysml.runtime.functionobjects.Minus;
+import org.apache.sysml.runtime.functionobjects.MinusMultiply;
 import org.apache.sysml.runtime.functionobjects.Multiply;
 import org.apache.sysml.runtime.functionobjects.Multiply2;
 import org.apache.sysml.runtime.functionobjects.NotEquals;
 import org.apache.sysml.runtime.functionobjects.Or;
 import org.apache.sysml.runtime.functionobjects.Plus;
+import org.apache.sysml.runtime.functionobjects.PlusMultiply;
 import org.apache.sysml.runtime.functionobjects.Power2;
 import org.apache.sysml.runtime.functionobjects.ValueFunction;
 import org.apache.sysml.runtime.matrix.operators.BinaryOperator;
@@ -332,7 +334,8 @@ public class LibMatrixBincell
                                }
                        }
                        else if( !ret.sparse && (m1.sparse || m2.sparse) &&
-                                       (op.fn instanceof Plus || op.fn 
instanceof Minus || 
+                                       (op.fn instanceof Plus || op.fn 
instanceof Minus ||
+                                       op.fn instanceof PlusMultiply || op.fn 
instanceof MinusMultiply ||
                                        (op.fn instanceof Multiply && 
!m2.sparse )))
                        {
                                //specific case in order to prevent binary 
search on sparse inputs (see quickget and quickset)
@@ -464,7 +467,8 @@ public class LibMatrixBincell
                double[] a = m1.denseBlock;
                double[] b = m2.denseBlock;
                double[] c = ret.denseBlock;
-
+               int nnz = 0;
+               
                if( atype == BinaryAccessType.MATRIX_COL_VECTOR )
                {
                        for( int i=0, ix=0; i<rlen; i++, ix+=clen )
@@ -474,33 +478,39 @@ public class LibMatrixBincell
                                if( skipEmpty && v2 == 0 ) //skip empty rows
                                        continue;
                                        
-                               if( isMultiply && v2 == 1 ) //ROW COPY
-                               {
+                               if( isMultiply && v2 == 1 ) { //ROW COPY
                                        //a guaranteed to be non-null (see 
early abort)
                                        System.arraycopy(a, ix, c, ix, clen);
+                                       nnz += m1.recomputeNonZeros(i, i, 0, 
clen-1);
                                }
-                               else //GENERAL CASE
-                               {
+                               else { //GENERAL CASE
                                        if( a != null )
-                                               for( int j=0; j<clen; j++ )
+                                               for( int j=0; j<clen; j++ ) {
                                                        c[ix+j] = 
op.fn.execute( a[ix+j], v2 ); 
-                                       else
-                                               Arrays.fill(c, ix, ix+clen, 
op.fn.execute( 0, v2 ));    
+                                                       nnz += (c[ix+j] != 0) ? 
1 : 0;
+                                               }
+                                       else {
+                                               double val = op.fn.execute( 0, 
v2 );
+                                               Arrays.fill(c, ix, ix+clen, 
val);
+                                               nnz += (val != 0) ? clen : 0;
+                                       }
                                }
                        }
                }
                else if( atype == BinaryAccessType.MATRIX_ROW_VECTOR )
                {
-                       if( a==null && b==null ) //both empty
-                       {
+                       if( a==null && b==null ) { //both empty
                                double v = op.fn.execute( 0, 0 );
                                Arrays.fill(c, 0, rlen*clen, v);
+                               nnz += (v != 0) ? rlen*clen : 0;
                        }
                        else if( a==null ) //left empty
                        {
                                //compute first row
-                               for( int j=0; j<clen; j++ )
+                               for( int j=0; j<clen; j++ ) {
                                        c[j] = op.fn.execute( 0, b[j] );
+                                       nnz += (c[j] != 0) ? rlen : 0;
+                               }
                                //copy first to all other rows
                                for( int i=1, ix=clen; i<rlen; i++, ix+=clen )
                                        System.arraycopy(c, 0, c, ix, clen);
@@ -508,12 +518,14 @@ public class LibMatrixBincell
                        else //default case (incl right empty) 
                        {
                                for( int i=0, ix=0; i<rlen; i++, ix+=clen )
-                                       for( int j=0; j<clen; j++ )
+                                       for( int j=0; j<clen; j++ ) {
                                                c[ix+j] = op.fn.execute( 
a[ix+j], ((b!=null) ? b[j] : 0) );     
+                                               nnz += (c[ix+j] != 0) ? 1 : 0;
+                                       }
                        }
                }
                
-               ret.recomputeNonZeros();
+               ret.nonZeros = nnz;
        }
        
        /**
@@ -608,7 +620,7 @@ public class LibMatrixBincell
                                        for( int j=apos; j<apos+alen; j++ )
                                        {
                                                //empty left
-                                               for( int k = lastIx+1; 
k<aix[j]; k++ ){
+                                               for( int k=lastIx+1; 
!skipEmpty&&k<aix[j]; k++ ){
                                                        double v2 = 
m2.quickGetValue(0, k);
                                                        double v = 
op.fn.execute( 0, v2 );
                                                        ret.appendValue(i, k, 
v);
@@ -622,7 +634,7 @@ public class LibMatrixBincell
                                }
                                
                                //empty left
-                               for( int k = lastIx+1; k<clen; k++ ){
+                               for( int k=lastIx+1; !skipEmpty&&k<clen; k++ ){
                                        double v2 = m2.quickGetValue(0, k);
                                        double v = op.fn.execute( 0, v2 );
                                        ret.appendValue(i, k, v);
@@ -945,43 +957,43 @@ public class LibMatrixBincell
                        ret.allocateSparseRowsBlock();
                        SparseBlock a = m1.sparseBlock;
                        SparseBlock c = ret.sparseBlock;
+                       int rlen = Math.min(m1.rlen, a.numRows());
                        
-                       for(int r=0; r<Math.min(m1.rlen, a.numRows()); r++) {
-                               if( !a.isEmpty(r) )
-                               {
-                                       int apos = a.pos(r);
-                                       int alen = a.size(r);
-                                       int[] aix = a.indexes(r);
-                                       double[] avals = a.values(r);
+                       long nnz = 0;
+                       for(int r=0; r<rlen; r++) {
+                               if( a.isEmpty(r) ) continue;
+                               
+                               int apos = a.pos(r);
+                               int alen = a.size(r);
+                               int[] aix = a.indexes(r);
+                               double[] avals = a.values(r);
+                               
+                               if( copyOnes ) { //SPECIAL CASE: e.g., (X != 0) 
+                                       //create sparse row without repeated 
resizing
+                                       SparseRow crow = new SparseRow(alen);
+                                       crow.setSize(alen);
                                        
-                                       if( copyOnes ) //SPECIAL CASE: e.g., (X 
!= 0) 
-                                       {
-                                               //create sparse row without 
repeated resizing
-                                               SparseRow crow = new 
SparseRow(alen);
-                                               crow.setSize(alen);
-                                               
-                                               //memcopy/memset of 
indexes/values (sparseblock guarantees absence of 0s) 
-                                               System.arraycopy(aix, apos, 
crow.indexes(), 0, alen);
-                                               Arrays.fill(crow.values(), 0, 
alen, 1);
-                                               c.set(r, crow, false);
-                                               ret.nonZeros+=alen;
+                                       //memcopy/memset of indexes/values 
(sparseblock guarantees absence of 0s) 
+                                       System.arraycopy(aix, apos, 
crow.indexes(), 0, alen);
+                                       Arrays.fill(crow.values(), 0, alen, 1);
+                                       c.set(r, crow, false);
+                                       nnz += alen;
+                               }
+                               else { //GENERAL CASE
+                                       //create sparse row without repeated 
resizing for specific ops
+                                       if( op.fn instanceof Multiply || op.fn 
instanceof Multiply2 
+                                               || op.fn instanceof Power2  ) {
+                                               c.allocate(r, alen);
                                        }
-                                       else //GENERAL CASE
-                                       {
-                                               //create sparse row without 
repeated resizing for specific ops
-                                               if( op.fn instanceof Multiply 
|| op.fn instanceof Multiply2 
-                                                       || op.fn instanceof 
Power2  )
-                                               {
-                                                       c.allocate(r, alen);
-                                               }
-                                               
-                                               for(int j=apos; j<apos+alen; 
j++) {
-                                                       double val = 
op.executeScalar(avals[j]);
-                                                       ret.appendValue(r, 
aix[j], val);
-                                               }
+                                       
+                                       for(int j=apos; j<apos+alen; j++) {
+                                               double val = 
op.executeScalar(avals[j]);
+                                               c.append(r, aix[j], val);
+                                               nnz += (val != 0) ? 1 : 0; 
                                        }
                                }
                        }
+                       ret.nonZeros = nnz;
                }
                else //DENSE <- DENSE
                {
@@ -992,12 +1004,12 @@ public class LibMatrixBincell
                        double[] c = ret.denseBlock;
                        
                        int limit = m1.rlen*m1.clen;
-                       for( int i=0; i<limit; i++ )
-                       {
+                       int nnz = 0;
+                       for( int i=0; i<limit; i++ ) {
                                c[i] = op.executeScalar( a[i] );
-                               if( c[i] != 0 )
-                                       ret.nonZeros++;
+                               nnz += (c[i] != 0) ? 1 : 0;
                        }
+                       ret.nonZeros = nnz;
                }
                
        }
@@ -1040,8 +1052,8 @@ public class LibMatrixBincell
                        Arrays.fill(c, cval0);
                        
                        //compute non-zero input values
-                       for(int i=0, cix=0; i<m; i++, cix+=n) 
-                       {
+                       int nnz = m*n;
+                       for(int i=0, cix=0; i<m; i++, cix+=n) {
                                if( !a.isEmpty(i) ) {
                                        int apos = a.pos(i);
                                        int alen = a.size(i);
@@ -1050,12 +1062,11 @@ public class LibMatrixBincell
                                        for(int j=apos; j<apos+alen; j++) {
                                                double val = 
op.executeScalar(avals[j]);
                                                c[ cix+aix[j] ] = val;
+                                               nnz -= (val==0) ? 1 : 0;
                                        }
                                }
                        }
-               
-                       //recompute non zeros 
-                       ret.recomputeNonZeros();
+                       ret.nonZeros = nnz;
                }
                else //DENSE MATRIX
                {
@@ -1067,12 +1078,12 @@ public class LibMatrixBincell
                        
                        //compute scalar operation, incl nnz maintenance
                        int limit = m1.rlen*m1.clen;
-                       for( int i=0; i<limit; i++ )
-                       {
+                       int nnz = 0;
+                       for( int i=0; i<limit; i++ ) {
                                c[i] = op.executeScalar( a[i] );
-                               if( c[i] != 0 )
-                                       ret.nonZeros++;
+                               nnz += (c[i] != 0) ? 1 : 0;
                        }
+                       ret.nonZeros = nnz;
                }
        }
 
@@ -1270,21 +1281,18 @@ public class LibMatrixBincell
                while( p1<size1 && p2< size2 )
                {
                        double value = 0;
-                       if(cols1[pos1+p1]<cols2[pos2+p2])
-                       {
+                       if(cols1[pos1+p1]<cols2[pos2+p2]) {
                                value = op.fn.execute(values1[pos1+p1], 0);
                                column = cols1[pos1+p1];
                                p1++;
                        }
-                       else if(cols1[pos1+p1]==cols2[pos2+p2])
-                       {
+                       else if(cols1[pos1+p1]==cols2[pos2+p2]) {
                                value = op.fn.execute(values1[pos1+p1], 
values2[pos2+p2]);
                                column = cols1[pos1+p1];
                                p1++;
                                p2++;
                        }
-                       else
-                       {
+                       else {
                                value = op.fn.execute(0, values2[pos2+p2]);
                                column = cols2[pos2+p2];
                                p2++;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a528f5e4/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 452eddb..5884768 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2901,24 +2901,48 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                final int m = rlen;
                final int n = clen;
                
-               if( sparse ) //SPARSE <- SPARSE
+               if( sparse && ret.sparse ) //SPARSE <- SPARSE
                {
+                       ret.allocateSparseRowsBlock();
                        SparseBlock a = sparseBlock;
+                       SparseBlock c = ret.sparseBlock;
+               
+                       long nnz = 0;
+                       for(int i=0; i<m; i++) {
+                               if( a.isEmpty(i) ) continue;
+                               
+                               int apos = a.pos(i);
+                               int alen = a.size(i);
+                               int[] aix = a.indexes(i);
+                               double[] avals = a.values(i);
+                               
+                               c.allocate(i, alen); //avoid repeated alloc
+                               for( int j=apos; j<apos+alen; j++ ) {
+                                       double val = op.fn.execute(avals[j]);
+                                       c.append(i, aix[j], val);
+                                       nnz += (val != 0) ? 1 : 0;
+                               }
+                       }
+                       ret.nonZeros = nnz;
+               }
+               else if( sparse ) //DENSE <- SPARSE
+               {
+                       SparseBlock a = sparseBlock;                    
                        
                        for(int i=0; i<m; i++) {
-                               if( !a.isEmpty(i) )
-                               {
-                                       int apos = a.pos(i);
-                                       int alen = a.size(i);
-                                       int[] aix = a.indexes(i);
-                                       double[] avals = a.values(i);
-                                       
-                                       for( int j=apos; j<apos+alen; j++ ) {
-                                               double val = 
op.fn.execute(avals[j]);
-                                               ret.appendValue(i, aix[j], val);
-                                       }
+                               if( a.isEmpty(i) ) continue;
+                       
+                               int apos = a.pos(i);
+                               int alen = a.size(i);
+                               int[] aix = a.indexes(i);
+                               double[] avals = a.values(i);
+                               
+                               for( int j=apos; j<apos+alen; j++ ) {
+                                       double val = op.fn.execute(avals[j]);
+                                       ret.appendValue(i, aix[j], val);
                                }
                        }
+                       //nnz maintained on appendValue
                }
                else //DENSE <- DENSE
                {
@@ -2926,13 +2950,15 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        ret.allocateDenseBlock();                               
                
                        double[] a = denseBlock;
                        double[] c = ret.denseBlock;
+                       int len = m * n;
                        
                        //unary op, incl nnz maintenance
-                       int len = m*n;
+                       int nnz = 0;
                        for( int i=0; i<len; i++ ) {
                                c[i] = op.fn.execute(a[i]);
-                               ret.nonZeros += (c[i] != 0) ? 1 : 0;
-                       }                       
+                               nnz += (c[i] != 0) ? 1 : 0;
+                       }
+                       ret.nonZeros = nnz;
                }
        }
        

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a528f5e4/src/main/java/org/apache/sysml/runtime/matrix/operators/ScalarOperator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/operators/ScalarOperator.java 
b/src/main/java/org/apache/sysml/runtime/matrix/operators/ScalarOperator.java
index 4a45569..75891d3 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/operators/ScalarOperator.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/operators/ScalarOperator.java
@@ -52,32 +52,23 @@ public class ScalarOperator  extends Operator
                
                //as long as (0 op v)=0, then op is sparsesafe
                //note: additional functionobjects might qualify according to 
constant
-               if(   fn instanceof Multiply || fn instanceof Multiply2 
-                  || fn instanceof Power || fn instanceof Power2 
-                  || fn instanceof And || fn instanceof MinusNz
-                  || (fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.LOG_NZ)) 
-               {
-                       sparseSafe=true;
-               }
-               else
-               {
-                       sparseSafe=false;
-               }
+               sparseSafe = (fn instanceof Multiply || fn instanceof Multiply2 
+                               || fn instanceof Power || fn instanceof Power2 
+                               || fn instanceof And || fn instanceof MinusNz
+                               || (fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.LOG_NZ));
        }
        
-       public double getConstant()
-       {
+       public double getConstant() {
                return _constant;
        }
        
-       public void setConstant(double cst) 
-       {
+       public void setConstant(double cst) {
                //set constant
                _constant = cst;
                
                //revisit sparse safe decision according to known constant
                //note: there would be even more potential if we take 
left/right op into account
-               if(    fn instanceof Multiply || fn instanceof Multiply2 
+               sparseSafe = ( fn instanceof Multiply || fn instanceof 
Multiply2 
                        || fn instanceof Power || fn instanceof Power2 
                        || fn instanceof And || fn instanceof MinusNz
                        || fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.LOG_NZ
@@ -88,14 +79,7 @@ public class ScalarOperator  extends Operator
                        || (fn instanceof Minus && _constant==0)
                        || (fn instanceof Minus && _constant==0)
                        || (fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.MAX && _constant<=0)
-                       || (fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.MIN && 
_constant>=0))
-               {
-                       sparseSafe = true;
-               }
-               else
-               {
-                       sparseSafe = false;
-               }
+                       || (fn instanceof Builtin && 
((Builtin)fn).getBuiltinFunctionCode()==BuiltinFunctionCode.MIN && 
_constant>=0));
        }
        
        public double executeScalar(double in) throws DMLRuntimeException {

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a528f5e4/src/main/java/org/apache/sysml/runtime/matrix/operators/UnaryOperator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/operators/UnaryOperator.java 
b/src/main/java/org/apache/sysml/runtime/matrix/operators/UnaryOperator.java
index 2f65fb4..a736c8b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/operators/UnaryOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/operators/UnaryOperator.java
@@ -40,17 +40,13 @@ public class UnaryOperator extends Operator
                sparseSafe = false;
                k = numThreads;
                
-               if(fn instanceof Builtin)
-               {
+               if( fn instanceof Builtin ) {
                        Builtin f=(Builtin)fn;
-                       if(f.bFunc==Builtin.BuiltinFunctionCode.SIN || 
f.bFunc==Builtin.BuiltinFunctionCode.TAN 
+                       sparseSafe = (f.bFunc==Builtin.BuiltinFunctionCode.SIN 
|| f.bFunc==Builtin.BuiltinFunctionCode.TAN 
                                        || 
f.bFunc==Builtin.BuiltinFunctionCode.ROUND || 
f.bFunc==Builtin.BuiltinFunctionCode.ABS
                                        || 
f.bFunc==Builtin.BuiltinFunctionCode.SQRT || 
f.bFunc==Builtin.BuiltinFunctionCode.SPROP
                                        || 
f.bFunc==Builtin.BuiltinFunctionCode.SELP || 
f.bFunc==Builtin.BuiltinFunctionCode.LOG_NZ
-                                       || 
f.bFunc==Builtin.BuiltinFunctionCode.SIGN )
-                       {
-                               sparseSafe = true;
-                       }
+                                       || 
f.bFunc==Builtin.BuiltinFunctionCode.SIGN );
                }
        }
        

Reply via email to