Repository: incubator-systemml
Updated Branches:
  refs/heads/master c697c30eb -> ab1c70508


[SYSTEMML-1592] Improved codegen cellwise operations w/ sparse outputs

This patch improves codegen cellwise operations with the ability to
directly create sparse outputs instead of dense intermediates and a
subsequent conversion to sparse. We apply this for sparse inputs with
sparse-safe cellwise operations. Furthermore, this also includes a
refactoring of the cellwise operator skeleton for better instruction
locality.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ab1c7050
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ab1c7050
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ab1c7050

Branch: refs/heads/master
Commit: ab1c7050890bcfc7032c33df3c3d8244a906c8a3
Parents: c697c30
Author: Matthias Boehm <[email protected]>
Authored: Thu May 25 20:40:52 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu May 25 20:40:52 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/codegen/SpoofCellwise.java    | 607 ++++++++++---------
 1 file changed, 335 insertions(+), 272 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ab1c7050/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index b01914a..fe23a04 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -188,28 +188,28 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        k = 1; //serial execution
                }
                
-               //result allocation and preparations
-               out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG 
? 
-                               inputs.get(0).getNumColumns() : 1, false);
-               out.allocateDenseBlock();
-               double[] c = out.getDenseBlock();
-               
                //input preparation
                double[][] b = prepInputMatrices(inputs);
                double[] scalars = prepInputScalars(scalarObjects);
                final int m = inputs.get(0).getNumRows();
-               final int n = inputs.get(0).getNumColumns();            
+               final int n = inputs.get(0).getNumColumns();
                
                //sparse safe check 
-               boolean sparseSafe = isSparseSafe() || (b.length == 0 
+               boolean sparseSafe = isSparseSafe() || (b.length == 0
                                && genexec( 0, b, scalars, m, n, 0, 0 ) == 0);
                
+               //result allocation and preparations
+               boolean sparseOut = sparseSafe && 
inputs.get(0).isInSparseFormat() && _type == CellType.NO_AGG;
+               out.reset(inputs.get(0).getNumRows(), _type == CellType.NO_AGG ?
+                               inputs.get(0).getNumColumns() : 1, sparseOut);
+               out.allocateDenseOrSparseBlock();
+               
                long lnnz = 0;
                if( k <= 1 ) //SINGLE-THREADED
                {
                        lnnz = (!inputs.get(0).isInSparseFormat()) ?
-                               executeDense(inputs.get(0).getDenseBlock(), b, 
scalars, c, m, n, sparseSafe, 0, m) :
-                               executeSparse(inputs.get(0).getSparseBlock(), 
b, scalars, c, m, n, sparseSafe, 0, m);
+                               executeDense(inputs.get(0).getDenseBlock(), b, 
scalars, out, m, n, sparseSafe, 0, m) :
+                               executeSparse(inputs.get(0).getSparseBlock(), 
b, scalars, out, m, n, sparseSafe, 0, m);
                }
                else  //MULTI-THREADED
                {
@@ -219,7 +219,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                int nk = 
UtilFunctions.roundToNext(Math.min(8*k,m/32), k);
                                int blklen = (int)(Math.ceil((double)m/nk));
                                for( int i=0; i<nk & i*blklen<m; i++ )
-                                       tasks.add(new 
ParExecTask(inputs.get(0), b, scalars, c, 
+                                       tasks.add(new 
ParExecTask(inputs.get(0), b, scalars, out, 
                                                m, n, sparseSafe, i*blklen, 
Math.min((i+1)*blklen, m))); 
                                //execute tasks
                                List<Future<Long>> taskret = 
pool.invokeAll(tasks);     
@@ -235,129 +235,56 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
                }
                
                //post-processing
-               out.setNonZeros(lnnz);  
-               out.examSparsity();     
+               out.setNonZeros(lnnz);
+               out.examSparsity();
        }
        
-       private double executeDenseAndAgg(double[] a, double[][] b, double[] 
scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws 
DMLRuntimeException 
+       private long executeDense(double[] a, double[][] b, double[] scalars, 
MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
        {
-               ValueFunction vfun = getAggFunction();
-               double ret = 0;
+               double[] c = out.getDenseBlock();
                
-               //numerically stable aggregation for sum/sum_sq
-               if( vfun instanceof KahanFunction ) {
-                       KahanObject kbuff = new KahanObject(0, 0);
-                       KahanFunction kplus = (KahanFunction) vfun;
-                       
-                       if( a == null && !sparseSafe ) { //empty
-                               for( int i=rl; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++ )
-                                               kplus.execute2(kbuff, genexec( 
0, b, scalars, m, n, i, j ));
-                       }
-                       else if( a != null ) { //general case
-                               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++, ix++ )
-                                               if( a[ix] != 0 || !sparseSafe)
-                                                       kplus.execute2(kbuff, 
genexec( a[ix], b, scalars, m, n, i, j ));
-                       }
-                       ret = kbuff._sum;
+               if( _type == CellType.NO_AGG ) {
+                       return executeDenseNoAgg(a, b, scalars, c, m, n, 
sparseSafe, rl, ru);
                }
-               //safe aggregation for min/max w/ handling of zero entries
-               //note: sparse safe with zero value as min/max handled outside
-               else {
-                       ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE; 
-                       if( a == null && !sparseSafe ) { //empty
-                               for( int i=rl; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++ )
-                                               ret = vfun.execute(ret, 
genexec( 0, b, scalars, m, n, i, j ));
-                       }
-                       else if( a != null ) { //general case
-                               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++, ix++ )
-                                               if( a[ix] != 0 || !sparseSafe)
-                                                       ret = vfun.execute(ret, 
genexec( a[ix], b, scalars, m, n, i, j ));
-                       }
+               else if( _type == CellType.ROW_AGG ) {
+                       if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+                               return executeDenseRowAggSum(a, b, scalars, c, 
m, n, sparseSafe, rl, ru);
+                       else
+                               return executeDenseRowAggMxx(a, b, scalars, c, 
m, n, sparseSafe, rl, ru);
                }
-               
-               return ret;
+               return -1;
        }
        
-       private long executeDense(double[] a, double[][] b, double[] scalars, 
double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+       private double executeDenseAndAgg(double[] a, double[][] b, double[] 
scalars, int m, int n, boolean sparseSafe, int rl, int ru) throws 
DMLRuntimeException 
+       {
+               //numerically stable aggregation for sum/sum_sq
+               if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+                       return executeDenseAggSum(a, b, scalars, m, n, 
sparseSafe, rl, ru);
+               else
+                       return executeDenseAggMxx(a, b, scalars, m, n, 
sparseSafe, rl, ru);
+       }
+       
+       private long executeSparse(SparseBlock sblock, double[][] b, double[] 
scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) 
                throws DMLRuntimeException 
        {
-               long lnnz = 0;
+               if( sparseSafe && sblock == null )
+                       return 0;
                
-               if( _type == CellType.NO_AGG )
-               {
-                       if( a == null && !sparseSafe ) { //empty
-                               //note: we can't determine sparse-safeness by 
executing the operator once 
-                               //as the output might change with different row 
indices
-                               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++, ix++ ) {
-                                               c[ix] = genexec( 0, b, scalars, 
m, n, i, j ); 
-                                               lnnz += (c[ix]!=0) ? 1 : 0;
-                                       }
-                       }
-                       else if( a != null ) { //general case
-                               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                                       for( int j=0; j<n; j++, ix++ ) 
-                                               if( a[ix] != 0 || !sparseSafe) {
-                                                       c[ix] = genexec( a[ix], 
b, scalars, m, n, i, j); 
-                                                       lnnz += (c[ix]!=0) ? 1 
: 0;
-                                               }
-                       }
+               if( _type == CellType.NO_AGG ) {
+                       if( out.isInSparseFormat() ) 
+                               return executeSparseNoAggSparse(sblock, b, 
scalars, out, m, n, sparseSafe, rl, ru);
+                       else
+                               return executeSparseNoAggDense(sblock, b, 
scalars, out, m, n, sparseSafe, rl, ru);
                }
-               else if( _type == CellType.ROW_AGG )
-               {
-                       ValueFunction vfun = getAggFunction();
-                       
-                       if( vfun instanceof KahanFunction ) {
-                               KahanObject kbuff = new KahanObject(0, 0);
-                               KahanFunction kplus = (KahanFunction) vfun;
-       
-                               if( a == null && !sparseSafe ) { //empty
-                                       for( int i=rl; i<ru; i++ ) { 
-                                               kbuff.set(0, 0);
-                                               for( int j=0; j<n; j++ )
-                                                       kplus.execute2(kbuff, 
genexec( 0, b, scalars, m, n, i, j ));
-                                               lnnz += ((c[i] = 
kbuff._sum)!=0) ? 1 : 0;
-                                       }
-                               }
-                               else if( a != null ) { //general case
-                                       for( int i=rl, ix=rl*n; i<ru; i++ ) {
-                                               kbuff.set(0, 0);
-                                               for( int j=0; j<n; j++, ix++ )
-                                                       if( a[ix] != 0 || 
!sparseSafe)
-                                                               
kplus.execute2(kbuff, genexec( a[ix], b, scalars, m, n, i, j ));
-                                               lnnz += ((c[i] = 
kbuff._sum)!=0) ? 1 : 0;
-                                       }
-                               }
-                       }
-                       else {
-                               double initialVal = (_aggOp==AggOp.MIN) ? 
Double.MAX_VALUE : -Double.MAX_VALUE;
-                               if( a == null && !sparseSafe ) { //empty
-                                       for( int i=rl; i<ru; i++ ) { 
-                                               double tmp = initialVal;
-                                               for( int j=0; j<n; j++ )
-                                                       tmp = vfun.execute(tmp, 
genexec( 0, b, scalars, m, n, i, j ));
-                                               lnnz += ((c[i] = tmp)!=0) ? 1 : 
0;
-                                       }
-                               }
-                               else if( a != null ) { //general case
-                                       for( int i=rl, ix=rl*n; i<ru; i++ ) {
-                                               double tmp = initialVal;
-                                               for( int j=0; j<n; j++, ix++ )
-                                                       if( a[ix] != 0 || 
!sparseSafe)
-                                                               tmp = 
vfun.execute(tmp, genexec( a[ix], b, scalars, m, n, i, j ));
-                                               if( sparseSafe && 
UtilFunctions.containsZero(a, ix-n, n) )
-                                                       tmp = vfun.execute(tmp, 
0);
-                                               lnnz += ((c[i] = tmp)!=0) ? 1 : 
0;
-                                       }
-                               }
-                       }
+               else if( _type == CellType.ROW_AGG ) {
+                       if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+                               return executeSparseRowAggSum(sblock, b, 
scalars, out, m, n, sparseSafe, rl, ru);
+                       else
+                               return executeSparseRowAggMxx(sblock, b, 
scalars, out, m, n, sparseSafe, rl, ru);
                }
                
-               return lnnz;
+               return -1;
        }
        
        private double executeSparseAndAgg(SparseBlock sblock, double[][] b, 
double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
@@ -366,184 +293,320 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
                if( sparseSafe && sblock == null )
                        return 0;
                
-               ValueFunction vfun = getAggFunction();
-               double ret = 0;
+               if( _aggOp == AggOp.SUM || _aggOp == AggOp.SUM_SQ )
+                       return executeSparseAggSum(sblock, b, scalars, m, n, 
sparseSafe, rl, ru);
+               else
+                       return executeSparseAggMxx(sblock, b, scalars, m, n, 
sparseSafe, rl, ru);
+       }
+       
+       private long executeSparseNoAggSparse(SparseBlock sblock, double[][] b, 
double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, 
int ru) 
+               throws DMLRuntimeException 
+       {
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               SparseBlock c = out.getSparseBlock();
+               long lnnz = 0;
+               for(int i=rl; i<ru; i++) {
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               c.allocate(i, sparseSafe ? alen : n);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
+                                       if( !sparseSafe )
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
+                                                       c.append(i, j, 
genexec(0, b, scalars, m, n, i, j));
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       c.append(i, lastj, genexec(avals[k], b, 
scalars, m, n, i, lastj));
+                               }
+                       }
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       c.append(i, j, genexec(0, b, scalars, 
m, n, i, j));
+                       lnnz += c.size(i);
+               }
                
-               //numerically stable aggregation for sum/sum_sq
-               if( vfun instanceof KahanFunction ) {
-                       KahanObject kbuff = new KahanObject(0, 0);
-                       KahanFunction kplus = (KahanFunction) vfun;
+               return lnnz;
+       }
        
-                       //note: sequential scan algorithm for both sparse-safe 
and -unsafe 
-                       //in order to avoid binary search for sparse-unsafe 
-                       for(int i=rl; i<ru; i++) {
-                               int lastj = -1;
-                               //handle non-empty rows
-                               if( sblock != null && !sblock.isEmpty(i) ) {
-                                       int apos = sblock.pos(i);
-                                       int alen = sblock.size(i);
-                                       int[] aix = sblock.indexes(i);
-                                       double[] avals = sblock.values(i);
-                                       for(int k=apos; k<apos+alen; k++) {
-                                               //process zeros before current 
non-zero
-                                               if( !sparseSafe )
-                                                       for(int j=lastj+1; 
j<aix[k]; j++)
-                                                               
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
-                                               //process current non-zero
-                                               lastj = aix[k];
-                                               kplus.execute2(kbuff, 
genexec(avals[k], b, scalars, m, n, i, lastj));
-                                       }
+       private long executeSparseNoAggDense(SparseBlock sblock, double[][] b, 
double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, 
int ru) 
+               throws DMLRuntimeException 
+       {
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               double[] c = out.getDenseBlock();
+               long lnnz = 0;
+               for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
+                                       if( !sparseSafe )
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
+                                                       lnnz += 
((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       lnnz += 
((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
                                }
-                               //process empty rows or remaining zeros
-                               if( !sparseSafe )
-                                       for(int j=lastj+1; j<n; j++)
-                                               kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
                        }
-                       ret = kbuff._sum;
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       lnnz += ((c[cix+j]=genexec(0, b, 
scalars, m, n, i, j))!=0)?1:0;
                }
-               //safe aggregation for min/max w/ handling of zero entries
-               //note: sparse safe with zero value as min/max handled outside
-               else {
-                       ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE; 
-                       ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : 
ret;
-                       
-                       //note: sequential scan algorithm for both sparse-safe 
and -unsafe 
-                       //in order to avoid binary search for sparse-unsafe 
-                       for(int i=rl; i<ru; i++) {
-                               int lastj = -1;
-                               //handle non-empty rows
-                               if( sblock != null && !sblock.isEmpty(i) ) {
-                                       int apos = sblock.pos(i);
-                                       int alen = sblock.size(i);
-                                       int[] aix = sblock.indexes(i);
-                                       double[] avals = sblock.values(i);
-                                       for(int k=apos; k<apos+alen; k++) {
-                                               //process zeros before current 
non-zero
-                                               if( !sparseSafe )
-                                                       for(int j=lastj+1; 
j<aix[k]; j++)
-                                                               ret = 
vfun.execute(ret, genexec(0, b, scalars, m, n, i, j));
-                                               //process current non-zero
-                                               lastj = aix[k];
-                                               ret = vfun.execute(ret, 
genexec(avals[k], b, scalars, m, n, i, lastj));
-                                       }
+               return lnnz;
+       }
+       
+       private long executeSparseRowAggSum(SparseBlock sblock, double[][] b, 
double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, 
int ru) 
+               throws DMLRuntimeException 
+       {
+               KahanFunction kplus = (KahanFunction) getAggFunction();
+               KahanObject kbuff = new KahanObject(0, 0);
+
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               double[] c = out.getDenseBlock();
+               long lnnz = 0;
+               for(int i=rl; i<ru; i++) {
+                       kbuff.set(0, 0);
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
+                                       if( !sparseSafe )
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
+                                                       kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, i, lastj));
                                }
-                               //process empty rows or remaining zeros
-                               if( !sparseSafe )
-                                       for(int j=lastj+1; j<n; j++)
-                                               ret = vfun.execute(ret, 
genexec(0, b, scalars, m, n, i, j));
                        }
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                       lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
                }
-               
-               return ret;
+               return lnnz;
        }
        
-       private long executeSparse(SparseBlock sblock, double[][] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+       private long executeSparseRowAggMxx(SparseBlock sblock, double[][] b, 
double[] scalars, MatrixBlock out, int m, int n, boolean sparseSafe, int rl, 
int ru) 
                throws DMLRuntimeException 
        {
-               if( sparseSafe && sblock == null )
-                       return 0;
+               double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE;
+               ValueFunction vfun = getAggFunction();
                
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               double[] c = out.getDenseBlock();
                long lnnz = 0;
-               if( _type == CellType.NO_AGG )
-               {
-                       //note: sequential scan algorithm for both sparse-safe 
and -unsafe 
-                       //in order to avoid binary search for sparse-unsafe 
-                       for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
-                               int lastj = -1;
-                               //handle non-empty rows
-                               if( sblock != null && !sblock.isEmpty(i) ) {
-                                       int apos = sblock.pos(i);
-                                       int alen = sblock.size(i);
-                                       int[] aix = sblock.indexes(i);
-                                       double[] avals = sblock.values(i);
-                                       for(int k=apos; k<apos+alen; k++) {
-                                               //process zeros before current 
non-zero
-                                               if( !sparseSafe )
-                                                       for(int j=lastj+1; 
j<aix[k]; j++)
-                                                               lnnz += 
((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
-                                               //process current non-zero
-                                               lastj = aix[k];
-                                               lnnz += 
((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
-                                       }
+               for(int i=rl; i<ru; i++) {
+                       double tmp = (sparseSafe && sblock.size(i) < n) ? 0 : 
initialVal;
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
+                                       if( !sparseSafe )
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
+                                                       tmp = vfun.execute(tmp, 
genexec(0, b, scalars, m, n, i, j));
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       tmp = vfun.execute( tmp, 
genexec(avals[k], b, scalars, m, n, i, lastj));
                                }
-                               //process empty rows or remaining zeros
-                               if( !sparseSafe )
-                                       for(int j=lastj+1; j<n; j++)
-                                               lnnz += ((c[cix+j]=genexec(0, 
b, scalars, m, n, i, j))!=0)?1:0;
                        }
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, i, j));
+                       lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
                }
-               else if( _type == CellType.ROW_AGG ) 
-               {
-                       ValueFunction vfun = getAggFunction();
-
-                       if( vfun instanceof KahanFunction ) {
-                               KahanObject kbuff = new KahanObject(0, 0);
-                               KahanFunction kplus = (KahanFunction) vfun;
+               return lnnz;
+       }
+       
+       private double executeSparseAggSum(SparseBlock sblock, double[][] b, 
double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               KahanFunction kplus = (KahanFunction) getAggFunction();
+               KahanObject kbuff = new KahanObject(0, 0);
 
-                               //note: sequential scan algorithm for both 
sparse-safe and -unsafe 
-                               //in order to avoid binary search for 
sparse-unsafe 
-                               for(int i=rl; i<ru; i++) {
-                                       kbuff.set(0, 0);
-                                       int lastj = -1;
-                                       //handle non-empty rows
-                                       if( sblock != null && 
!sblock.isEmpty(i) ) {
-                                               int apos = sblock.pos(i);
-                                               int alen = sblock.size(i);
-                                               int[] aix = sblock.indexes(i);
-                                               double[] avals = 
sblock.values(i);
-                                               for(int k=apos; k<apos+alen; 
k++) {
-                                                       //process zeros before 
current non-zero
-                                                       if( !sparseSafe )
-                                                               for(int 
j=lastj+1; j<aix[k]; j++)
-                                                                       
kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j));
-                                                       //process current 
non-zero
-                                                       lastj = aix[k];
-                                                       kplus.execute2(kbuff, 
genexec(avals[k], b, scalars, m, n, i, lastj));
-                                               }
-                                       }
-                                       //process empty rows or remaining zeros
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               for(int i=rl; i<ru; i++) {
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
                                        if( !sparseSafe )
-                                               for(int j=lastj+1; j<n; j++)
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
                                                        kplus.execute2(kbuff, 
genexec(0, b, scalars, m, n, i, j));
-                                       lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 
0;
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       kplus.execute2(kbuff, genexec(avals[k], 
b, scalars, m, n, i, lastj));
                                }
                        }
-                       else {
-                               double initialVal = (_aggOp==AggOp.MIN) ? 
Double.MAX_VALUE : -Double.MAX_VALUE;
-                               
-                               //note: sequential scan algorithm for both 
sparse-safe and -unsafe 
-                               //in order to avoid binary search for 
sparse-unsafe 
-                               for(int i=rl; i<ru; i++) {
-                                       double tmp = (sparseSafe && 
sblock.size(i) < n) ? 0 : initialVal;
-                                       int lastj = -1;
-                                       //handle non-empty rows
-                                       if( sblock != null && 
!sblock.isEmpty(i) ) {
-                                               int apos = sblock.pos(i);
-                                               int alen = sblock.size(i);
-                                               int[] aix = sblock.indexes(i);
-                                               double[] avals = 
sblock.values(i);
-                                               for(int k=apos; k<apos+alen; 
k++) {
-                                                       //process zeros before 
current non-zero
-                                                       if( !sparseSafe )
-                                                               for(int 
j=lastj+1; j<aix[k]; j++)
-                                                                       tmp = 
vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j));
-                                                       //process current 
non-zero
-                                                       lastj = aix[k];
-                                                       tmp = vfun.execute( 
tmp, genexec(avals[k], b, scalars, m, n, i, lastj));
-                                               }
-                                       }
-                                       //process empty rows or remaining zeros
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+               }
+               return kbuff._sum;
+       }
+       
+       private double executeSparseAggMxx(SparseBlock sblock, double[][] b, 
double[] scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE;
+               ret = (sparseSafe && sblock.size() < (long)m*n) ? 0 : ret;
+               ValueFunction vfun = getAggFunction();
+               
+               //note: sequential scan algorithm for both sparse-safe and 
-unsafe 
+               //in order to avoid binary search for sparse-unsafe 
+               for(int i=rl; i<ru; i++) {
+                       int lastj = -1;
+                       //handle non-empty rows
+                       if( sblock != null && !sblock.isEmpty(i) ) {
+                               int apos = sblock.pos(i);
+                               int alen = sblock.size(i);
+                               int[] aix = sblock.indexes(i);
+                               double[] avals = sblock.values(i);
+                               for(int k=apos; k<apos+alen; k++) {
+                                       //process zeros before current non-zero
                                        if( !sparseSafe )
-                                               for(int j=lastj+1; j<n; j++)
-                                                       tmp = vfun.execute(tmp, 
genexec(0, b, scalars, m, n, i, j));
-                                       lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+                                               for(int j=lastj+1; j<aix[k]; 
j++)
+                                                       ret = vfun.execute(ret, 
genexec(0, b, scalars, m, n, i, j));
+                                       //process current non-zero
+                                       lastj = aix[k];
+                                       ret = vfun.execute(ret, 
genexec(avals[k], b, scalars, m, n, i, lastj));
                                }
                        }
+                       //process empty rows or remaining zeros
+                       if( !sparseSafe )
+                               for(int j=lastj+1; j<n; j++)
+                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, i, j));
                }
-               
+               return ret;
+       }
+       
+       private long executeDenseNoAgg(double[] a, double[][] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               long lnnz = 0;
+               for( int i=rl, ix=rl*n; i<ru; i++ ) 
+                       for( int j=0; j<n; j++, ix++ ) {
+                               double aval = (a != null) ? a[ix] : 0;
+                               if( aval != 0 || !sparseSafe) {
+                                       c[ix] = genexec( aval, b, scalars, m, 
n, i, j); 
+                                       lnnz += (c[ix]!=0) ? 1 : 0;
+                               }
+                       }
                return lnnz;
        }
-
+       
+       private long executeDenseRowAggSum(double[] a, double[][] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               KahanFunction kplus = (KahanFunction) getAggFunction();
+               KahanObject kbuff = new KahanObject(0, 0);
+               long lnnz = 0;
+               for( int i=rl, ix=rl*n; i<ru; i++ ) {
+                       kbuff.set(0, 0);
+                       for( int j=0; j<n; j++, ix++ ) {
+                               double aval = (a != null) ? a[ix] : 0;
+                               if( aval != 0 || !sparseSafe)
+                                       kplus.execute2(kbuff, genexec(aval, b, 
scalars, m, n, i, j));
+                       }
+                       lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
+               }
+               return lnnz;
+       }
+       
+       private long executeDenseRowAggMxx(double[] a, double[][] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE;
+               ValueFunction vfun = getAggFunction();
+               long lnnz = 0;
+               if( a == null && !sparseSafe ) { //empty
+                       for( int i=rl; i<ru; i++ ) { 
+                               double tmp = initialVal;
+                               for( int j=0; j<n; j++ )
+                                       tmp = vfun.execute(tmp, genexec( 0, b, 
scalars, m, n, i, j ));
+                               lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+                       }
+               }
+               else if( a != null ) { //general case
+                       for( int i=rl, ix=rl*n; i<ru; i++ ) {
+                               double tmp = initialVal;
+                               for( int j=0; j<n; j++, ix++ )
+                                       if( a[ix] != 0 || !sparseSafe)
+                                               tmp = vfun.execute(tmp, 
genexec( a[ix], b, scalars, m, n, i, j ));
+                               if( sparseSafe && UtilFunctions.containsZero(a, 
ix-n, n) )
+                                       tmp = vfun.execute(tmp, 0);
+                               lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+                       }
+               }
+               return lnnz;
+       }
+       
+       private double executeDenseAggSum(double[] a, double[][] b, double[] 
scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               KahanFunction kplus = (KahanFunction) getAggFunction();
+               KahanObject kbuff = new KahanObject(0, 0);
+               
+               for( int i=rl, ix=rl*n; i<ru; i++ ) 
+                       for( int j=0; j<n; j++, ix++ ) {
+                               double aval = (a != null) ? a[ix] : 0;
+                               if( aval != 0 || !sparseSafe)
+                                       kplus.execute2(kbuff, genexec(aval, b, 
scalars, m, n, i, j));
+                       }
+               return kbuff._sum;
+       }
+       
+       private double executeDenseAggMxx(double[] a, double[][] b, double[] 
scalars, int m, int n, boolean sparseSafe, int rl, int ru) 
+               throws DMLRuntimeException 
+       {
+               //safe aggregation for min/max w/ handling of zero entries
+               //note: sparse safe with zero value as min/max handled outside
+               double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : 
-Double.MAX_VALUE; 
+               ValueFunction vfun = getAggFunction();
+               
+               for( int i=rl, ix=rl*n; i<ru; i++ ) 
+                       for( int j=0; j<n; j++, ix++ ) {
+                               double aval = (a != null) ? a[ix] : 0;
+                               if( aval != 0 || !sparseSafe)
+                                       ret = vfun.execute(ret, genexec(aval, 
b, scalars, m, n, i, j));
+                       }
+               return ret;
+       }
+       
+       
        protected abstract double genexec( double a, double[][] b, double[] 
scalars, int m, int n, int rowIndex, int colIndex);
        
        private class ParAggTask implements Callable<Double> 
@@ -582,14 +645,14 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                private final MatrixBlock _a;
                private final double[][] _b;
                private final double[] _scalars;
-               private final double[] _c;
+               private final MatrixBlock _c;
                private final int _rlen;
                private final int _clen;
                private final boolean _safe;
                private final int _rl;
                private final int _ru;
 
-               protected ParExecTask( MatrixBlock a, double[][] b, double[] 
scalars, double[] c, 
+               protected ParExecTask( MatrixBlock a, double[][] b, double[] 
scalars, MatrixBlock c, 
                                int rlen, int clen, boolean sparseSafe, int rl, 
int ru ) {
                        _a = a;
                        _b = b;

Reply via email to