Repository: systemml
Updated Branches:
  refs/heads/master 131b647b1 -> a918d5778


[SYSTEMML-2046] Large dense blocks in all codegen runtime operations

This patch modifies all codegen operations (i.e., cell, magg, row,
outer) to support large dense matrix blocks >16GB. In addition, it
fixes an issue with the previously modified cumulative aggregates.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/92f3b7fc
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/92f3b7fc
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/92f3b7fc

Branch: refs/heads/master
Commit: 92f3b7fceff178f275c0586a2ee5d75b40c9c05d
Parents: 131b647
Author: Matthias Boehm <[email protected]>
Authored: Thu Dec 28 21:40:48 2017 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Thu Dec 28 21:40:48 2017 -0800

----------------------------------------------------------------------
 .../sysml/runtime/codegen/CodegenUtils.java     |   2 +-
 .../sysml/runtime/codegen/SpoofCellwise.java    | 214 +++++++++++++------
 .../runtime/codegen/SpoofMultiAggregate.java    |  29 ++-
 .../sysml/runtime/codegen/SpoofOperator.java    |  22 +-
 .../runtime/codegen/SpoofOuterProduct.java      | 185 +++++++++-------
 .../sysml/runtime/codegen/SpoofRowwise.java     |  11 +-
 .../sysml/runtime/matrix/data/LibMatrixAgg.java |   4 +-
 .../sysml/runtime/util/DataConverter.java       |  33 ++-
 8 files changed, 324 insertions(+), 176 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java 
b/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java
index 2e84eae..726e267 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java
@@ -170,7 +170,7 @@ public class CodegenUtils
        public static SideInput createSideInput(MatrixBlock in) {
                SideInput ret = (in.isInSparseFormat() || !in.isAllocated()) ?
                        new SideInput(null, in, in.getNumColumns()) :
-                       new SideInput(in.getDenseBlockValues(), null, 
in.getNumColumns());
+                       new SideInput(in.getDenseBlock(), null, 
in.getNumColumns());
                return (ret.mdat != null) ? new SideInputSparseCell(ret) : ret;
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
index 5eb3b76..7c2ac35 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java
@@ -295,7 +295,7 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        MatrixBlock out, int m, int n, boolean sparseSafe, int 
rl, int ru) 
                throws DMLRuntimeException 
        {
-               double[] c = out.getDenseBlockValues();
+               DenseBlock c = out.getDenseBlock();
                SideInput[] lb = createSparseSideInputs(b);
                
                if( _type == CellType.NO_AGG ) {
@@ -419,123 +419,172 @@ public abstract class SpoofCellwise extends 
SpoofOperator implements Serializabl
        //core operator skeletons for dense, sparse, and compressed
 
        private long executeDenseNoAgg(DenseBlock a, SideInput[] b, double[] 
scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
                throws DMLRuntimeException
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
-               
                long lnnz = 0;
-               for( int i=rl, ix=rl*n; i<ru; i++ )
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double aval = (data != null) ? data[ix] : 0;
-                               if( aval != 0 || !sparseSafe) {
-                                       c[ix] = genexec( aval, b, scalars, m, 
n, i, j);
-                                       lnnz += (c[ix]!=0) ? 1 : 0;
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] cvals = c.values(i);
+                               int cix = c.pos(i);
+                               for( int j=0; j<n; j++ )
+                                       lnnz += ((cvals[cix+j] = genexec(0, b, 
scalars, m, n, i, j))!=0) ? 1 : 0;
+                       }
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] avals = a.values(i);
+                               double[] cvals = c.values(i);
+                               int ix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[ix+j];
+                                       if( aval != 0 || !sparseSafe)
+                                               lnnz += ((cvals[ix+j] = 
genexec(aval, b, scalars, m, n, i, j))!=0) ? 1 : 0;
                                }
                        }
+               }
+               
                return lnnz;
        }
        
        private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, 
double[] scalars,
-               double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
-               
+               //note: output always single block
+               double[] lc = c.valuesAt(0);
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
+               
                long lnnz = 0;
-               for( int i=rl, ix=rl*n; i<ru; i++ ) {
-                       kbuff.set(0, 0);
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double aval = (data != null) ? data[ix] : 0;
-                               if( aval != 0 || !sparseSafe)
-                                       kplus.execute2(kbuff, genexec(aval, b, 
scalars, m, n, i, j));
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               kbuff.set(0, 0);
+                               for( int j=0; j<n; j++ )
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                               lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
+                       }
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               kbuff.set(0, 0);
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix+j];
+                                       if( aval != 0 || !sparseSafe)
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                               }
+                               lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0;
                        }
-                       lnnz += ((c[i] = kbuff._sum)!=0) ? 1 : 0;
                }
+               
                return lnnz;
        }
        
        private long executeDenseRowAggMxx(DenseBlock a, SideInput[] b, 
double[] scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
                throws DMLRuntimeException 
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
+               double[] lc = c.valuesAt(0); //single block
                
                double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
                ValueFunction vfun = getAggFunction();
                long lnnz = 0;
-               if( data == null && !sparseSafe ) { //empty
+               if( a == null && !sparseSafe ) { //empty
                        for( int i=rl; i<ru; i++ ) {
                                double tmp = initialVal;
                                for( int j=0; j<n; j++ )
                                        tmp = vfun.execute(tmp, genexec(0, b, 
scalars, m, n, i, j));
-                               lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+                               lnnz += ((lc[i] = tmp)!=0) ? 1 : 0;
                        }
                }
-               else if( data != null ) { //general case
-                       for( int i=rl, ix=rl*n; i<ru; i++ ) {
+               else if( a != null ) { //general case
+                       for( int i=rl; i<ru; i++ ) {
                                double tmp = initialVal;
-                               for( int j=0; j<n; j++, ix++ )
-                                       if( data[ix] != 0 || !sparseSafe)
-                                               tmp = vfun.execute(tmp, 
genexec(data[ix], b, scalars, m, n, i, j));
-                               if( sparseSafe && 
UtilFunctions.containsZero(data, ix-n, n) )
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix + j];
+                                       if( aval != 0 || !sparseSafe)
+                                               tmp = vfun.execute(tmp, 
genexec(aval, b, scalars, m, n, i, j));
+                               }
+                               if( sparseSafe && 
UtilFunctions.containsZero(avals, aix, n) )
                                        tmp = vfun.execute(tmp, 0);
-                               lnnz += ((c[i] = tmp)!=0) ? 1 : 0;
+                               lnnz += ((lc[i] = tmp)!=0) ? 1 : 0;
                        }
                }
                return lnnz;
        }
        
        private long executeDenseColAggSum(DenseBlock a, SideInput[] b, 
double[] scalars,
-               double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+               DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru)
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
+               double[] lc = c.valuesAt(0); //single block
                
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
                double[] corr = new double[n];
                
-               for( int i=rl, ix=rl*n; i<ru; i++ )
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double aval = (data != null) ? data[ix] : 0;
-                               if( aval != 0 || !sparseSafe) {
-                                       kbuff.set(c[j], corr[j]);
-                                       kplus.execute2(kbuff, genexec(aval, b, 
scalars, m, n, i, j));
-                                       c[j] = kbuff._sum;
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ )
+                               for( int j=0; j<n; j++ ) {
+                                       kbuff.set(lc[j], corr[j]);
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+                                       lc[j] = kbuff._sum;
                                        corr[j] = kbuff._correction;
                                }
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix + j];
+                                       if( aval != 0 || !sparseSafe ) {
+                                               kbuff.set(lc[j], corr[j]);
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                                               lc[j] = kbuff._sum;
+                                               corr[j] = kbuff._correction;
+                                       }
+                               }
                        }
+               }
+               
                return -1;
        }
        
        private long executeDenseColAggMxx(DenseBlock a, SideInput[] b, 
double[] scalars,
-                       double[] c, int m, int n, boolean sparseSafe, int rl, 
int ru)
+                       DenseBlock c, int m, int n, boolean sparseSafe, int rl, 
int ru)
                throws DMLRuntimeException 
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
+               double[] lc = c.valuesAt(0); //single block
                
                double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE;
                ValueFunction vfun = getAggFunction();
-               Arrays.fill(c, initialVal);
+               Arrays.fill(lc, initialVal);
                
-               if( data == null && !sparseSafe ) { //empty
+               if( a == null && !sparseSafe ) { //empty
                        for( int i=rl; i<ru; i++ )
                                for( int j=0; j<n; j++ )
-                                       c[j] = vfun.execute(c[j], genexec(0, b, 
scalars, m, n, i, j));
+                                       lc[j] = vfun.execute(lc[j], genexec(0, 
b, scalars, m, n, i, j));
                }
-               else if( data != null ) { //general case
+               else if( a != null ) { //general case
                        int[] counts = new int[n];
-                       for( int i=rl, ix=rl*n; i<ru; i++ )
-                               for( int j=0; j<n; j++, ix++ )
-                                       if( data[ix] != 0 || !sparseSafe) {
-                                               c[j] = vfun.execute(c[j], 
genexec(data[ix], b, scalars, m, n, i, j));
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix + j];
+                                       if( aval != 0 || !sparseSafe ) {
+                                               lc[j] = vfun.execute(lc[j], 
genexec(aval, b, scalars, m, n, i, j));
                                                counts[j] ++;
                                        }
+                               }
+                       }
                        if( sparseSafe )
                                for(int j=0; j<n; j++)
                                        if( counts[j] != ru-rl )
-                                               c[j] = vfun.execute(c[j], 0);
+                                               lc[j] = vfun.execute(lc[j], 0);
                }
                return -1;
        }
@@ -544,17 +593,26 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        int m, int n, boolean sparseSafe, int rl, int ru)
                throws DMLRuntimeException 
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
-               
                KahanFunction kplus = (KahanFunction) getAggFunction();
                KahanObject kbuff = new KahanObject(0, 0);
                
-               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double aval = (data != null) ? data[ix] : 0;
-                               if( aval != 0 || !sparseSafe)
-                                       kplus.execute2(kbuff, genexec(aval, b, 
scalars, m, n, i, j));
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ )
+                               for( int j=0; j<n; j++ )
+                                       kplus.execute2(kbuff, genexec(0, b, 
scalars, m, n, i, j));
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix + j];
+                                       if( aval != 0 || !sparseSafe)
+                                               kplus.execute2(kbuff, 
genexec(aval, b, scalars, m, n, i, j));
+                               }
                        }
+               }
+               
                return kbuff._sum;
        }
        
@@ -562,19 +620,28 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                        int m, int n, boolean sparseSafe, int rl, int ru)
                throws DMLRuntimeException 
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
-               
                //safe aggregation for min/max w/ handling of zero entries
                //note: sparse safe with zero value as min/max handled outside
                double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; 
                ValueFunction vfun = getAggFunction();
                
-               for( int i=rl, ix=rl*n; i<ru; i++ ) 
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double aval = (data != null) ? data[ix] : 0;
-                               if( aval != 0 || !sparseSafe)
-                                       ret = vfun.execute(ret, genexec(aval, 
b, scalars, m, n, i, j));
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ )
+                               for( int j=0; j<n; j++ )
+                                       ret = vfun.execute(ret, genexec(0, b, 
scalars, m, n, i, j));
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) {
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ ) {
+                                       double aval = avals[aix + j];
+                                       if( aval != 0 || !sparseSafe)
+                                               ret = vfun.execute(ret, 
genexec(aval, b, scalars, m, n, i, j));
+                               }
                        }
+               }
+               
                return ret;
        }
        
@@ -621,9 +688,9 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
        {
                //note: sequential scan algorithm for both sparse-safe and -unsafe
                //in order to avoid binary search for sparse-unsafe
-               double[] c = out.getDenseBlockValues();
+               DenseBlock c = out.getDenseBlock();
                long lnnz = 0;
-               for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) {
+               for(int i=rl; i<ru; i++) {
                        int lastj = -1;
                        //handle non-empty rows
                        if( sblock != null && !sblock.isEmpty(i) ) {
@@ -631,20 +698,25 @@ public abstract class SpoofCellwise extends SpoofOperator 
implements Serializabl
                                int alen = sblock.size(i);
                                int[] aix = sblock.indexes(i);
                                double[] avals = sblock.values(i);
+                               double[] cvals = c.values(i);
+                               int cix = c.pos(i);
                                for(int k=apos; k<apos+alen; k++) {
                                        //process zeros before current non-zero
                                        if( !sparseSafe )
                                                for(int j=lastj+1; j<aix[k]; 
j++)
-                                                       lnnz += 
((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
+                                                       lnnz += 
((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0;
                                        //process current non-zero
                                        lastj = aix[k];
-                                       lnnz += 
((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
+                                       lnnz += 
((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0;
                                }
                        }
                        //process empty rows or remaining zeros
                        if( !sparseSafe )
-                               for(int j=lastj+1; j<n; j++)
-                                       lnnz += ((c[cix+j]=genexec(0, b, 
scalars, m, n, i, j))!=0)?1:0;
+                               for(int j=lastj+1; j<n; j++) {
+                                       double[] cvals = c.values(i);
+                                       int cix = c.pos(i);
+                                       lnnz += ((cvals[cix+j]=genexec(0, b, 
scalars, m, n, i, j))!=0)?1:0;
+                               }
                }
                return lnnz;
        }

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
index c141b2d..85c894a 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java
@@ -40,6 +40,7 @@ import org.apache.sysml.runtime.functionobjects.KahanPlusSq;
 import org.apache.sysml.runtime.functionobjects.ValueFunction;
 import org.apache.sysml.runtime.instructions.cp.KahanObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.IJV;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
@@ -94,7 +95,7 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                //result allocation and preparations
                out.reset(1, _aggOps.length, false);
                out.allocateDenseBlock();
-               double[] c = out.getDenseBlockValues();
+               double[] c = out.getDenseBlockValues(); //1x<num_agg>
                setInitialOutputValues(c);
                
                //input preparation
@@ -109,8 +110,8 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                        if( inputs.get(0) instanceof CompressedMatrixBlock )
                                
executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, 
m);
                        else if( !inputs.get(0).isInSparseFormat() )
-                               
executeDense(inputs.get(0).getDenseBlockValues(), b, scalars, c, m, n, 0, m);
-                       else    
+                               executeDense(inputs.get(0).getDenseBlock(), b, 
scalars, c, m, n, sparseSafe, 0, m);
+                       else
                                executeSparse(inputs.get(0).getSparseBlock(), 
b, scalars, c, m, n, sparseSafe, 0, m);
                }
                else  //MULTI-THREADED
@@ -144,15 +145,23 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                return out;
        }
        
-       private void executeDense(double[] a, SideInput[] b, double[] scalars, 
double[] c, int m, int n, int rl, int ru) throws DMLRuntimeException 
+       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru)
+               throws DMLRuntimeException
        {
                SideInput[] lb = createSparseSideInputs(b);
                
                //core dense aggregation operation
-               for( int i=rl, ix=rl*n; i<ru; i++ ) { 
-                       for( int j=0; j<n; j++, ix++ ) {
-                               double in = (a != null) ? a[ix] : 0;
-                               genexec( in, lb, scalars, c, m, n, i, j );
+               if( a == null && !sparseSafe ) {
+                       for( int i=rl; i<ru; i++ )
+                               for( int j=0; j<n; j++ )
+                                       genexec( 0, lb, scalars, c, m, n, i, j 
);
+               }
+               else if( a != null ) {
+                       for( int i=rl; i<ru; i++ ) { 
+                               double[] avals = a.values(i);
+                               int aix = a.pos(i);
+                               for( int j=0; j<n; j++ )
+                                       genexec( avals[aix+j], lb, scalars, c, 
m, n, i, j );
                        }
                }
        }
@@ -307,8 +316,8 @@ public abstract class SpoofMultiAggregate extends 
SpoofOperator implements Seria
                        if( _a instanceof CompressedMatrixBlock )
                                executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _rlen, _clen, _rl, _ru);
                        else if( !_a.isInSparseFormat() )
-                               executeDense(_a.getDenseBlockValues(), _b, 
_scalars, c, _rlen, _clen, _rl, _ru);
-                       else    
+                               executeDense(_a.getDenseBlock(), _b, _scalars, 
c, _rlen, _clen, _safe, _rl, _ru);
+                       else
                                executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _rlen, _clen, _safe, _rl, _ru);
                        return c;
                }

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
index 2267a47..af0a22b 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java
@@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.LibMatrixReorg;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
@@ -102,7 +103,7 @@ public abstract class SpoofOperator implements Serializable
                                if( in.getNumColumns()==1 && 
in.isEmptyBlock(false) ) //dense empty
                                        b[i-offset] = new SideInput(null, null, 
clen);
                                else {
-                                       b[i-offset] = new 
SideInput(DataConverter.convertToDoubleVector(in), null, clen);
+                                       b[i-offset] = new 
SideInput(DataConverter.convertToDenseBlock(in, false), null, clen);
                                        LOG.warn(getClass().getName()+": 
Converted "+in.getNumRows()+"x"+in.getNumColumns()+
                                                ", nnz="+in.getNonZeros()+" 
sideways input matrix from sparse to dense.");
                                }
@@ -111,8 +112,7 @@ public abstract class SpoofOperator implements Serializable
                                b[i-offset] = new SideInput(null, in, clen);
                        }
                        else {
-                               b[i-offset] = new SideInput(
-                                       in.getDenseBlockValues(), null, clen);
+                               b[i-offset] = new SideInput(in.getDenseBlock(), 
null, clen);
                        }
                }
                
@@ -142,8 +142,8 @@ public abstract class SpoofOperator implements Serializable
                return ret;
        }
        
-       public static double[][] getDenseMatrices(SideInput[] inputs) {
-               double[][] ret = new double[inputs.length][];
+       public static DenseBlock[] getDenseMatrices(SideInput[] inputs) {
+               DenseBlock[] ret = new DenseBlock[inputs.length];
                for( int i=0; i<inputs.length; i++ )
                        ret[i] = inputs[i].ddat;
                return ret;
@@ -204,7 +204,7 @@ public abstract class SpoofOperator implements Serializable
        
        protected static double getValue(SideInput data, int rowIndex) {
                //note: wrapper sideinput guaranteed to exist
-               return (data.ddat!=null) ? data.ddat[rowIndex] :
+               return (data.ddat!=null) ? data.ddat.valuesAt(0)[rowIndex] :
                        (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, 
0) : 0;
        }
        
@@ -216,7 +216,7 @@ public abstract class SpoofOperator implements Serializable
        
        protected static double getValue(SideInput data, int n, int rowIndex, 
int colIndex) {
                //note: wrapper sideinput guaranteed to exist
-               return (data.ddat!=null) ? data.ddat[rowIndex*n+colIndex] : 
+               return (data.ddat!=null) ? data.ddat.get(rowIndex, colIndex) :
                        (data instanceof SideInputSparseCell) ? 
                        ((SideInputSparseCell)data).next(rowIndex, colIndex) :
                        (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, 
colIndex) : 0;
@@ -235,19 +235,19 @@ public abstract class SpoofOperator implements 
Serializable
        }
        
        public static class SideInput {
-               public final double[] ddat;
+               public final DenseBlock ddat;
                public final MatrixBlock mdat;
                public final int clen;
-               public SideInput(double[] ddata, MatrixBlock mdata, int 
clength) {
+               public SideInput(DenseBlock ddata, MatrixBlock mdata, int 
clength) {
                        ddat = ddata;
                        mdat = mdata;
                        clen = clength;
                }
                public int pos(int r) {
-                       return r * clen;
+                       return (ddat!=null) ? ddat.pos(r) : r * clen;
                }
                public double[] values(int r) {
-                       return ddat;
+                       return (ddat!=null) ? ddat.values(r) : null;
                }
                public double getValue(int r, int c) {
                        return SpoofOperator.getValue(this, clen, r, c);

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index 6919655..e6e93a8 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -33,6 +33,7 @@ import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
 import org.apache.sysml.runtime.matrix.data.IJV;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
@@ -81,7 +82,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
                        return new DoubleObject(0);
                
                //input preparation
-               double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
+               DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
                SideInput[] b = prepInputMatrices(inputs, 3, false);
                double[] scalars = prepInputScalars(scalarObjects);
                
@@ -97,7 +98,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator
                if( a instanceof CompressedMatrixBlock )
                        executeCellwiseCompressed((CompressedMatrixBlock)a, 
ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n);
                else if( !a.isInSparseFormat() )
-                       executeCellwiseDense(a.getDenseBlockValues(), ab[0], 
ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, _outerProductType, 0, m, 
0, n);
+                       executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], 
b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                else
                        executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], 
b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
                return new DoubleObject(out.getDenseBlock().get(0, 0));
@@ -117,7 +118,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        return execute(inputs, scalarObjects); //sequential
                
                //input preparation
-               double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
+               DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
                SideInput[] b = prepInputMatrices(inputs, 3, false);
                double[] scalars = prepInputScalars(scalarObjects);
                
@@ -185,7 +186,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                out.allocateBlock();
                
                //input preparation
-               double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
+               DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
                SideInput[] b = prepInputMatrices(inputs, 3, false);
                double[] scalars = prepInputScalars(scalarObjects);
                
@@ -200,19 +201,19 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        case LEFT_OUTER_PRODUCT:
                        case RIGHT_OUTER_PRODUCT:
                                if( a instanceof CompressedMatrixBlock )
-                                       
executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, 
out.getDenseBlockValues(), 
+                                       
executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, 
out.getDenseBlock(),
                                                m, n, k, _outerProductType, 0, 
m, 0, ((CompressedMatrixBlock)a).getNumColGroups());
                                else if( !a.isInSparseFormat() )
-                                       executeDense(a.getDenseBlockValues(), 
ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, 
_outerProductType, 0, m, 0, n);
+                                       executeDense(a.getDenseBlock(), ab[0], 
ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                                else
-                                       executeSparse(a.getSparseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, a.getNonZeros(), 
_outerProductType, 0, m, 0, n);
+                                       executeSparse(a.getSparseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), 
_outerProductType, 0, m, 0, n);
                                break;
                                
                        case CELLWISE_OUTER_PRODUCT:
                                if( a instanceof CompressedMatrixBlock )
                                        
executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, 
out, m, n, k, _outerProductType, 0, m, 0, n);
                                else if( !a.isInSparseFormat() )
-                                       
executeCellwiseDense(a.getDenseBlockValues(), ab[0], ab[1], b, scalars, 
out.getDenseBlockValues(), m, n, k, _outerProductType, 0, m, 0, n);
+                                       executeCellwiseDense(a.getDenseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, 
m, 0, n);
                                else 
                                        
executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, 
k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
                                break;
@@ -267,7 +268,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        return execute(inputs, scalarObjects, out); //sequential
                
                //input preparation
-               double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
+               DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 
2, true, false));
                SideInput[] b = prepInputMatrices(inputs, 3, false);
                double[] scalars = prepInputScalars(scalarObjects);
                
@@ -338,8 +339,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                return UtilFunctions.roundToNext(base, k);
        }
        
-       private void executeDense(double[] a, double[] u, double[] v, 
SideInput[] b, double[] scalars,
-               double[] c, int m, int n, int k, OutProdType type, int rl, int 
ru, int cl, int cu )
+       private void executeDense(DenseBlock a, DenseBlock u, DenseBlock v, 
SideInput[] b, double[] scalars,
+               DenseBlock c, int m, int n, int k, OutProdType type, int rl, 
int ru, int cl, int cu )
        {
                //approach: iterate over non-zeros of w, selective mm 
computation
                //cache-conscious blocking: due to blocksize constraint 
(default 1000),
@@ -357,17 +358,23 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                int bjmin = Math.min(cu, bj+blocksizeIJ);
                                
                                //core computation
-                               for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, 
ix+=n, uix+=k )
-                                       for( int j=bj, vix=bj*k; j<bjmin; j++, 
vix+=k)
-                                               if( a[ix+j] != 0 ) {
+                               for( int i=bi; i<bimin; i++ ) {
+                                       double[] avals = a.values(i);
+                                       double[] uvals = u.values(i);
+                                       int aix = a.pos(i), uix = u.pos(i);
+                                       for( int j=bj; j<bjmin; j++)
+                                               if( avals[aix+j] != 0 ) {
+                                                       int vix = v.pos(j);
                                                        cix = (type == 
OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
-                                                       genexecDense( a[ix+j], 
u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j); 
+                                                       genexecDense( 
avals[aix+j], uvals, uix, v.values(j), vix,
+                                                               b, scalars, 
c.values((type == OutProdType.LEFT_OUTER_PRODUCT) ? j : i), cix, m, n, k, i, j); //c row matches cix: j if left, else i
                                                }
+                               }
                        }
        }
        
-       private void executeCellwiseDense(double[] a, double[] u, double[] v, 
SideInput[] b, double[] scalars,
-               double[] c, int m, int n, int k, OutProdType type, int rl, int 
ru, int cl, int cu )
+       private void executeCellwiseDense(DenseBlock a, DenseBlock u, 
DenseBlock v, SideInput[] b, double[] scalars,
+               DenseBlock c, int m, int n, int k, OutProdType type, int rl, 
int ru, int cl, int cu )
        {
                //approach: iterate over non-zeros of w, selective mm 
computation
                //cache-conscious blocking: due to blocksize constraint 
(default 1000),
@@ -385,21 +392,32 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                int bjmin = Math.min(cu, bj+blocksizeIJ);
                                
                                //core computation
-                               for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, 
ix+=n, uix+=k )
-                                       for( int j=bj, vix=bj*k; j<bjmin; j++, 
vix+=k)
-                                               if( a[ix+j] != 0 ) {
-                                                       if(type == 
OutProdType.CELLWISE_OUTER_PRODUCT)
-                                                               c[ix+j] = 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j );
-                                                       else
-                                                               sum += 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);
-                                               }
+                               for( int i=bi; i<bimin; i++ ) {
+                                       double[] avals = a.values(i);
+                                       double[] uvals = u.values(i);
+                                       int aix = a.pos(i), uix = u.pos(i);
+                                       if(type == 
OutProdType.CELLWISE_OUTER_PRODUCT) {
+                                               double[] cvals = c.values(i);
+                                               for( int j=bj; j<bjmin; j++)
+                                                       if( avals[aix+j] != 0 )
+                                                               cvals[aix+j] = 
genexecCellwise( avals[aix+j], uvals, uix,
+                                                                       
v.values(j), v.pos(j), b, scalars, m, n, k, i, j );
+                                       }
+                                       else {
+                                               for( int j=bj; j<bjmin; j++)
+                                                       if( avals[aix+j] != 0 )
+                                                               sum += 
genexecCellwise( avals[aix+j], uvals, uix,
+                                                                       
v.values(j), v.pos(j), b, scalars, m, n, k, i, j);
+                                               
+                                       }
+                               }
                        }
                if( type != OutProdType.CELLWISE_OUTER_PRODUCT )
-                       c[0] = sum;
+                       c.set(0, 0, sum);
        }
        
-       private void executeSparse(SparseBlock sblock, double[] u, double[] v, 
SideInput[] b, double[] scalars,
-               double[] c, int m, int n, int k, long nnz, OutProdType type, 
int rl, int ru, int cl, int cu) 
+       private void executeSparse(SparseBlock sblock, DenseBlock u, DenseBlock 
v, SideInput[] b, double[] scalars,
+               DenseBlock c, int m, int n, int k, long nnz, OutProdType type, 
int rl, int ru, int cl, int cu) 
        {
                boolean left = (_outerProductType== 
OutProdType.LEFT_OUTER_PRODUCT);
                
@@ -414,21 +432,24 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        //for ultra-sparse matrices, we do not allocate the 
index array because
                        //its allocation and maintenance can dominate the total 
runtime.
                        SideInput[] lb = createSparseSideInputs(b);
-                               
+                       
                        //core wdivmm block matrix mult
-                       for( int i=rl, uix=rl*k; i<ru; i++, uix+=k ) {
+                       for( int i=rl; i<ru; i++ ) {
                                if( sblock.isEmpty(i) ) continue;
                                
                                int wpos = sblock.pos(i);
                                int wlen = sblock.size(i);
                                int[] wix = sblock.indexes(i);
-                               double[] wval = sblock.values(i);
+                               double[] wvals = sblock.values(i);
+                               double[] uvals = u.values(i);
+                               int uix = u.pos(i);
                                
                                int index = (cl==0||sblock.isEmpty(i)) ? 0 : 
sblock.posFIndexGTE(i,cl);
                                index = wpos + ((index>=0) ? index : n);
                                for( ; index<wpos+wlen && wix[index]<cu; 
index++ ) {
-                                       genexecDense(wval[index], u, uix, v, 
wix[index]*k, lb, scalars, c,
-                                               (left ? wix[index]*k : uix), m, 
n, k, i, wix[index]);
+                                       int jix = wix[index];
+                                       genexecDense(wvals[index], uvals, uix, 
v.values(jix), v.pos(jix), lb, scalars,
+                                               c.values(left ? jix : i), (left ? 
v.pos(jix) : uix), m, n, k, i, wix[index]); //c row matches offset: j if left, else i
                                }
                        }
                }
@@ -454,18 +475,21 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                {
                                        int bjmin = Math.min(cu, bj+blocksizeJ);
                                        //core wdivmm block matrix mult
-                                       for( int i=bi, uix=bi*k; i<bimin; i++, 
uix+=k ) {
+                                       for( int i=bi; i<bimin; i++ ) {
                                                if( sblock.isEmpty(i) ) 
continue;
                                                
                                                int wpos = sblock.pos(i);
                                                int wlen = sblock.size(i);
                                                int[] wix = sblock.indexes(i);
-                                               double[] wval = 
sblock.values(i);
+                                               double[] wvals = 
sblock.values(i);
+                                               double[] uvals = u.values(i);
+                                               int uix = u.pos(i);
                                                
                                                int index = wpos + curk[i-bi];
                                                for( ; index<wpos+wlen && 
wix[index]<bjmin; index++ ) {
-                                                       
genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c,
-                                                               (left ? 
wix[index]*k : uix), m, n, k, i, wix[index]);
+                                                       int jix = wix[index];
+                                                       
genexecDense(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars,
+                                                               c.values(left ? jix : i), 
(left ? v.pos(jix) : uix), m, n, k, i, wix[index]); //block-relative offset; c row: j if left, else i
                                                }
                                                curk[i-bi] = index - wpos;
                                        }
@@ -474,7 +498,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                }
        }
        
-       private void executeCellwiseSparse(SparseBlock sblock, double[] u, 
double[] v, SideInput[] b, double[] scalars, 
+       private void executeCellwiseSparse(SparseBlock sblock, DenseBlock u, 
DenseBlock v, SideInput[] b, double[] scalars, 
                MatrixBlock out, int m, int n, int k, long nnz, OutProdType 
type, int rl, int ru, int cl, int cu ) 
        {
                //NOTE: we don't create sparse side inputs w/ row-major cursors 
because 
@@ -485,7 +509,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                
                if( !out.isInSparseFormat() ) //DENSE
                {
-                       double[] c = out.getDenseBlockValues();
+                       DenseBlock c = out.getDenseBlock();
                        double tmp = 0;
                        for( int bi=rl; bi<ru; bi+=blocksizeIJ ) {
                                int bimin = Math.min(ru, bi+blocksizeIJ);
@@ -494,27 +518,34 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                //blocked execution over column blocks
                                for( int bj=0; bj<n; bj+=blocksizeIJ ) {
                                        int bjmin = Math.min(n, bj+blocksizeIJ);
-                                       for( int i=bi, uix=bi*k; i<bimin; i++, 
uix+=k ) {
+                                       for( int i=bi; i<bimin; i++ ) {
                                                if( sblock.isEmpty(i) ) 
continue;
                                                int wpos = sblock.pos(i);
                                                int wlen = sblock.size(i);
                                                int[] wix = sblock.indexes(i);
-                                               double[] wval = 
sblock.values(i);
+                                               double[] wvals = 
sblock.values(i);
+                                               double[] cvals = c.values(i);
+                                               double[] uvals = u.values(i);
+                                               int uix = u.pos(i);
                                                int index = wpos + curk[i-bi];
                                                if( type == 
OutProdType.CELLWISE_OUTER_PRODUCT )
-                                                       for( ; index<wpos+wlen 
&& wix[index]<bjmin; index++ )
-                                                               c[wix[index]] = 
genexecCellwise( wval[index], 
-                                                                       u, uix, 
v, wix[index]*k, b, scalars, m, n, k, i, wix[index] );
+                                                       for( ; index<wpos+wlen 
&& wix[index]<bjmin; index++ ) {
+                                                               int jix = 
wix[index];
+                                                               cvals[c.pos(i)+jix] = 
genexecCellwise( wvals[index],
+                                                                       uvals, 
uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index] ); //write at row start + column
+                                                       }
                                                else
-                                                       for( ; index<wpos+wlen 
&& wix[index]<bjmin; index++ )
-                                                               tmp += 
genexecCellwise( wval[index], 
-                                                                       u, uix, 
v, wix[index]*k, b, scalars, m, n, k, i, wix[index]);
+                                                       for( ; index<wpos+wlen 
&& wix[index]<bjmin; index++ ) {
+                                                               int jix = 
wix[index];
+                                                               tmp += 
genexecCellwise( wvals[index], 
+                                                                       uvals, 
uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index]);
+                                                       }
                                                curk[i-bi] = index - wpos;
                                        }
                                }
                        }
                        if( type != OutProdType.CELLWISE_OUTER_PRODUCT )
-                               c[0] = tmp;
+                               c.set(0, 0, tmp);
                }
                else //SPARSE
                {
@@ -526,16 +557,19 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                //blocked execution over column blocks
                                for( int bj=0; bj<n; bj+=blocksizeIJ ) {
                                        int bjmin = Math.min(n, bj+blocksizeIJ);
-                                       for( int i=bi, uix=bi*k; i<bimin; i++, 
uix+=k ) {
+                                       for( int i=bi; i<bimin; i++ ) {
                                                if( sblock.isEmpty(i) ) 
continue;
                                                int wpos = sblock.pos(i);
                                                int wlen = sblock.size(i);
                                                int[] wix = sblock.indexes(i);
                                                double[] wval = 
sblock.values(i);
+                                               double[] uvals = u.values(i);
+                                               int uix = u.pos(i);
                                                int index = wpos + curk[i-bi];
                                                for( ; index<wpos+wlen && 
wix[index]<bjmin; index++ ) {
-                                                       c.append(i, wix[index], 
genexecCellwise( wval[index], u, uix, v,
-                                                               wix[index]*k, 
b, scalars, m, n, k, i, wix[index] ));
+                                                       int jix = wix[index];
+                                                       c.append(i, wix[index], 
genexecCellwise( wval[index], uvals, uix,
+                                                               v.values(jix), 
v.pos(jix), b, scalars, m, n, k, i, wix[index] ));
                                                }
                                                curk[i-bi] = index - wpos;
                                        }
@@ -544,8 +578,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                }
        }
        
-       private void executeCompressed(CompressedMatrixBlock a, double[] u, 
double[] v, SideInput[] b, double[] scalars, 
-                       double[] c, int m, int n, int k, OutProdType type, int 
rl, int ru, int cl, int cu) 
+       private void executeCompressed(CompressedMatrixBlock a, DenseBlock u, 
DenseBlock v, SideInput[] b, double[] scalars, 
+               DenseBlock c, int m, int n, int k, OutProdType type, int rl, 
int ru, int cl, int cu) 
        {
                //NOTE: we don't create sparse side inputs w/ row-major cursors 
because 
                //compressed data is access in a column-major order 
@@ -556,14 +590,17 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        a.getIterator(rl, ru, cl, cu, false); //cl/cu -> 
colgroups
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       int uix = cell.getI() * k;
-                       int vix = cell.getJ() * k;
-                       genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c,
+                       double[] uvals = u.values(cell.getI());
+                       double[] vvals = v.values(cell.getJ());
+                       double[] cvals = c.values(left?cell.getJ():cell.getI());
+                       int uix = u.pos(cell.getI());
+                       int vix = v.pos(cell.getJ());
+                       genexecDense(cell.getV(), uvals, uix, vvals, vix, b, 
scalars, cvals,
                                left ? vix : uix, m, n, k, cell.getI(), 
cell.getJ());
                }
        }
        
-       private void executeCellwiseCompressed(CompressedMatrixBlock a, 
double[] u, double[] v, SideInput[] b, double[] scalars,
+       private void executeCellwiseCompressed(CompressedMatrixBlock a, 
DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars,
                MatrixBlock out, int m, int n, int k, OutProdType type, int rl, 
int ru, int cl, int cu )
        {
                //NOTE: we don't create sparse side inputs w/ row-major cursors 
because 
@@ -576,21 +613,23 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                Iterator<IJV> iter = a.getIterator(rl, ru, false);
                while( iter.hasNext() ) {
                        IJV cell = iter.next();
-                       int uix = cell.getI() * k;
-                       int vix = cell.getJ() * k;
+                       double[] uvals = u.values(cell.getI());
+                       double[] vvals = v.values(cell.getJ());
+                       int uix = u.pos(cell.getI());
+                       int vix = v.pos(cell.getJ());
                        if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) {
                                if( out.isInSparseFormat() ) {
                                        csblock.allocate(cell.getI());
                                        csblock.append(cell.getI(), cell.getJ(),
-                                               genexecCellwise(cell.getV(), u, 
uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
+                                               genexecCellwise(cell.getV(), 
uvals, uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
                                }
                                else {
                                        c[cell.getI()*n+cell.getJ()] =
-                                               genexecCellwise(cell.getV(), u, 
uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+                                               genexecCellwise(cell.getV(), 
uvals, uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
                                }
                        }
                        else {
-                               c[0] += genexecCellwise(cell.getV(), u, uix, v, 
vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+                               c[0] += genexecCellwise(cell.getV(), uvals, 
uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
                        }
                }
        }
@@ -604,8 +643,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
        private class ParExecTask implements Callable<Long> 
        {
                private final MatrixBlock _a;
-               private final double[] _u;
-               private final double[] _v;
+               private final DenseBlock _u;
+               private final DenseBlock _v;
                private final SideInput[] _b;
                private final double[] _scalars;
                private final MatrixBlock _c;
@@ -618,7 +657,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                private final int _cl;
                private final int _cu;
                
-               protected ParExecTask( MatrixBlock a, double[] u, double[] v, 
SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, 
OutProdType type, int rl, int ru, int cl, int cu ) {
+               protected ParExecTask( MatrixBlock a, DenseBlock u, DenseBlock 
v, SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, 
OutProdType type, int rl, int ru, int cl, int cu ) {
                        _a = a;
                        _u = u;
                        _v = v;
@@ -642,17 +681,17 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                case LEFT_OUTER_PRODUCT:
                                case RIGHT_OUTER_PRODUCT:
                                        if( _a instanceof CompressedMatrixBlock 
)
-                                               
executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, 
_c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                                               
executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, 
_c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else if( !_a.isInSparseFormat() )
-                                               
executeDense(_a.getDenseBlockValues(), _u, _v, _b, _scalars, 
_c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                                               
executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else
-                                               
executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, 
_c.getDenseBlockValues(), _rlen, _clen, _k, _a.getNonZeros(), _type,  _rl, _ru, 
_cl, _cu);
+                                               
executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_rlen, _clen, _k, _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
                                        break;
                                case CELLWISE_OUTER_PRODUCT:
                                        if( _a instanceof CompressedMatrixBlock 
)
                                                
executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c, 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else if( !_c.isInSparseFormat() )
-                                               
executeCellwiseDense(_a.getDenseBlockValues(), _u, _v, _b, _scalars, 
_c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                                               
executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, 
_c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else 
                                                
executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _rlen, 
_clen, _k, _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
                                        break;
@@ -671,8 +710,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
        private class ParOuterProdAggTask implements Callable<Double> 
        {
                private final MatrixBlock _a;
-               private final double[] _u;
-               private final double[] _v;
+               private final DenseBlock _u;
+               private final DenseBlock _v;
                private final SideInput[] _b;
                private final double[] _scalars;
                private final int _rlen;
@@ -684,7 +723,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                private final int _cl;
                private final int _cu;
                
-               protected ParOuterProdAggTask( MatrixBlock a, double[] u, 
double[] v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType 
type, int rl, int ru, int cl, int cu ) {
+               protected ParOuterProdAggTask( MatrixBlock a, DenseBlock u, 
DenseBlock v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType 
type, int rl, int ru, int cl, int cu ) {
                        _a = a;
                        _u = u;
                        _v = v;
@@ -707,7 +746,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        if( _a instanceof CompressedMatrixBlock )
                                
executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, out, 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                        else if( !_a.isInSparseFormat() )
-                               executeCellwiseDense(_a.getDenseBlockValues(), 
_u, _v, _b, _scalars, out.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, 
_ru, _cl, _cu);
+                               executeCellwiseDense(_a.getDenseBlock(), _u, 
_v, _b, _scalars, out.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, 
_cu);
                        else
                                executeCellwiseSparse(_a.getSparseBlock(), _u, 
_v, _b, _scalars, out, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, 
_cl, _cu);
                        return out.quickGetValue(0, 0);

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 39dd2ed..6e098b9 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -289,14 +289,15 @@ public abstract class SpoofRowwise extends SpoofOperator
        
        private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, double[] c, int n, int rl, int ru) 
        {
-               double[] data = (a != null) ? a.valuesAt(0) : null;
-               if( data == null )
+               //TODO handle large dense outputs (potentially misaligned)
+               if( a == null )
                        return;
                
                SideInput[] lb = createSparseSideInputs(b, true);
-               for( int i=rl, aix=rl*n; i<ru; i++, aix+=n ) {
-                       //call generated method
-                       genexec( data, aix, lb, scalars, c, n, i );
+               for( int i=rl; i<ru; i++ ) {
+                       double[] avals = a.values(i);
+                       int aix = a.pos(i);
+                       genexec( avals, aix, lb, scalars, c, n, i );
                }
        }
        

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
index c371c5a..3602534 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java
@@ -285,7 +285,7 @@ public class LibMatrixAgg
                //filter empty input blocks (incl special handling for 
sparse-unsafe operations)
                if( in.isEmptyBlock(false) ){
                        return aggregateUnaryMatrixEmpty(in, out, aggtype, 
null);
-               }       
+               }
                
                //allocate output arrays (if required)
                out.reset(m2, n2, false); //always dense
@@ -1499,7 +1499,7 @@ public class LibMatrixAgg
                final int n = in.clen;
                
                DenseBlock da = in.getDenseBlock();
-               DenseBlock dc = in.getDenseBlock();
+               DenseBlock dc = out.getDenseBlock();
                double[] a = in.getDenseBlockValues();
                double[] c = out.getDenseBlockValues();
                

http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java 
b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
index fdc7af0..5c3ad25 100644
--- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
+++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java
@@ -40,6 +40,8 @@ import org.apache.sysml.runtime.io.MatrixWriterFactory;
 import org.apache.sysml.runtime.io.ReadProperties;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.data.CTableMap;
+import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.DenseBlockFactory;
 import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
 import org.apache.sysml.runtime.matrix.data.FrameBlock;
 import org.apache.sysml.runtime.matrix.data.IJV;
@@ -321,8 +323,34 @@ public class DataConverter
                }
                return ret;
        }
+       
+       /**
+        * Converts the given matrix block into a dense block, always creating
+        * a copy (delegates to the two-argument variant with deep=true).
+        * 
+        * @param mb input matrix block
+        * @return dense block holding the cell values of the input
+        */
+       public static DenseBlock convertToDenseBlock(MatrixBlock mb) {
+               return convertToDenseBlock(mb, true);
+       }
+       
+       /**
+        * Converts the given matrix block into a dense block.
+        * 
+        * If the input is dense, allocated, and deep is false, the input's own
+        * dense block is returned without copying. Otherwise a new rows x cols
+        * dense block is created and, unless the input is empty, filled from
+        * the sparse iterator (sparse input) or from the input's dense block
+        * (dense input with deep=true).
+        * 
+        * @param mb input matrix block
+        * @param deep if true, never return the input's internal dense block
+        * @return dense block holding the cell values of the input
+        */
+       public static DenseBlock convertToDenseBlock(MatrixBlock mb, boolean deep) {
+               int rows = mb.getNumRows();
+               int cols = mb.getNumColumns();
+               DenseBlock ret = (!mb.isInSparseFormat() && mb.isAllocated() && !deep) ?
+                       mb.getDenseBlock() : DenseBlockFactory.createDenseBlock(rows, cols); //0-initialized
+               
+               if( !mb.isEmptyBlock(false) ) {
+                       if( mb.isInSparseFormat() ) {
+                               Iterator<IJV> iter = mb.getSparseBlockIterator();
+                               while( iter.hasNext() ) {
+                                       IJV cell = iter.next();
+                                       //cells are addressed by (row, column); the column index
+                                       //must not be offset by the number of columns
+                                       ret.set(cell.getI(), cell.getJ(), cell.getV());
+                               }
+                       }
+                       else if( deep ) {
+                               //presumably copies all values of the source block - confirm against DenseBlock.set(DenseBlock)
+                               ret.set(mb.getDenseBlock());
+                       }
+               }
+               
+               return ret;
+       }
 
-       public static double[] convertToDoubleVector( MatrixBlock mb ) {
+       public static double[] convertToDoubleVector(MatrixBlock mb) {
                return convertToDoubleVector(mb, true);
        }
        
@@ -333,8 +361,7 @@ public class DataConverter
                double[] ret = (!mb.isInSparseFormat() && mb.isAllocated() && 
!deep) ? 
                        mb.getDenseBlockValues() : new double[rows*cols]; 
//0-initialized
                
-               if( !mb.isEmptyBlock(false) )
-               {
+               if( !mb.isEmptyBlock(false) ) {
                        if( mb.isInSparseFormat() ) {
                                Iterator<IJV> iter = 
mb.getSparseBlockIterator();
                                while( iter.hasNext() ) {

Reply via email to