[SYSTEMML-1289] Codegen outer product ops over compressed matrices

This patch extends the codegen outer product template with support for
compressed matrices. The basic approach is to use sparse-safe column
iterators to process all relevant values without decompression. For left
outer product operations, we parallelize over column group partitions
and extend the column group iterator accordingly.

Furthermore, this patch includes a variety of smaller bug fixes:

1) Fix ColGroupOffset iterator (OLE, RLE) to handle the case of
iterators without row indexes in a specified row range.
2) Fix result correctness of sparse cell-wise outer products (output
indexing).
3) Fix outer product handling of empty input matrices.
4) Fix the unnecessary casting of the number of non-zeros to int, which
could lead to truncation/overflows because it's represented as long.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9dad2fe8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9dad2fe8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9dad2fe8

Branch: refs/heads/master
Commit: 9dad2fe8b39d698b6b289955e249e27dc4e2b8a1
Parents: 8738121
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Mon May 29 22:24:11 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Mon May 29 22:36:13 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/SpoofCompiler.java       |   2 +
 .../hops/codegen/cplan/CNodeOuterProduct.java   |   4 +-
 .../runtime/codegen/SpoofOuterProduct.java      | 214 +++++++++++----
 .../sysml/runtime/compress/ColGroupOLE.java     |   2 +
 .../sysml/runtime/compress/ColGroupOffset.java  |  35 +--
 .../sysml/runtime/compress/ColGroupRLE.java     |   2 +
 .../runtime/compress/CompressedMatrixBlock.java |  19 +-
 .../codegen/CompressedCellwiseTest.java         |   1 +
 .../codegen/CompressedMultiAggregateTest.java   |   1 +
 .../codegen/CompressedOuterProductTest.java     | 267 +++++++++++++++++++
 .../codegen/CompressedOuterProductMain.R        |  36 +++
 .../codegen/CompressedOuterProductMain.dml      |  33 +++
 .../functions/codegen/ZPackageSuite.java        |   1 +
 13 files changed, 535 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java 
b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index d87c107..d96dda1 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -411,6 +411,8 @@ public class SpoofCompiler
                        }
                }
                catch( Exception ex ) {
+                       LOG.error("Codegen failed to optimize the following HOP 
DAG: \n" + 
+                               Explain.explainHops(roots));
                        throw new DMLRuntimeException(ex);
                }
                

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
index 0dd6d62..aaf6f20 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java
@@ -39,10 +39,10 @@ public class CNodeOuterProduct extends CNodeTpl
                        + "  public %TMP%() {\n"
                        + "    _outerProductType = OutProdType.%TYPE%;\n"
                        + "  }\n"
-                       + "  protected void genexecDense( double a, double[] 
a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, 
int ci, int n, int m, int k, int rowIndex, int colIndex) { \n"
+                       + "  protected void genexecDense(double a, double[] a1, 
int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int 
ci, int m, int n, int k, int rowIndex, int colIndex) { \n"
                        + "%BODY_dense%"
                        + "  }\n"
-                       + "  protected double genexecCellwise( double a, 
double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int 
n, int m, int k, int rowIndex, int colIndex) { \n"
+                       + "  protected double genexecCellwise(double a, 
double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int 
m, int n, int k, int rowIndex, int colIndex) { \n"
                        + "%BODY_cellwise%"
                        + "    return %OUT_cellwise%;\n"
                        + "  }\n"                       

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
index 64470ea..9d0203e 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java
@@ -21,6 +21,7 @@ package org.apache.sysml.runtime.codegen;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutorService;
@@ -28,8 +29,10 @@ import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
 import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.IJV;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.SparseBlock;
 
@@ -72,6 +75,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator
                //sanity check
                if( inputs==null || inputs.size() < 3 )
                        throw new RuntimeException("Invalid input arguments.");
+               if( inputs.get(0).isEmptyBlock(false) )
+                       return new DoubleObject(0);
                
                //input preparation
                double[][] ab = prepInputMatricesDense(inputs, 1, 2);
@@ -87,10 +92,12 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                MatrixBlock out = new MatrixBlock(1, 1, false);
                out.allocateDenseBlock();
                
-               if(!a.isInSparseFormat())
-                       executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], 
b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, m, 0, n);
+               if( a instanceof CompressedMatrixBlock )
+                       executeCellwiseCompressed((CompressedMatrixBlock)a, 
ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n);
+               else if( !a.isInSparseFormat() )
+                       executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], 
b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                else
-                       executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], 
b, scalars, out, n, m, k, (int) a.getNonZeros(), _outerProductType, 0, m, 0, n);
+                       executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], 
b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
                return new DoubleObject(out.getDenseBlock()[0]);
        }
        
@@ -101,6 +108,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                //sanity check
                if( inputs==null || inputs.size() < 3 )
                        throw new RuntimeException("Invalid input arguments.");
+               if( inputs.get(0).isEmptyBlock(false) )
+                       return new DoubleObject(0);
                
                //input preparation
                double[][] ab = prepInputMatricesDense(inputs, 1, 2);
@@ -114,14 +123,14 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                double sum = 0;
                
                try 
-               {                       
+               {
                        ExecutorService pool = Executors.newFixedThreadPool(k);
                        ArrayList<ParOuterProdAggTask> tasks = new 
ArrayList<ParOuterProdAggTask>();                    
                        //create tasks (for wdivmm-left, parallelization over 
columns;
                        //for wdivmm-right, parallelization over rows; both 
ensure disjoint results)
                        int blklen = (int)(Math.ceil((double)m/numThreads));
                        for( int i=0; i<numThreads & i*blklen<m; i++ )
-                               tasks.add(new 
ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, n, m, k, 
_outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n));
+                               tasks.add(new 
ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, m, n, k, 
_outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n));
                        //execute tasks
                        List<Future<Double>> taskret = pool.invokeAll(tasks);
                        pool.shutdown();
@@ -154,17 +163,20 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                if(_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) {
                        //assign it to the time and sparse representation of 
the major input matrix
                        out.reset(inputs.get(0).getNumRows(), 
inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat());
-                       out.allocateDenseOrSparseBlock();
                }               
                else {  
                        //if left outerproduct gives a value of k*n instead of 
n*k, change it back to n*k and then transpose the output
                        //if(_outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT &&  out.getNumRows() == 
inputs.get(2).getNumColumns() &&  out.getNumColumns() == 
inputs.get(2).getNumRows())
                        if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT )
-                               
out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns()); // n*k
+                               out.reset(inputs.get(0).getNumColumns(), 
inputs.get(1).getNumColumns(), false); // n*k
                        else if(_outerProductType == 
OutProdType.RIGHT_OUTER_PRODUCT )
-                               
out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns()); // m*k
-                       out.allocateDenseBlock();
-               }                       
+                               out.reset(inputs.get(0).getNumRows(), 
inputs.get(1).getNumColumns(), false); // m*k
+               }
+               
+               //check for empty inputs; otherwise allocate result
+               if( inputs.get(0).isEmptyBlock(false) )
+                       return;
+               out.allocateDenseOrSparseBlock();
                
                //input preparation
                double[][] ab = prepInputMatricesDense(inputs, 1, 2);
@@ -181,17 +193,21 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                switch(_outerProductType) {
                        case LEFT_OUTER_PRODUCT:        
                        case RIGHT_OUTER_PRODUCT:
-                               if( !a.isInSparseFormat() )
-                                       executeDense(a.getDenseBlock(), ab[0], 
ab[1], b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, m, 0, n);
+                               if( a instanceof CompressedMatrixBlock )
+                                       
executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, 
out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
+                               else if( !a.isInSparseFormat() )
+                                       executeDense(a.getDenseBlock(), ab[0], 
ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n);
                                else
-                                       executeSparse(a.getSparseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, (int) a.getNonZeros(), 
_outerProductType, 0, m, 0, n);
+                                       executeSparse(a.getSparseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), 
_outerProductType, 0, m, 0, n);
                                break;
                                
                        case CELLWISE_OUTER_PRODUCT:
-                               if( !a.isInSparseFormat() )
-                                       executeCellwiseDense(a.getDenseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, 
m, 0, n);
+                               if( a instanceof CompressedMatrixBlock )
+                                       
executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, 
out, m, n, k, _outerProductType, 0, m, 0, n);
+                               else if( !a.isInSparseFormat() )
+                                       executeCellwiseDense(a.getDenseBlock(), 
ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, 
m, 0, n);
                                else 
-                                       
executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, n, m, 
k, (int) a.getNonZeros(), _outerProductType, 0, m, 0, n);
+                                       
executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, 
k, a.getNonZeros(), _outerProductType, 0, m, 0, n);
                                break;
        
                        case AGG_OUTER_PRODUCT:
@@ -199,6 +215,9 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                }
                
                //post-processing
+               if( a instanceof CompressedMatrixBlock && out.isInSparseFormat()
+                       && _outerProductType == 
OutProdType.CELLWISE_OUTER_PRODUCT )
+                       out.sortSparseRows();
                out.recomputeNonZeros();
                out.examSparsity();
        }
@@ -230,10 +249,10 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                {
                        //if left outerproduct gives a value of k*n instead of 
n*k, change it back to n*k and then transpose the output
                        //if(_outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT &&  out.getNumRows() == 
inputs.get(2).getNumColumns() &&  out.getNumColumns() == 
inputs.get(2).getNumRows())
-                       if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT )
-                               
out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns()); // n*k
-                       else if(_outerProductType == 
OutProdType.RIGHT_OUTER_PRODUCT )
-                               
out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns()); // m*k
+                       if( _outerProductType == OutProdType.LEFT_OUTER_PRODUCT 
)
+                               
out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns(), false); 
// n*k
+                       else if( _outerProductType == 
OutProdType.RIGHT_OUTER_PRODUCT )
+                               
out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns(), false); // 
m*k
                        out.allocateDenseBlock();
                }       
                
@@ -247,6 +266,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                final int n = inputs.get(0).getNumColumns();    
                final int k = inputs.get(1).getNumColumns(); // rank
                
+               MatrixBlock a = inputs.get(0);
+               
                try 
                {                       
                        ExecutorService pool = 
Executors.newFixedThreadPool(numThreads);
@@ -255,14 +276,25 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        //for wdivmm-right, parallelization over rows; both 
ensure disjoint results)
                        
                        if( _outerProductType == OutProdType.LEFT_OUTER_PRODUCT 
) {
-                               int blklen = 
(int)(Math.ceil((double)n/numThreads));
-                               for( int j=0; j<numThreads & j*blklen<n; j++ )
-                                       tasks.add(new 
ParExecTask(inputs.get(0), ab[0], ab[1], b, scalars, out, n, m, k, 
_outerProductType,  0, m, j*blklen, Math.min((j+1)*blklen, n)));
+                               if( a instanceof CompressedMatrixBlock ) {
+                                       //parallelize over column groups
+                                       int numCG = 
((CompressedMatrixBlock)a).getNumColGroups();
+                                       int blklen = 
(int)(Math.ceil((double)numCG/numThreads));
+                                       for( int j=0; j<numThreads & 
j*blklen<numCG; j++ )
+                                               tasks.add(new ParExecTask(a, 
ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType,  0, m, j*blklen, 
Math.min((j+1)*blklen, numCG)));
+                               }
+                               else {
+                                       //parallelize over column partitions
+                                       int blklen = 
(int)(Math.ceil((double)n/numThreads));
+                                       for( int j=0; j<numThreads & 
j*blklen<n; j++ )
+                                               tasks.add(new ParExecTask(a, 
ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType,  0, m, j*blklen, 
Math.min((j+1)*blklen, n)));
+                               }
                        }
-                       else { ///right // cellwise
+                       else { //right or cell-wise
+                               //parallelize over row partitions
                                int blklen = 
(int)(Math.ceil((double)m/numThreads));
                                for( int i=0; i<numThreads & i*blklen<m; i++ )
-                                       tasks.add(new 
ParExecTask(inputs.get(0), ab[0], ab[1], b, scalars, out, n, m, k, 
_outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n));
+                                       tasks.add(new ParExecTask(a, ab[0], 
ab[1], b, scalars, out, m, n, k, _outerProductType, i*blklen, 
Math.min((i+1)*blklen,m), 0, n));
                        }
                        List<Future<Long>> taskret = pool.invokeAll(tasks);
                        pool.shutdown();
@@ -271,13 +303,20 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                } 
                catch (Exception e) {
                        throw new DMLRuntimeException(e);
-               } 
+               }
                
                //post-processing
+               if( a instanceof CompressedMatrixBlock ) { 
+                       if( out.isInSparseFormat() && _outerProductType == 
OutProdType.CELLWISE_OUTER_PRODUCT )
+                               out.sortSparseRows();
+                       else if( _outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT )
+                               out.recomputeNonZeros();
+               }
                out.examSparsity();
        }
        
-       private void executeDense(double[] a, double[] u, double[] v, 
double[][] b, double[] scalars , double[] c, int n, int m, int k, OutProdType 
type, int rl, int ru, int cl, int cu ) 
+       private void executeDense(double[] a, double[] u, double[] v, 
double[][] b, double[] scalars, 
+               double[] c, int m, int n, int k, OutProdType type, int rl, int 
ru, int cl, int cu ) 
        {
                //approach: iterate over non-zeros of w, selective mm 
computation
                //cache-conscious blocking: due to blocksize constraint 
(default 1000),
@@ -296,12 +335,13 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                        for( int j=bj, vix=bj*k; j<bjmin; j++, 
vix+=k)
                                                if( a[ix+j] != 0 ) {
                                                        cix = (type == 
OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
-                                                       genexecDense( a[ix+j], 
u, uix, v, vix, b, scalars, c, cix, n, m, k, i,j);//(ix+j)/n, (ix+j)%n ); 
+                                                       genexecDense( a[ix+j], 
u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j); 
                                                }
                        }
        }
        
-       private void executeCellwiseDense(double[] a, double[] u, double[] v, 
double[][] b, double[] scalars , double[] c, int n, int m, int k, OutProdType 
type, int rl, int ru, int cl, int cu ) 
+       private void executeCellwiseDense(double[] a, double[] u, double[] v, 
double[][] b, double[] scalars, 
+               double[] c, int m, int n, int k, OutProdType type, int rl, int 
ru, int cl, int cu ) 
        {
                //approach: iterate over non-zeros of w, selective mm 
computation
                //cache-conscious blocking: due to blocksize constraint 
(default 1000),
@@ -320,14 +360,15 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                                if( a[ix+j] != 0 ) {
                                                        //int cix = (type == 
OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix;
                                                        if(type == 
OutProdType.CELLWISE_OUTER_PRODUCT)
-                                                               c[ix+j] = 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, n, m, k, i, j ); 
+                                                               c[ix+j] = 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j ); 
                                                        else
-                                                               c[0]  += 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, n, m, k, i, j); // 
(ix+j)/n, (ix+j)%n );         
+                                                               c[0]  += 
genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j);  
                                                }
                        }
        }
        
-       private void executeSparse(SparseBlock sblock,  double[] u, double[] v, 
double[][] b, double[] scalars , double[] c, int n, int m, int k, int nnz, 
OutProdType type, int rl, int ru, int cl, int cu) 
+       private void executeSparse(SparseBlock sblock,  double[] u, double[] v, 
double[][] b, double[] scalars, 
+               double[] c, int m, int n, int k, long nnz, OutProdType type, 
int rl, int ru, int cl, int cu) 
        {
                boolean left = (_outerProductType== 
OutProdType.LEFT_OUTER_PRODUCT);
                
@@ -364,7 +405,7 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                        int index = wpos + curk[i-bi];
                                        for( ; index<wpos+wlen && 
wix[index]<bjmin; index++ ) {
                                                genexecDense( wval[index], u, 
uix, v, wix[index]*k, b, scalars, c, 
-                                                               (left ? 
wix[index]*k : uix), n, m, k, i, wix[index] );
+                                                               (left ? 
wix[index]*k : uix), m, n, k, i, wix[index] );
                                        }
                                        curk[i-bi] = index - wpos;
                                }
@@ -372,7 +413,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                }
        }
        
-       private void executeCellwiseSparse(SparseBlock sblock, double[] u, 
double[] v, double[][] b, double[] scalars , MatrixBlock out, int n, int m, int 
k, long nnz, OutProdType type, int rl, int ru, int cl, int cu ) 
+       private void executeCellwiseSparse(SparseBlock sblock, double[] u, 
double[] v, double[][] b, double[] scalars, 
+               MatrixBlock out, int m, int n, int k, long nnz, OutProdType 
type, int rl, int ru, int cl, int cu ) 
        {
                final int blocksizeIJ = (int) (8L*m*n/nnz); 
                int[] curk = new int[blocksizeIJ];                      
@@ -396,9 +438,9 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                                int index = wpos + curk[i-bi];
                                                for( ; index<wpos+wlen && 
wix[index]<bjmin; index++ ) {
                                                        if(type == 
OutProdType.CELLWISE_OUTER_PRODUCT)
-                                                               c[index] = 
genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, n, m, k, i, 
wix[index] ); 
+                                                               c[wix[index]] = 
genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, 
wix[index] ); 
                                                        else
-                                                               c[0] += 
genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, n, m, k, i, 
wix[index]); // (ix+j)/n, (ix+j)%n );
+                                                               c[0] += 
genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, 
wix[index]);
                                                }
                                                curk[i-bi] = index - wpos;
                                        }
@@ -423,8 +465,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                                                double[] wval = 
sblock.values(i);
                                                int index = wpos + curk[i-bi];
                                                for( ; index<wpos+wlen && 
wix[index]<bjmin; index++ ) {
-                                                       c.append(i, index, 
genexecCellwise( wval[index], u, uix, v, 
-                                                                       
wix[index]*k, b, scalars, n, m, k, i, wix[index] )); 
+                                                       c.append(i, wix[index], 
genexecCellwise( wval[index], u, uix, v, 
+                                                                       
wix[index]*k, b, scalars, m, n, k, i, wix[index] )); 
                                                }
                                                curk[i-bi] = index - wpos;
                                        }
@@ -432,10 +474,56 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        }
                }
        }
+       
+       private void executeCompressed(CompressedMatrixBlock a, double[] u, 
double[] v, double[][] b, double[] scalars, 
+                       double[] c, int m, int n, int k, OutProdType type, int 
rl, int ru, int cl, int cu) 
+       {
+               boolean left = 
(_outerProductType==OutProdType.LEFT_OUTER_PRODUCT);
+               
+               Iterator<IJV> iter = !left ? a.getIterator(rl, ru, false) : 
+                       a.getIterator(rl, ru, cl, cu, false); //cl/cu -> 
colgroups
+               while( iter.hasNext() ) {
+                       IJV cell = iter.next();
+                       int uix = cell.getI() * k;
+                       int vix = cell.getJ() * k;
+                       genexecDense(cell.getV(), u, uix, v, vix, b, scalars, 
c, 
+                               left ? vix : uix, m, n, k, cell.getI(), 
cell.getJ());
+               }
+       }
+       
+       private void executeCellwiseCompressed(CompressedMatrixBlock a, 
double[] u, double[] v, double[][] b, double[] scalars, 
+               MatrixBlock out, int m, int n, int k, OutProdType type, int rl, 
int ru, int cl, int cu ) 
+       {                       
+               double[] c = out.getDenseBlock();
+               SparseBlock csblock = out.getSparseBlock();
+               
+               Iterator<IJV> iter = a.getIterator(rl, ru, false);
+               while( iter.hasNext() ) {
+                       IJV cell = iter.next();
+                       int uix = cell.getI() * k;
+                       int vix = cell.getJ() * k;
+                       if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) {
+                               if( out.isInSparseFormat() ) {
+                                       csblock.allocate(cell.getI());
+                                       csblock.append(cell.getI(), 
cell.getJ(), 
+                                               genexecCellwise(cell.getV(), u, 
uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()));
+                               }
+                               else {
+                                       c[cell.getI()*n+cell.getJ()] = 
+                                               genexecCellwise(cell.getV(), u, 
uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+                               }
+                       }
+                       else {
+                               c[0] += genexecCellwise(cell.getV(), u, uix, v, 
vix, b, scalars, m, n, k, cell.getI(), cell.getJ());
+                       }
+               }
+       }
 
-       protected abstract void genexecDense( double a, double[] u, int ui, 
double[] v, int vi, double[][] b, double[] scalars , double[] c, int ci, int n, 
int m, int k, int rowIndex, int colIndex );
+       protected abstract void genexecDense( double a, double[] u, int ui, 
double[] v, int vi, double[][] b, 
+               double[] scalars, double[] c, int ci, int m, int n, int k, int 
rowIndex, int colIndex);
        
-       protected abstract double genexecCellwise( double a, double[] u, int 
ui, double[] v, int vi, double[][] b, double[] scalars , int n, int m, int k, 
int rowIndex, int colIndex);
+       protected abstract double genexecCellwise( double a, double[] u, int 
ui, double[] v, int vi, double[][] b, 
+               double[] scalars, int m, int n, int k, int rowIndex, int 
colIndex);
 
        private class ParExecTask implements Callable<Long> 
        {
@@ -454,15 +542,15 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                private final int _cl;
                private final int _cu;
                
-               protected ParExecTask( MatrixBlock a, double[] u, double[] v, 
double[][] b, double[] scalars , MatrixBlock c, int clen, int rlen, int k, 
OutProdType type, int rl, int ru, int cl, int cu ) {
+               protected ParExecTask( MatrixBlock a, double[] u, double[] v, 
double[][] b, double[] scalars , MatrixBlock c, int m, int n, int k, 
OutProdType type, int rl, int ru, int cl, int cu ) {
                        _a = a;
                        _u = u;
                        _v = v;
                        _b = b;
                        _c = c;
                        _scalars = scalars;
-                       _clen = clen;
-                       _rlen = rlen;
+                       _rlen = m;
+                       _clen = n;
                        _k = k;
                        _type = type;
                        _rl = rl;
@@ -477,24 +565,30 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                        {
                                case LEFT_OUTER_PRODUCT:        
                                case RIGHT_OUTER_PRODUCT:
-                                       if( !_a.isInSparseFormat() )
-                                               
executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_clen, _rlen, _k, _type, _rl, _ru, _cl, _cu);
+                                       if( _a instanceof CompressedMatrixBlock 
)
+                                               
executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, 
_c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                                       else if( !_a.isInSparseFormat() )
+                                               
executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else
-                                               
executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_clen, _rlen, _k, (int) _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
+                                               
executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), 
_rlen, _clen, _k, _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
                                        break;
                                case CELLWISE_OUTER_PRODUCT:
-                                       if( !_c.isInSparseFormat() )
-                                               
executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, 
_c.getDenseBlock(), _clen, _rlen, _k, _type, _rl, _ru, _cl, _cu);
+                                       if( _a instanceof CompressedMatrixBlock 
)
+                                               
executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c, 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                                       else if( !_c.isInSparseFormat() )
+                                               
executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, 
_c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
                                        else 
-                                               
executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _clen, 
_rlen, _k, (int) _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
+                                               
executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _rlen, 
_clen, _k, _a.getNonZeros(), _type,  _rl, _ru, _cl, _cu);
                                        break;                  
                                case AGG_OUTER_PRODUCT:
                                        throw new DMLRuntimeException("Wrong 
codepath for aggregate outer product.");
                        }
                        
-                       int rl = _outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT ? _cl : _rl;
-                       int ru = _outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT ? _cu : _ru;
-                       return _c.recomputeNonZeros(rl, ru-1, 0, 
_c.getNumColumns()-1);
+                       boolean left = (_outerProductType == 
OutProdType.LEFT_OUTER_PRODUCT);
+                       int rl = left ? _cl : _rl;
+                       int ru = left ? _cu : _ru;
+                       return (_a instanceof CompressedMatrixBlock && left) ? 
-1 :
+                               _c.recomputeNonZeros(rl, ru-1, 0, 
_c.getNumColumns()-1);
                }
        }
        
@@ -505,8 +599,8 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                private final double[] _v;
                private final double[][] _b;
                private final double[] _scalars;
-               private final int _clen;
                private final int _rlen;
+               private final int _clen;
                private final int _k;
                private final OutProdType _type;
                private final int _rl;
@@ -514,14 +608,14 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                private final int _cl;
                private final int _cu;
                
-               protected ParOuterProdAggTask( MatrixBlock a, double[] u, 
double[] v, double[][] b, double[] scalars, int clen, int rlen, int k, 
OutProdType type, int rl, int ru, int cl, int cu ) {
+               protected ParOuterProdAggTask( MatrixBlock a, double[] u, 
double[] v, double[][] b, double[] scalars, int m, int n, int k, OutProdType 
type, int rl, int ru, int cl, int cu ) {
                        _a = a;
                        _u = u;
                        _v = v;
                        _b = b;
                        _scalars = scalars;
-                       _clen = clen;
-                       _rlen = rlen;
+                       _rlen = m;
+                       _clen = n;
                        _k = k;
                        _type = type;
                        _rl = rl;
@@ -534,10 +628,12 @@ public abstract class SpoofOuterProduct extends 
SpoofOperator
                public Double call() throws DMLRuntimeException {
                        MatrixBlock out = new MatrixBlock(1, 1, false);
                        out.allocateDenseBlock();
-                       if(!_a.isInSparseFormat())
-                               executeCellwiseDense(_a.getDenseBlock(), _u, 
_v, _b, _scalars, out.getDenseBlock(), _clen, _rlen, _k, _type, _rl, _ru, _cl, 
_cu);
+                       if( _a instanceof CompressedMatrixBlock )
+                               
executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, out, 
_rlen, _clen, _k, _type, _rl, _ru, _cl, _cu);
+                       else if( !_a.isInSparseFormat() )
+                               executeCellwiseDense(_a.getDenseBlock(), _u, 
_v, _b, _scalars, out.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, 
_cu);
                        else
-                               executeCellwiseSparse(_a.getSparseBlock(), _u, 
_v, _b, _scalars, out, _clen, _rlen, _k, _a.getNonZeros(), _type, _rl, _ru, 
_cl, _cu);
+                               executeCellwiseSparse(_a.getSparseBlock(), _u, 
_v, _b, _scalars, out, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, 
_cl, _cu);
                        return out.getDenseBlock()[0];
                }
        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java 
b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
index f100495..ac0b803 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java
@@ -798,6 +798,8 @@ public class ColGroupOLE extends ColGroupOffset
 
                @Override
                public Integer next() {
+                       if( !hasNext() )
+                               throw new RuntimeException("No more OLE 
entries.");
                        int ret = _rpos;
                        nextRowOffset();
                        return ret;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java 
b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
index 184c69a..5fc4a5a 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java
@@ -451,7 +451,7 @@ public abstract class ColGroupOffset extends ColGroupValue
                        _rl = rl;
                        _ru = ru;
                        _inclZeros = inclZeros;
-                       _vpos = 0;
+                       _vpos = -1;
                        _rpos = -1;
                        _cpos = 0;
                        getNextValue();
@@ -461,7 +461,7 @@ public abstract class ColGroupOffset extends ColGroupValue
                public boolean hasNext() {
                        return (_rpos < _ru);
                }
-
+               
                @Override
                public IJV next() {
                        _buff.set(_rpos, _colIndexes[_cpos], (_vpos >= 
getNumValues()) ? 
@@ -472,27 +472,28 @@ public abstract class ColGroupOffset extends ColGroupValue
                
                private void getNextValue() {
                        //advance to next value iterator if required
-                       if( _viter == null ) {
-                               _viter = (getNumValues()>0) ? //first iterator
-                                       getIterator(_vpos, _rl, _ru) : new 
ZeroIterator(_rl, _ru);
-                       }
-                       else if( _viter instanceof ZeroIterator && 
!_viter.hasNext() ) {
+                       if(_viter != null && _viter instanceof ZeroIterator && 
!_viter.hasNext() ) {
                                _rpos = _ru; //end after zero iterator
                                return;
                        }
-                       else if( _cpos+1 >= getNumCols() && !_viter.hasNext() ) 
{
-                               _vpos++; //
-                               if( _vpos < getNumValues() )
-                                       _viter = getIterator(_vpos, _rl, _ru);
-                               else if( _inclZeros && _zeros)
-                                       _viter = new ZeroIterator(_rl, _ru);
-                               else
-                                       _rpos = _ru; //end w/o zero iterator
+                       else if( _cpos+1 >= getNumCols() && !(_viter!=null && 
_viter.hasNext()) ) {
+                               do {
+                                       _vpos++;
+                                       if( _vpos < getNumValues() )
+                                               _viter = getIterator(_vpos, 
_rl, _ru);
+                                       else if( _inclZeros && _zeros)
+                                               _viter = new ZeroIterator(_rl, 
_ru);
+                                       else {
+                                               _rpos = _ru; //end w/o zero 
iterator
+                                               return;
+                                       }
+                               }
+                               while(!_viter.hasNext());
                                _rpos = -1;
                        }
                        
-                       //get next value
-                       if( _rpos < 0 || _cpos+1 >= getNumCols()) {
+                       //get next value from valid iterator
+                       if( _rpos < 0 || _cpos+1 >= getNumCols() ) {
                                _rpos = _viter.next();
                                _cpos = 0;
                        }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java 
b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
index 7f0dcf9..5b1f804 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java
@@ -802,6 +802,8 @@ public class ColGroupRLE extends ColGroupOffset
 
                @Override
                public Integer next() {
+                       if( !hasNext() )
+                               throw new RuntimeException("No more RLE 
entries.");
                        int ret = _rpos;
                        nextRowOffset();
                        return ret;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
index d3bdcca..0fbe608 100644
--- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java
@@ -169,6 +169,10 @@ public class CompressedMatrixBlock extends MatrixBlock 
implements Externalizable
        public ArrayList<ColGroup> getColGroups() {
                return _colGroups;
        }
+       
+       public int getNumColGroups() {
+               return _colGroups.size();
+       }
 
        /**
         * Obtain whether this block is in compressed form or not.
@@ -802,7 +806,11 @@ public class CompressedMatrixBlock extends MatrixBlock 
implements Externalizable
        }
        
        public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros) {
-               return new ColumnGroupIterator(rl, ru, inclZeros);
+               return getIterator(rl, ru, 0, getNumColGroups(), inclZeros);
+       }
+       
+       public Iterator<IJV> getIterator(int rl, int ru, int cgl, int cgu, 
boolean inclZeros) {
+               return new ColumnGroupIterator(rl, ru, cgl, cgu, inclZeros);
        }
        
        //////////////////////////////////////////
@@ -2229,6 +2237,7 @@ public class CompressedMatrixBlock extends MatrixBlock 
implements Externalizable
                //iterator configuration 
                private final int _rl;
                private final int _ru;
+               private final int _cgu;
                private final boolean _inclZeros;
                
                //iterator state
@@ -2236,10 +2245,12 @@ public class CompressedMatrixBlock extends MatrixBlock 
implements Externalizable
                private Iterator<IJV> _iterColGroup = null;
                private boolean _noNext = false;
                
-               public ColumnGroupIterator(int rl, int ru, boolean inclZeros) {
+               public ColumnGroupIterator(int rl, int ru, int cgl, int cgu, 
boolean inclZeros) {
                        _rl = rl;
                        _ru = ru;
+                       _cgu = cgu;
                        _inclZeros = inclZeros;
+                       _posColGroup = cgl-1;
                        getNextIterator();
                }
 
@@ -2252,14 +2263,14 @@ public class CompressedMatrixBlock extends MatrixBlock 
implements Externalizable
                public IJV next() {
                        if( _noNext )
                                throw new RuntimeException("No more entries.");
-                       IJV ret = _iterColGroup.next(); 
+                       IJV ret = _iterColGroup.next();
                        if( !_iterColGroup.hasNext() )
                                getNextIterator();
                        return ret;
                }
                
                private void getNextIterator() {
-                       while( _posColGroup+1 < _colGroups.size() ) {
+                       while( _posColGroup+1 < _cgu ) {
                                _posColGroup++;
                                _iterColGroup = _colGroups.get(_posColGroup)
                                        .getIterator(_rl, _ru, _inclZeros);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java
index a94cdb2..417905d 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java
@@ -315,6 +315,7 @@ public class CompressedCellwiseTest extends 
AutomatedTestBase
                        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = 
oldRewrites;
                        OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
                        OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+                       CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
                }
        }       
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java
index 77b1294..4a16694 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java
@@ -315,6 +315,7 @@ public class CompressedMultiAggregateTest extends 
AutomatedTestBase
                        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = 
oldRewrites;
                        OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
                        OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+                       CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
                }
        }       
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java
new file mode 100644
index 0000000..8c3a695
--- /dev/null
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java
@@ -0,0 +1,267 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.io.File;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class CompressedOuterProductTest extends AutomatedTestBase 
+{      
+       private static final String TEST_NAME1 = "CompressedOuterProductMain";
+       private static final String TEST_DIR = "functions/codegen/";
+       private static final String TEST_CLASS_DIR = TEST_DIR + 
CompressedOuterProductTest.class.getSimpleName() + "/";
+       private final static String TEST_CONF = 
"SystemML-config-codegen-compress.xml";
+       private final static File   TEST_CONF_FILE = new File(SCRIPT_DIR + 
TEST_DIR, TEST_CONF);
+       
+       private static final int rows = 2023;
+       private static final int cols = 1987;
+       private static final double sparsity1 = 0.9;
+       private static final double sparsity2 = 0.1;
+       private static final double sparsity3 = 0.0;
+       private static final double eps = Math.pow(10, -6);
+       
+       public enum SparsityType {
+               DENSE,
+               SPARSE,
+               EMPTY,
+       }
+       
+       public enum ValueType {
+               RAND, //UC
+               CONST, //RLE
+               RAND_ROUND_OLE, //OLE
+               RAND_ROUND_DDC, //DDC
+       }
+       
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration( TEST_NAME1, new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) );
+       }
+               
+       @Test
+       public void testCompressedCellwiseMainDenseConstCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.CONST, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRandCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRand2CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND_ROUND_DDC, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRand3CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND_ROUND_OLE, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseConstCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.CONST, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRandCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRand2CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND_ROUND_DDC, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRand3CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND_ROUND_OLE, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyConstCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.CONST, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRandCP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRand2CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND_ROUND_DDC, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRand3CP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND_ROUND_OLE, ExecType.CP );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseConstSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.CONST, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRandSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRand2SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainDenseRand3SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, 
ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseConstSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.CONST, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRandSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRand2SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainSparseRand3SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, 
ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyConstSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.CONST, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRandSP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRand2SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND_ROUND_DDC, ExecType.SPARK );
+       }
+       
+       @Test
+       public void testCompressedCellwiseMainEmptyRand3SP() {
+               testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, 
ValueType.RAND_ROUND_OLE, ExecType.SPARK );
+       }
+       
+       private void testCompressedCellwise(String testname, SparsityType 
stype, ValueType vtype, ExecType et)
+       {       
+               boolean oldRewrites = 
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+               RUNTIME_PLATFORM platformOld = rtplatform;
+               switch( et ){
+                       case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+                       case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+                       default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; 
break;
+               }
+       
+               boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+               if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == 
RUNTIME_PLATFORM.HYBRID_SPARK )
+                       DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+               
+               try
+               {
+                       OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = true;
+                       TestConfiguration config = 
getTestConfiguration(testname);
+                       loadTestConfiguration(config);
+                       
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + testname + ".dml";
+                       programArgs = new String[]{"-explain", "-stats", 
+                                       "-args", input("X"), output("R") };
+                       
+                       fullRScriptName = HOME + testname + ".R";
+                       rCmd = getRCmd(inputDir(), expectedDir());              
        
+
+                       //select input sparsity
+                       double sparsity = -1;
+                       switch( stype ){
+                               case DENSE: sparsity = sparsity1; break;
+                               case SPARSE: sparsity = sparsity2; break;
+                               case EMPTY: sparsity = sparsity3; break;
+                       }
+                       
+                       //generate input data
+                       double min = (vtype==ValueType.CONST)? 10 : -10;
+                       double[][] X = TestUtils.generateTestMatrix(rows, cols, 
min, 10, sparsity, 7);
+                       if( vtype==ValueType.RAND_ROUND_OLE || 
vtype==ValueType.RAND_ROUND_DDC ) {
+                               CompressedMatrixBlock.ALLOW_DDC_ENCODING = 
(vtype==ValueType.RAND_ROUND_DDC);
+                               X = TestUtils.round(X);
+                       }
+                       writeInputMatrixWithMTD("X", X, true);
+                       
+                       //run tests
+                       runTest(true, false, null, -1); 
+                       runRScript(true); 
+                       
+                       //compare matrices 
+                       HashMap<CellIndex, Double> dmlfile = 
readDMLMatrixFromHDFS("R");
+                       HashMap<CellIndex, Double> rfile  = 
readRMatrixFromFS("R");     
+                       TestUtils.compareMatrices(dmlfile, rfile, eps, 
"Stat-DML", "Stat-R");
+                       
Assert.assertTrue(heavyHittersContainsSubString("spoofOP") 
+                               || heavyHittersContainsSubString("sp_spoofOP"));
+               }
+               finally {
+                       rtplatform = platformOld;
+                       DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+                       OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = 
oldRewrites;
+                       OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+                       OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+                       CompressedMatrixBlock.ALLOW_DDC_ENCODING = true;
+               }
+       }       
+
+       /**
+        * Override default configuration with custom test configuration to 
ensure
+        * scratch space and local temporary directory locations are also 
updated.
+        */
+       @Override
+       protected File getConfigTemplateFile() {
+               // Instrumentation in this test's output log to show custom 
configuration file used for template.
+               System.out.println("This test case overrides default 
configuration with " + TEST_CONF_FILE.getPath());
+               return TEST_CONF_FILE;
+       }
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/scripts/functions/codegen/CompressedOuterProductMain.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/CompressedOuterProductMain.R 
b/src/test/scripts/functions/codegen/CompressedOuterProductMain.R
new file mode 100644
index 0000000..e457645
--- /dev/null
+++ b/src/test/scripts/functions/codegen/CompressedOuterProductMain.R
@@ -0,0 +1,36 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)  # args[1]: input path prefix, args[2]: output path prefix
+library("Matrix")  # Matrix Market I/O (readMM / writeMM)
+library("matrixStats")  # NOTE(review): no matrixStats function appears to be used below - confirm before removing
+
+X = readMM(paste(args[1], "X.mtx", sep=""));  # input matrix, read from "<args[1]>X.mtx"
+
+U = matrix(0.0001, nrow(X), 10);  # nrow(X) x 10 factor, every entry 0.0001
+V = matrix(0.0001, ncol(X), 10);  # ncol(X) x 10 factor, every entry 0.0001
+
+R1 = X * (7 + (U %*% t(V) + 3));  # element-wise: X scaled by 7 + (U V^T + 3)
+R2 = (X * (7 + (U %*% t(V) + 3))) %*% V;  # same scaled X, right-multiplied by V -> nrow(X) x 10
+R3 = t(U) %*% (X * (7 + (U %*% t(V) + 3)));  # same scaled X, left-multiplied by t(U) -> 10 x ncol(X)
+R = as.matrix(sum(R1 * (7 + (R2 %*% R3 + 3))/1e6));  # scalar sum of the combined result, wrapped as a 1x1 matrix
+
+writeMM(as(R,"CsparseMatrix"), paste(args[2], "R", sep=""));  # write result in sparse form to "<args[2]>R"

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml b/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml
new file mode 100644
index 0000000..c7596e8
--- /dev/null
+++ b/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml
@@ -0,0 +1,33 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($1)  # input matrix, read from the path passed as first script argument
+
+U = matrix(0.0001, nrow(X), 10);  # nrow(X) x 10 factor, every entry 0.0001
+V = matrix(0.0001, ncol(X), 10);  # ncol(X) x 10 factor, every entry 0.0001
+
+R1 = X * (7 + (U %*% t(V) + 3));  # element-wise: X scaled by 7 + (U V^T + 3)
+R2 = (X * (7 + (U %*% t(V) + 3))) %*% V;  # same scaled X, right-multiplied by V -> nrow(X) x 10
+R3 = t(U) %*% (X * (7 + (U %*% t(V) + 3)));  # same scaled X, left-multiplied by t(U) -> 10 x ncol(X)
+if(1==1) {}  # empty conditional; presumably forces a statement-block cut before the final aggregate - TODO confirm
+R = as.matrix(sum(R1 * (7 + (R2 %*% R3 + 3))/1e6));  # scalar sum of the combined result, wrapped as a 1x1 matrix
+
+write(R, $2)  # write result to the path passed as second script argument

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
----------------------------------------------------------------------
diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
index bc5c21f..ea8a1f1 100644
--- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
+++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java
@@ -35,6 +35,7 @@ import org.junit.runners.Suite;
        CellwiseTmplTest.class,
        CompressedCellwiseTest.class,
        CompressedMultiAggregateTest.class,
+       CompressedOuterProductTest.class,
        DAGCellwiseTmplTest.class,
        MultiAggTmplTest.class,
        OuterProdTmplTest.class,

Reply via email to