[SYSTEMML-1289] Codegen outer product ops over compressed matrices This patch extends the codegen outer product template with support for compressed matrices. The basic approach is to use sparse-safe column iterators to process all relevant values without decompression. For left outer product operations, we parallelize over column group partitions and extend the column group iterator accordingly.
Furthermore, this patch also includes a variety of smaller bug fixes: 1) Fix ColGroupOffset iterator (OLE, RLE) to handle the case of iterators without row indexes in a specified row range. 2) Fix result correctness of sparse cell-wise outer products (output indexing). 3) Fix outer product handling of empty input matrices. 4) Fix the unnecessary casting of the number of non-zeros to int, which could lead to truncation/overflows because it's represented as long. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/9dad2fe8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/9dad2fe8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/9dad2fe8 Branch: refs/heads/master Commit: 9dad2fe8b39d698b6b289955e249e27dc4e2b8a1 Parents: 8738121 Author: Matthias Boehm <mboe...@gmail.com> Authored: Mon May 29 22:24:11 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Mon May 29 22:36:13 2017 -0700 ---------------------------------------------------------------------- .../sysml/hops/codegen/SpoofCompiler.java | 2 + .../hops/codegen/cplan/CNodeOuterProduct.java | 4 +- .../runtime/codegen/SpoofOuterProduct.java | 214 +++++++++++---- .../sysml/runtime/compress/ColGroupOLE.java | 2 + .../sysml/runtime/compress/ColGroupOffset.java | 35 +-- .../sysml/runtime/compress/ColGroupRLE.java | 2 + .../runtime/compress/CompressedMatrixBlock.java | 19 +- .../codegen/CompressedCellwiseTest.java | 1 + .../codegen/CompressedMultiAggregateTest.java | 1 + .../codegen/CompressedOuterProductTest.java | 267 +++++++++++++++++++ .../codegen/CompressedOuterProductMain.R | 36 +++ .../codegen/CompressedOuterProductMain.dml | 33 +++ .../functions/codegen/ZPackageSuite.java | 1 + 13 files changed, 535 insertions(+), 82 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java index d87c107..d96dda1 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java +++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java @@ -411,6 +411,8 @@ public class SpoofCompiler } } catch( Exception ex ) { + LOG.error("Codegen failed to optimize the following HOP DAG: \n" + + Explain.explainHops(roots)); throw new DMLRuntimeException(ex); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java index 0dd6d62..aaf6f20 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeOuterProduct.java @@ -39,10 +39,10 @@ public class CNodeOuterProduct extends CNodeTpl + " public %TMP%() {\n" + " _outerProductType = OutProdType.%TYPE%;\n" + " }\n" - + " protected void genexecDense( double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int n, int m, int k, int rowIndex, int colIndex) { \n" + + " protected void genexecDense(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, double[] c, int ci, int m, int n, int k, int rowIndex, int colIndex) { \n" + "%BODY_dense%" + " }\n" - + " protected double genexecCellwise( double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int n, int m, 
int k, int rowIndex, int colIndex) { \n" + + " protected double genexecCellwise(double a, double[] a1, int a1i, double[] a2, int a2i, double[][] b, double[] scalars, int m, int n, int k, int rowIndex, int colIndex) { \n" + "%BODY_cellwise%" + " return %OUT_cellwise%;\n" + " }\n" http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java index 64470ea..9d0203e 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java @@ -21,6 +21,7 @@ package org.apache.sysml.runtime.codegen; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; @@ -28,8 +29,10 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.CompressedMatrixBlock; import org.apache.sysml.runtime.instructions.cp.DoubleObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; +import org.apache.sysml.runtime.matrix.data.IJV; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; @@ -72,6 +75,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator //sanity check if( inputs==null || inputs.size() < 3 ) throw new RuntimeException("Invalid input arguments."); + if( inputs.get(0).isEmptyBlock(false) ) + return new DoubleObject(0); //input preparation double[][] ab = prepInputMatricesDense(inputs, 1, 2); @@ -87,10 +92,12 @@ public abstract class SpoofOuterProduct extends SpoofOperator 
MatrixBlock out = new MatrixBlock(1, 1, false); out.allocateDenseBlock(); - if(!a.isInSparseFormat()) - executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, m, 0, n); + if( a instanceof CompressedMatrixBlock ) + executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n); + else if( !a.isInSparseFormat() ) + executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else - executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, n, m, k, (int) a.getNonZeros(), _outerProductType, 0, m, 0, n); + executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); return new DoubleObject(out.getDenseBlock()[0]); } @@ -101,6 +108,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator //sanity check if( inputs==null || inputs.size() < 3 ) throw new RuntimeException("Invalid input arguments."); + if( inputs.get(0).isEmptyBlock(false) ) + return new DoubleObject(0); //input preparation double[][] ab = prepInputMatricesDense(inputs, 1, 2); @@ -114,14 +123,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator double sum = 0; try - { + { ExecutorService pool = Executors.newFixedThreadPool(k); ArrayList<ParOuterProdAggTask> tasks = new ArrayList<ParOuterProdAggTask>(); //create tasks (for wdivmm-left, parallelization over columns; //for wdivmm-right, parallelization over rows; both ensure disjoint results) int blklen = (int)(Math.ceil((double)m/numThreads)); for( int i=0; i<numThreads & i*blklen<m; i++ ) - tasks.add(new ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, n, m, k, _outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n)); + tasks.add(new ParOuterProdAggTask(inputs.get(0), ab[0], ab[1], b, scalars, m, n, k, _outerProductType, i*blklen, 
Math.min((i+1)*blklen,m), 0, n)); //execute tasks List<Future<Double>> taskret = pool.invokeAll(tasks); pool.shutdown(); @@ -154,17 +163,20 @@ public abstract class SpoofOuterProduct extends SpoofOperator if(_outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT) { //assign it to the time and sparse representation of the major input matrix out.reset(inputs.get(0).getNumRows(), inputs.get(0).getNumColumns(), inputs.get(0).isInSparseFormat()); - out.allocateDenseOrSparseBlock(); } else { //if left outerproduct gives a value of k*n instead of n*k, change it back to n*k and then transpose the output //if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && out.getNumRows() == inputs.get(2).getNumColumns() && out.getNumColumns() == inputs.get(2).getNumRows()) if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT ) - out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns()); // n*k + out.reset(inputs.get(0).getNumColumns(), inputs.get(1).getNumColumns(), false); // n*k else if(_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT ) - out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns()); // m*k - out.allocateDenseBlock(); - } + out.reset(inputs.get(0).getNumRows(), inputs.get(1).getNumColumns(), false); // m*k + } + + //check for empty inputs; otherwise allocate result + if( inputs.get(0).isEmptyBlock(false) ) + return; + out.allocateDenseOrSparseBlock(); //input preparation double[][] ab = prepInputMatricesDense(inputs, 1, 2); @@ -181,17 +193,21 @@ public abstract class SpoofOuterProduct extends SpoofOperator switch(_outerProductType) { case LEFT_OUTER_PRODUCT: case RIGHT_OUTER_PRODUCT: - if( !a.isInSparseFormat() ) - executeDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, m, 0, n); + if( a instanceof CompressedMatrixBlock ) + executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); + else if( 
!a.isInSparseFormat() ) + executeDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else - executeSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, (int) a.getNonZeros(), _outerProductType, 0, m, 0, n); + executeSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); break; case CELLWISE_OUTER_PRODUCT: - if( !a.isInSparseFormat() ) - executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), n, m, k, _outerProductType, 0, m, 0, n); + if( a instanceof CompressedMatrixBlock ) + executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n); + else if( !a.isInSparseFormat() ) + executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else - executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, n, m, k, (int) a.getNonZeros(), _outerProductType, 0, m, 0, n); + executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); break; case AGG_OUTER_PRODUCT: @@ -199,6 +215,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator } //post-processing + if( a instanceof CompressedMatrixBlock && out.isInSparseFormat() + && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT ) + out.sortSparseRows(); out.recomputeNonZeros(); out.examSparsity(); } @@ -230,10 +249,10 @@ public abstract class SpoofOuterProduct extends SpoofOperator { //if left outerproduct gives a value of k*n instead of n*k, change it back to n*k and then transpose the output //if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT && out.getNumRows() == inputs.get(2).getNumColumns() && out.getNumColumns() == inputs.get(2).getNumRows()) - if(_outerProductType == OutProdType.LEFT_OUTER_PRODUCT 
) - out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns()); // n*k - else if(_outerProductType == OutProdType.RIGHT_OUTER_PRODUCT ) - out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns()); // m*k + if( _outerProductType == OutProdType.LEFT_OUTER_PRODUCT ) + out.reset(inputs.get(0).getNumColumns(),inputs.get(1).getNumColumns(), false); // n*k + else if( _outerProductType == OutProdType.RIGHT_OUTER_PRODUCT ) + out.reset(inputs.get(0).getNumRows(),inputs.get(1).getNumColumns(), false); // m*k out.allocateDenseBlock(); } @@ -247,6 +266,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator final int n = inputs.get(0).getNumColumns(); final int k = inputs.get(1).getNumColumns(); // rank + MatrixBlock a = inputs.get(0); + try { ExecutorService pool = Executors.newFixedThreadPool(numThreads); @@ -255,14 +276,25 @@ public abstract class SpoofOuterProduct extends SpoofOperator //for wdivmm-right, parallelization over rows; both ensure disjoint results) if( _outerProductType == OutProdType.LEFT_OUTER_PRODUCT ) { - int blklen = (int)(Math.ceil((double)n/numThreads)); - for( int j=0; j<numThreads & j*blklen<n; j++ ) - tasks.add(new ParExecTask(inputs.get(0), ab[0], ab[1], b, scalars, out, n, m, k, _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, n))); + if( a instanceof CompressedMatrixBlock ) { + //parallelize over column groups + int numCG = ((CompressedMatrixBlock)a).getNumColGroups(); + int blklen = (int)(Math.ceil((double)numCG/numThreads)); + for( int j=0; j<numThreads & j*blklen<numCG; j++ ) + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, numCG))); + } + else { + //parallelize over column partitions + int blklen = (int)(Math.ceil((double)n/numThreads)); + for( int j=0; j<numThreads & j*blklen<n; j++ ) + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, j*blklen, Math.min((j+1)*blklen, n))); + } } 
- else { ///right // cellwise + else { //right or cell-wise + //parallelize over row partitions int blklen = (int)(Math.ceil((double)m/numThreads)); for( int i=0; i<numThreads & i*blklen<m; i++ ) - tasks.add(new ParExecTask(inputs.get(0), ab[0], ab[1], b, scalars, out, n, m, k, _outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n)); + tasks.add(new ParExecTask(a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, i*blklen, Math.min((i+1)*blklen,m), 0, n)); } List<Future<Long>> taskret = pool.invokeAll(tasks); pool.shutdown(); @@ -271,13 +303,20 @@ public abstract class SpoofOuterProduct extends SpoofOperator } catch (Exception e) { throw new DMLRuntimeException(e); - } + } //post-processing + if( a instanceof CompressedMatrixBlock ) { + if( out.isInSparseFormat() && _outerProductType == OutProdType.CELLWISE_OUTER_PRODUCT ) + out.sortSparseRows(); + else if( _outerProductType == OutProdType.LEFT_OUTER_PRODUCT ) + out.recomputeNonZeros(); + } out.examSparsity(); } - private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars , double[] c, int n, int m, int k, OutProdType type, int rl, int ru, int cl, int cu ) + private void executeDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars, + double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), @@ -296,12 +335,13 @@ public abstract class SpoofOuterProduct extends SpoofOperator for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k) if( a[ix+j] != 0 ) { cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? 
vix : uix; - genexecDense( a[ix+j], u, uix, v, vix, b, scalars, c, cix, n, m, k, i,j);//(ix+j)/n, (ix+j)%n ); + genexecDense( a[ix+j], u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j); } } } - private void executeCellwiseDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars , double[] c, int n, int m, int k, OutProdType type, int rl, int ru, int cl, int cu ) + private void executeCellwiseDense(double[] a, double[] u, double[] v, double[][] b, double[] scalars, + double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), @@ -320,14 +360,15 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( a[ix+j] != 0 ) { //int cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix; if(type == OutProdType.CELLWISE_OUTER_PRODUCT) - c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, n, m, k, i, j ); + c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j ); else - c[0] += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, n, m, k, i, j); // (ix+j)/n, (ix+j)%n ); + c[0] += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j); } } } - private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars , double[] c, int n, int m, int k, int nnz, OutProdType type, int rl, int ru, int cl, int cu) + private void executeSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars, + double[] c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) { boolean left = (_outerProductType== OutProdType.LEFT_OUTER_PRODUCT); @@ -364,7 +405,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator int index = wpos + curk[i-bi]; for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { genexecDense( wval[index], u, uix, v, wix[index]*k, b, scalars, c, - 
(left ? wix[index]*k : uix), n, m, k, i, wix[index] ); + (left ? wix[index]*k : uix), m, n, k, i, wix[index] ); } curk[i-bi] = index - wpos; } @@ -372,7 +413,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator } } - private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars , MatrixBlock out, int n, int m, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu ) + private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, double[][] b, double[] scalars, + MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu ) { final int blocksizeIJ = (int) (8L*m*n/nnz); int[] curk = new int[blocksizeIJ]; @@ -396,9 +438,9 @@ public abstract class SpoofOuterProduct extends SpoofOperator int index = wpos + curk[i-bi]; for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { if(type == OutProdType.CELLWISE_OUTER_PRODUCT) - c[index] = genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, n, m, k, i, wix[index] ); + c[wix[index]] = genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] ); else - c[0] += genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, n, m, k, i, wix[index]); // (ix+j)/n, (ix+j)%n ); + c[0] += genexecCellwise( wval[index], u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]); } curk[i-bi] = index - wpos; } @@ -423,8 +465,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator double[] wval = sblock.values(i); int index = wpos + curk[i-bi]; for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { - c.append(i, index, genexecCellwise( wval[index], u, uix, v, - wix[index]*k, b, scalars, n, m, k, i, wix[index] )); + c.append(i, wix[index], genexecCellwise( wval[index], u, uix, v, + wix[index]*k, b, scalars, m, n, k, i, wix[index] )); } curk[i-bi] = index - wpos; } @@ -432,10 +474,56 @@ public abstract class SpoofOuterProduct extends SpoofOperator } } } + 
+ private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, + double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) + { + boolean left = (_outerProductType==OutProdType.LEFT_OUTER_PRODUCT); + + Iterator<IJV> iter = !left ? a.getIterator(rl, ru, false) : + a.getIterator(rl, ru, cl, cu, false); //cl/cu -> colgroups + while( iter.hasNext() ) { + IJV cell = iter.next(); + int uix = cell.getI() * k; + int vix = cell.getJ() * k; + genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c, + left ? vix : uix, m, n, k, cell.getI(), cell.getJ()); + } + } + + private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, + MatrixBlock out, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) + { + double[] c = out.getDenseBlock(); + SparseBlock csblock = out.getSparseBlock(); + + Iterator<IJV> iter = a.getIterator(rl, ru, false); + while( iter.hasNext() ) { + IJV cell = iter.next(); + int uix = cell.getI() * k; + int vix = cell.getJ() * k; + if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) { + if( out.isInSparseFormat() ) { + csblock.allocate(cell.getI()); + csblock.append(cell.getI(), cell.getJ(), + genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ())); + } + else { + c[cell.getI()*n+cell.getJ()] = + genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); + } + } + else { + c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); + } + } + } - protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, double[][] b, double[] scalars , double[] c, int ci, int n, int m, int k, int rowIndex, int colIndex ); + protected abstract void genexecDense( double a, double[] u, int ui, double[] v, int vi, double[][] b, + double[] scalars, double[] c, int ci, int m, int n, int k, int 
rowIndex, int colIndex); - protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, double[][] b, double[] scalars , int n, int m, int k, int rowIndex, int colIndex); + protected abstract double genexecCellwise( double a, double[] u, int ui, double[] v, int vi, double[][] b, + double[] scalars, int m, int n, int k, int rowIndex, int colIndex); private class ParExecTask implements Callable<Long> { @@ -454,15 +542,15 @@ public abstract class SpoofOuterProduct extends SpoofOperator private final int _cl; private final int _cu; - protected ParExecTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars , MatrixBlock c, int clen, int rlen, int k, OutProdType type, int rl, int ru, int cl, int cu ) { + protected ParExecTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { _a = a; _u = u; _v = v; _b = b; _c = c; _scalars = scalars; - _clen = clen; - _rlen = rlen; + _rlen = m; + _clen = n; _k = k; _type = type; _rl = rl; @@ -477,24 +565,30 @@ public abstract class SpoofOuterProduct extends SpoofOperator { case LEFT_OUTER_PRODUCT: case RIGHT_OUTER_PRODUCT: - if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _clen, _rlen, _k, _type, _rl, _ru, _cl, _cu); + if( _a instanceof CompressedMatrixBlock ) + executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + else if( !_a.isInSparseFormat() ) + executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else - executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _clen, _rlen, _k, (int) _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); + executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _a.getNonZeros(), _type, 
_rl, _ru, _cl, _cu); break; case CELLWISE_OUTER_PRODUCT: - if( !_c.isInSparseFormat() ) - executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _clen, _rlen, _k, _type, _rl, _ru, _cl, _cu); + if( _a instanceof CompressedMatrixBlock ) + executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + else if( !_c.isInSparseFormat() ) + executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else - executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _clen, _rlen, _k, (int) _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); + executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); break; case AGG_OUTER_PRODUCT: throw new DMLRuntimeException("Wrong codepath for aggregate outer product."); } - int rl = _outerProductType == OutProdType.LEFT_OUTER_PRODUCT ? _cl : _rl; - int ru = _outerProductType == OutProdType.LEFT_OUTER_PRODUCT ? _cu : _ru; - return _c.recomputeNonZeros(rl, ru-1, 0, _c.getNumColumns()-1); + boolean left = (_outerProductType == OutProdType.LEFT_OUTER_PRODUCT); + int rl = left ? _cl : _rl; + int ru = left ? _cu : _ru; + return (_a instanceof CompressedMatrixBlock && left) ? 
-1 : + _c.recomputeNonZeros(rl, ru-1, 0, _c.getNumColumns()-1); } } @@ -505,8 +599,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator private final double[] _v; private final double[][] _b; private final double[] _scalars; - private final int _clen; private final int _rlen; + private final int _clen; private final int _k; private final OutProdType _type; private final int _rl; @@ -514,14 +608,14 @@ public abstract class SpoofOuterProduct extends SpoofOperator private final int _cl; private final int _cu; - protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, int clen, int rlen, int k, OutProdType type, int rl, int ru, int cl, int cu ) { + protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, double[][] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { _a = a; _u = u; _v = v; _b = b; _scalars = scalars; - _clen = clen; - _rlen = rlen; + _rlen = m; + _clen = n; _k = k; _type = type; _rl = rl; @@ -534,10 +628,12 @@ public abstract class SpoofOuterProduct extends SpoofOperator public Double call() throws DMLRuntimeException { MatrixBlock out = new MatrixBlock(1, 1, false); out.allocateDenseBlock(); - if(!_a.isInSparseFormat()) - executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, out.getDenseBlock(), _clen, _rlen, _k, _type, _rl, _ru, _cl, _cu); + if( _a instanceof CompressedMatrixBlock ) + executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, out, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + else if( !_a.isInSparseFormat() ) + executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, out.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else - executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, out, _clen, _rlen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); + executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, out, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, 
_ru, _cl, _cu); return out.getDenseBlock()[0]; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java index f100495..ac0b803 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java @@ -798,6 +798,8 @@ public class ColGroupOLE extends ColGroupOffset @Override public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No more OLE entries."); int ret = _rpos; nextRowOffset(); return ret; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java index 184c69a..5fc4a5a 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOffset.java @@ -451,7 +451,7 @@ public abstract class ColGroupOffset extends ColGroupValue _rl = rl; _ru = ru; _inclZeros = inclZeros; - _vpos = 0; + _vpos = -1; _rpos = -1; _cpos = 0; getNextValue(); @@ -461,7 +461,7 @@ public abstract class ColGroupOffset extends ColGroupValue public boolean hasNext() { return (_rpos < _ru); } - + @Override public IJV next() { _buff.set(_rpos, _colIndexes[_cpos], (_vpos >= getNumValues()) ? @@ -472,27 +472,28 @@ public abstract class ColGroupOffset extends ColGroupValue private void getNextValue() { //advance to next value iterator if required - if( _viter == null ) { - _viter = (getNumValues()>0) ? 
//first iterator - getIterator(_vpos, _rl, _ru) : new ZeroIterator(_rl, _ru); - } - else if( _viter instanceof ZeroIterator && !_viter.hasNext() ) { + if(_viter != null && _viter instanceof ZeroIterator && !_viter.hasNext() ) { _rpos = _ru; //end after zero iterator return; } - else if( _cpos+1 >= getNumCols() && !_viter.hasNext() ) { - _vpos++; // - if( _vpos < getNumValues() ) - _viter = getIterator(_vpos, _rl, _ru); - else if( _inclZeros && _zeros) - _viter = new ZeroIterator(_rl, _ru); - else - _rpos = _ru; //end w/o zero iterator + else if( _cpos+1 >= getNumCols() && !(_viter!=null && _viter.hasNext()) ) { + do { + _vpos++; + if( _vpos < getNumValues() ) + _viter = getIterator(_vpos, _rl, _ru); + else if( _inclZeros && _zeros) + _viter = new ZeroIterator(_rl, _ru); + else { + _rpos = _ru; //end w/o zero iterator + return; + } + } + while(!_viter.hasNext()); _rpos = -1; } - //get next value - if( _rpos < 0 || _cpos+1 >= getNumCols()) { + //get next value from valid iterator + if( _rpos < 0 || _cpos+1 >= getNumCols() ) { _rpos = _viter.next(); _cpos = 0; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java index 7f0dcf9..5b1f804 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupRLE.java @@ -802,6 +802,8 @@ public class ColGroupRLE extends ColGroupOffset @Override public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No more RLE entries."); int ret = _rpos; nextRowOffset(); return ret; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java index d3bdcca..0fbe608 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/compress/CompressedMatrixBlock.java @@ -169,6 +169,10 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable public ArrayList<ColGroup> getColGroups() { return _colGroups; } + + public int getNumColGroups() { + return _colGroups.size(); + } /** * Obtain whether this block is in compressed form or not. @@ -802,7 +806,11 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable } public Iterator<IJV> getIterator(int rl, int ru, boolean inclZeros) { - return new ColumnGroupIterator(rl, ru, inclZeros); + return getIterator(rl, ru, 0, getNumColGroups(), inclZeros); + } + + public Iterator<IJV> getIterator(int rl, int ru, int cgl, int cgu, boolean inclZeros) { + return new ColumnGroupIterator(rl, ru, cgl, cgu, inclZeros); } ////////////////////////////////////////// @@ -2229,6 +2237,7 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable //iterator configuration private final int _rl; private final int _ru; + private final int _cgu; private final boolean _inclZeros; //iterator state @@ -2236,10 +2245,12 @@ public class CompressedMatrixBlock extends MatrixBlock implements Externalizable private Iterator<IJV> _iterColGroup = null; private boolean _noNext = false; - public ColumnGroupIterator(int rl, int ru, boolean inclZeros) { + public ColumnGroupIterator(int rl, int ru, int cgl, int cgu, boolean inclZeros) { _rl = rl; _ru = ru; + _cgu = cgu; _inclZeros = inclZeros; + _posColGroup = cgl-1; getNextIterator(); } @@ -2252,14 +2263,14 @@ public class CompressedMatrixBlock extends MatrixBlock implements 
Externalizable public IJV next() { if( _noNext ) throw new RuntimeException("No more entries."); - IJV ret = _iterColGroup.next(); + IJV ret = _iterColGroup.next(); if( !_iterColGroup.hasNext() ) getNextIterator(); return ret; } private void getNextIterator() { - while( _posColGroup+1 < _colGroups.size() ) { + while( _posColGroup+1 < _cgu ) { _posColGroup++; _iterColGroup = _colGroups.get(_posColGroup) .getIterator(_rl, _ru, _inclZeros); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java index a94cdb2..417905d 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedCellwiseTest.java @@ -315,6 +315,7 @@ public class CompressedCellwiseTest extends AutomatedTestBase OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites; OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; OptimizerUtils.ALLOW_OPERATOR_FUSION = true; + CompressedMatrixBlock.ALLOW_DDC_ENCODING = true; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java index 77b1294..4a16694 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java 
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedMultiAggregateTest.java @@ -315,6 +315,7 @@ public class CompressedMultiAggregateTest extends AutomatedTestBase OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites; OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; OptimizerUtils.ALLOW_OPERATOR_FUSION = true; + CompressedMatrixBlock.ALLOW_DDC_ENCODING = true; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java new file mode 100644 index 0000000..8c3a695 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CompressedOuterProductTest.java @@ -0,0 +1,267 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.test.integration.functions.codegen; + +import java.io.File; +import java.util.HashMap; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.hops.OptimizerUtils; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.runtime.compress.CompressedMatrixBlock; +import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; + +public class CompressedOuterProductTest extends AutomatedTestBase +{ + private static final String TEST_NAME1 = "CompressedOuterProductMain"; + private static final String TEST_DIR = "functions/codegen/"; + private static final String TEST_CLASS_DIR = TEST_DIR + CompressedOuterProductTest.class.getSimpleName() + "/"; + private final static String TEST_CONF = "SystemML-config-codegen-compress.xml"; + private final static File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); + + private static final int rows = 2023; + private static final int cols = 1987; + private static final double sparsity1 = 0.9; + private static final double sparsity2 = 0.1; + private static final double sparsity3 = 0.0; + private static final double eps = Math.pow(10, -6); + + public enum SparsityType { + DENSE, + SPARSE, + EMPTY, + } + + public enum ValueType { + RAND, //UC + CONST, //RLE + RAND_ROUND_OLE, //OLE + RAND_ROUND_DDC, //DDC + } + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "R" }) ); + } + + @Test + public void testCompressedCellwiseMainDenseConstCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.CONST, ExecType.CP ); + } + + @Test + public void 
testCompressedCellwiseMainDenseRandCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainDenseRand2CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainDenseRand3CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainSparseConstCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.CONST, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainSparseRandCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainSparseRand2CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainSparseRand3CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainEmptyConstCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.CONST, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainEmptyRandCP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainEmptyRand2CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_DDC, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainEmptyRand3CP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.CP ); + } + + @Test + public void testCompressedCellwiseMainDenseConstSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.CONST, ExecType.SPARK ); + } + + @Test + public void 
testCompressedCellwiseMainDenseRandSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainDenseRand2SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_DDC, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainDenseRand3SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.DENSE, ValueType.RAND_ROUND_OLE, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainSparseConstSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.CONST, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainSparseRandSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainSparseRand2SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_DDC, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainSparseRand3SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.SPARSE, ValueType.RAND_ROUND_OLE, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainEmptyConstSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.CONST, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainEmptyRandSP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainEmptyRand2SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_DDC, ExecType.SPARK ); + } + + @Test + public void testCompressedCellwiseMainEmptyRand3SP() { + testCompressedCellwise( TEST_NAME1, SparsityType.EMPTY, ValueType.RAND_ROUND_OLE, ExecType.SPARK ); + } + + private void testCompressedCellwise(String testname, SparsityType stype, ValueType vtype, ExecType et) + { + boolean oldRewrites = 
OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION; + RUNTIME_PLATFORM platformOld = rtplatform; + switch( et ){ + case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; + case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; + default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; + } + + boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; + if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + try + { + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = true; + TestConfiguration config = getTestConfiguration(testname); + loadTestConfiguration(config); + + String HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = HOME + testname + ".dml"; + programArgs = new String[]{"-explain", "-stats", + "-args", input("X"), output("R") }; + + fullRScriptName = HOME + testname + ".R"; + rCmd = getRCmd(inputDir(), expectedDir()); + + //select input sparsity according to sparsity type + double sparsity = -1; + switch( stype ){ + case DENSE: sparsity = sparsity1; break; + case SPARSE: sparsity = sparsity2; break; + case EMPTY: sparsity = sparsity3; break; + } + + //generate input data + double min = (vtype==ValueType.CONST)? 
10 : -10; + double[][] X = TestUtils.generateTestMatrix(rows, cols, min, 10, sparsity, 7); + if( vtype==ValueType.RAND_ROUND_OLE || vtype==ValueType.RAND_ROUND_DDC ) { + CompressedMatrixBlock.ALLOW_DDC_ENCODING = (vtype==ValueType.RAND_ROUND_DDC); + X = TestUtils.round(X); + } + writeInputMatrixWithMTD("X", X, true); + + //run tests + runTest(true, false, null, -1); + runRScript(true); + + //compare matrices + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("R"); + HashMap<CellIndex, Double> rfile = readRMatrixFromFS("R"); + TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); + Assert.assertTrue(heavyHittersContainsSubString("spoofOP") + || heavyHittersContainsSubString("sp_spoofOP")); + } + finally { + rtplatform = platformOld; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; + OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites; + OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; + OptimizerUtils.ALLOW_OPERATOR_FUSION = true; + CompressedMatrixBlock.ALLOW_DDC_ENCODING = true; + } + } + + /** + * Override default configuration with custom test configuration to ensure + * scratch space and local temporary directory locations are also updated. + */ + @Override + protected File getConfigTemplateFile() { + // Instrumentation in this test's output log to show custom configuration file used for template. 
+ System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath()); + return TEST_CONF_FILE; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/scripts/functions/codegen/CompressedOuterProductMain.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/CompressedOuterProductMain.R b/src/test/scripts/functions/codegen/CompressedOuterProductMain.R new file mode 100644 index 0000000..e457645 --- /dev/null +++ b/src/test/scripts/functions/codegen/CompressedOuterProductMain.R @@ -0,0 +1,36 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +library("Matrix") +library("matrixStats") + +X = readMM(paste(args[1], "X.mtx", sep="")); + +U = matrix(0.0001, nrow(X), 10); +V = matrix(0.0001, ncol(X), 10); + +R1 = X * (7 + (U %*% t(V) + 3)); +R2 = (X * (7 + (U %*% t(V) + 3))) %*% V; +R3 = t(U) %*% (X * (7 + (U %*% t(V) + 3))); +R = as.matrix(sum(R1 * (7 + (R2 %*% R3 + 3))/1e6)); + +writeMM(as(R,"CsparseMatrix"), paste(args[2], "R", sep="")); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml b/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml new file mode 100644 index 0000000..c7596e8 --- /dev/null +++ b/src/test/scripts/functions/codegen/CompressedOuterProductMain.dml @@ -0,0 +1,33 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +X = read($1) + +U = matrix(0.0001, nrow(X), 10); +V = matrix(0.0001, ncol(X), 10); + +R1 = X * (7 + (U %*% t(V) + 3)); +R2 = (X * (7 + (U %*% t(V) + 3))) %*% V; +R3 = t(U) %*% (X * (7 + (U %*% t(V) + 3))); +if(1==1) {} +R = as.matrix(sum(R1 * (7 + (R2 %*% R3 + 3))/1e6)); + +write(R, $2) http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/9dad2fe8/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java ---------------------------------------------------------------------- diff --git a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java index bc5c21f..ea8a1f1 100644 --- a/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java +++ b/src/test_suites/java/org/apache/sysml/test/integration/functions/codegen/ZPackageSuite.java @@ -35,6 +35,7 @@ import org.junit.runners.Suite; CellwiseTmplTest.class, CompressedCellwiseTest.class, CompressedMultiAggregateTest.class, + CompressedOuterProductTest.class, DAGCellwiseTmplTest.class, MultiAggTmplTest.class, OuterProdTmplTest.class,