Repository: systemml
Updated Branches:
  refs/heads/master b9b273d87 -> 1f0745c5c
[SYSTEMML-2074] Fix codegen row operations over large dense blocks This patch fixes failing codegen row operations over large (i.e., multi-chunk) dense blocks for inputs and outputs. In detail, this entails a slightly modified api of generated operators and related implementation changes to also handle scenarios where large dense input and output blocks are not aligned (e.g., with different ncol). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ecfec126 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ecfec126 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ecfec126 Branch: refs/heads/master Commit: ecfec12606c0465a23b2a34b5cc7f3880f5c6d2a Parents: b9b273d Author: Matthias Boehm <[email protected]> Authored: Fri Jan 19 14:36:36 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Fri Jan 19 17:55:07 2018 -0800 ---------------------------------------------------------------------- .../sysml/hops/codegen/cplan/CNodeRow.java | 8 +-- .../sysml/runtime/codegen/SpoofRowwise.java | 59 +++++++++----------- 2 files changed, 29 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/ecfec126/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java index 1a89570..e6aa53a 100644 --- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java +++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java @@ -41,17 +41,17 @@ public class CNodeRow extends CNodeTpl + " public %TMP%() {\n" + " super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, %VECT_MEM%);\n" + " }\n" - + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, 
double[] c, int len, int rix) { \n" + + " protected void genexec(double[] a, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { \n" + "%BODY_dense%" + " }\n" - + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int alen, int len, int rix) { \n" + + " protected void genexec(double[] avals, int[] aix, int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, int rix) { \n" + "%BODY_sparse%" - + " }\n" + + " }\n" + "}\n"; private static final String TEMPLATE_ROWAGG_OUT = " c[rix] = %IN%;\n"; private static final String TEMPLATE_FULLAGG_OUT = " c[0] += %IN%;\n"; - private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, rix*%LEN%, %LEN%);\n"; + private static final String TEMPLATE_NOAGG_OUT = " LibSpoofPrimitives.vectWrite(%IN%, c, ci, %LEN%);\n"; public CNodeRow(ArrayList<CNode> inputs, CNode output ) { super(inputs, output); http://git-wip-us.apache.org/repos/asf/systemml/blob/ecfec126/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index 6e098b9..33d67c1 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -34,6 +34,7 @@ import org.apache.sysml.runtime.compress.CompressedMatrixBlock; import org.apache.sysml.runtime.instructions.cp.DoubleObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.matrix.data.DenseBlock; +import org.apache.sysml.runtime.matrix.data.DenseBlockFactory; import org.apache.sysml.runtime.matrix.data.LibMatrixMult; import org.apache.sysml.runtime.matrix.data.LibMatrixReorg; import org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ 
-138,7 +139,7 @@ public abstract class SpoofRowwise extends SpoofOperator getMinColsMatrixSideInputs(inputs) : -1; if( !aggIncr || !out.isAllocated() ) allocateOutputMatrix(m, n, n2, out); - double[] c = out.getDenseBlockValues(); + DenseBlock c = out.getDenseBlock(); final boolean flipOut = _type.isRowTypeB1ColumnAgg() && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), out.getNumColumns()); @@ -215,11 +216,11 @@ public abstract class SpoofRowwise extends SpoofOperator int outLen = out.getNumRows() * out.getNumColumns(); for( int i=0, lb=0; i<blklens.size(); lb+=blklens.get(i), i++ ) tasks.add(new ParColAggTask(a, b, scalars, n, n2, outLen, lb, lb+blklens.get(i))); - List<Future<double[]>> taskret = pool.invokeAll(tasks); + List<Future<DenseBlock>> taskret = pool.invokeAll(tasks); //aggregate partial results int len = _type.isColumnAgg() ? out.getNumRows()*out.getNumColumns() : 1; - for( Future<double[]> task : taskret ) - LibMatrixMult.vectAdd(task.get(), out.getDenseBlockValues(), 0, 0, len); + for( Future<DenseBlock> task : taskret ) + LibMatrixMult.vectAdd(task.get().valuesAt(0), out.getDenseBlockValues(), 0, 0, len); out.recomputeNonZeros(); } else { @@ -287,65 +288,55 @@ public abstract class SpoofRowwise extends SpoofOperator out.setNumColumns(rlen); } - private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) - { - //TODO handle large dense outputs (potentially misaligned) + private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { if( a == null ) return; - SideInput[] lb = createSparseSideInputs(b, true); for( int i=rl; i<ru; i++ ) { - double[] avals = a.values(i); - int aix = a.pos(i); - genexec( avals, aix, lb, scalars, c, n, i ); + genexec(a.values(i), a.pos(i), lb, scalars, + c.values(i), c.pos(i), n, i ); } } - private void executeSparse(SparseBlock sblock, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) - { + private void 
executeSparse(SparseBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { SideInput[] lb = createSparseSideInputs(b, true); SparseRow empty = new SparseRowVector(1); for( int i=rl; i<ru; i++ ) { - if( sblock!=null && !sblock.isEmpty(i) ) { - double[] avals = sblock.values(i); - int[] aix = sblock.indexes(i); - int apos = sblock.pos(i); - int alen = sblock.size(i); - + if( a!=null && !a.isEmpty(i) ) { //call generated method - genexec(avals, aix, apos, lb, scalars, c, alen, n, i); + genexec(a.values(i), a.indexes(i), a.pos(i), lb, scalars, + c.values(i), c.pos(i), a.size(i), n, i); } else - genexec(empty.values(), - empty.indexes(), 0, lb, scalars, c, 0, n, i); + genexec(empty.values(), empty.indexes(), 0, lb, scalars, + c.values(i), c.pos(i), 0, n, i); } } - private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) - { + private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, double[] scalars, DenseBlock c, int n, int rl, int ru) { if( a.isEmptyBlock(false) ) return; - SideInput[] lb = createSparseSideInputs(b, true); Iterator<double[]> iter = a.getDenseRowIterator(rl, ru); for( int i=rl; iter.hasNext(); i++ ) { - genexec(iter.next(), 0, lb, scalars, c, n, i); + genexec(iter.next(), 0, lb, scalars, + c.values(i), c.pos(i), n, i); } } //methods to be implemented by generated operators of type SpoofRowAggrgate protected abstract void genexec(double[] a, int ai, - SideInput[] b, double[] scalars, double[] c, int len, int rowIndex); + SideInput[] b, double[] scalars, double[] c, int ci, int len, int rowIndex); protected abstract void genexec(double[] avals, int[] aix, int ai, - SideInput[] b, double[] scalars, double[] c, int alen, int n, int rowIndex); + SideInput[] b, double[] scalars, double[] c, int ci, int alen, int n, int rowIndex); /** * Task for multi-threaded column aggregation operations. 
*/ - private class ParColAggTask implements Callable<double[]> + private class ParColAggTask implements Callable<DenseBlock> { private final MatrixBlock _a; private final SideInput[] _b; @@ -365,12 +356,12 @@ public abstract class SpoofRowwise extends SpoofOperator } @Override - public double[] call() throws DMLRuntimeException { + public DenseBlock call() throws DMLRuntimeException { //allocate vector intermediates and partial output if( _reqVectMem > 0 ) LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2); - double[] c = new double[_outLen]; + DenseBlock c = DenseBlockFactory.createDenseBlock(1, _outLen); if( _a instanceof CompressedMatrixBlock ) executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _clen, _rl, _ru); @@ -417,11 +408,11 @@ public abstract class SpoofRowwise extends SpoofOperator LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2); if( _a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlockValues(), _clen, _rl, _ru); + executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlockValues(), _clen, _rl, _ru); + executeDense(_a.getDenseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); else - executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlockValues(), _clen, _rl, _ru); + executeSparse(_a.getSparseBlock(), _b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru); if( _reqVectMem > 0 ) LibSpoofPrimitives.cleanupThreadLocalMemory();
