Repository: systemml
Updated Branches:
  refs/heads/master b9b273d87 -> 1f0745c5c


[SYSTEMML-2074] Fix codegen row operations over large dense blocks

This patch fixes failing codegen row operations over large (i.e.,
multi-chunk) dense blocks for inputs and outputs. In detail, this
entails a slightly modified API of generated operators (genexec now
takes an explicit output offset ci) and related implementation changes
to also handle scenarios where large dense input and output blocks are
not aligned (e.g., with a different number of columns).


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ecfec126
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ecfec126
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ecfec126

Branch: refs/heads/master
Commit: ecfec12606c0465a23b2a34b5cc7f3880f5c6d2a
Parents: b9b273d
Author: Matthias Boehm <[email protected]>
Authored: Fri Jan 19 14:36:36 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Fri Jan 19 17:55:07 2018 -0800

----------------------------------------------------------------------
 .../sysml/hops/codegen/cplan/CNodeRow.java      |  8 +--
 .../sysml/runtime/codegen/SpoofRowwise.java     | 59 +++++++++-----------
 2 files changed, 29 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ecfec126/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java 
b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
index 1a89570..e6aa53a 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeRow.java
@@ -41,17 +41,17 @@ public class CNodeRow extends CNodeTpl
                        + "  public %TMP%() {\n"
                        + "    super(RowType.%TYPE%, %CONST_DIM2%, %TB1%, 
%VECT_MEM%);\n"
                        + "  }\n"
-                       + "  protected void genexec(double[] a, int ai, 
SideInput[] b, double[] scalars, double[] c, int len, int rix) { \n"
+                       + "  protected void genexec(double[] a, int ai, 
SideInput[] b, double[] scalars, double[] c, int ci, int len, int rix) { \n"
                        + "%BODY_dense%"
                        + "  }\n"
-                       + "  protected void genexec(double[] avals, int[] aix, 
int ai, SideInput[] b, double[] scalars, double[] c, int alen, int len, int 
rix) { \n"
+                       + "  protected void genexec(double[] avals, int[] aix, 
int ai, SideInput[] b, double[] scalars, double[] c, int ci, int alen, int len, 
int rix) { \n"
                        + "%BODY_sparse%"
-                       + "  }\n"                       
+                       + "  }\n"
                        + "}\n";
 
        private static final String TEMPLATE_ROWAGG_OUT  = "    c[rix] = 
%IN%;\n";
        private static final String TEMPLATE_FULLAGG_OUT = "    c[0] += 
%IN%;\n";
-       private static final String TEMPLATE_NOAGG_OUT   = "    
LibSpoofPrimitives.vectWrite(%IN%, c, rix*%LEN%, %LEN%);\n";
+       private static final String TEMPLATE_NOAGG_OUT   = "    
LibSpoofPrimitives.vectWrite(%IN%, c, ci, %LEN%);\n";
        
        public CNodeRow(ArrayList<CNode> inputs, CNode output ) {
                super(inputs, output);

http://git-wip-us.apache.org/repos/asf/systemml/blob/ecfec126/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java 
b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
index 6e098b9..33d67c1 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java
@@ -34,6 +34,7 @@ import 
org.apache.sysml.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysml.runtime.instructions.cp.DoubleObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.matrix.data.DenseBlock;
+import org.apache.sysml.runtime.matrix.data.DenseBlockFactory;
 import org.apache.sysml.runtime.matrix.data.LibMatrixMult;
 import org.apache.sysml.runtime.matrix.data.LibMatrixReorg;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
@@ -138,7 +139,7 @@ public abstract class SpoofRowwise extends SpoofOperator
                        getMinColsMatrixSideInputs(inputs) : -1;
                if( !aggIncr || !out.isAllocated() )
                        allocateOutputMatrix(m, n, n2, out);
-               double[] c = out.getDenseBlockValues();
+               DenseBlock c = out.getDenseBlock();
                final boolean flipOut = _type.isRowTypeB1ColumnAgg()
                        && LibSpoofPrimitives.isFlipOuter(out.getNumRows(), 
out.getNumColumns());
                
@@ -215,11 +216,11 @@ public abstract class SpoofRowwise extends SpoofOperator
                                int outLen = out.getNumRows() * 
out.getNumColumns();
                                for( int i=0, lb=0; i<blklens.size(); 
lb+=blklens.get(i), i++ )
                                        tasks.add(new ParColAggTask(a, b, 
scalars, n, n2, outLen, lb, lb+blklens.get(i)));
-                               List<Future<double[]>> taskret = 
pool.invokeAll(tasks);
+                               List<Future<DenseBlock>> taskret = 
pool.invokeAll(tasks);
                                //aggregate partial results
                                int len = _type.isColumnAgg() ? 
out.getNumRows()*out.getNumColumns() : 1;
-                               for( Future<double[]> task : taskret )
-                                       LibMatrixMult.vectAdd(task.get(), 
out.getDenseBlockValues(), 0, 0, len);
+                               for( Future<DenseBlock> task : taskret )
+                                       
LibMatrixMult.vectAdd(task.get().valuesAt(0), out.getDenseBlockValues(), 0, 0, 
len);
                                out.recomputeNonZeros();
                        }
                        else {
@@ -287,65 +288,55 @@ public abstract class SpoofRowwise extends SpoofOperator
                out.setNumColumns(rlen);
        }
        
-       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, double[] c, int n, int rl, int ru) 
-       {
-               //TODO handle large dense outputs (potentially misaligned)
+       private void executeDense(DenseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru) {
                if( a == null )
                        return;
-               
                SideInput[] lb = createSparseSideInputs(b, true);
                for( int i=rl; i<ru; i++ ) {
-                       double[] avals = a.values(i);
-                       int aix = a.pos(i);
-                       genexec( avals, aix, lb, scalars, c, n, i );
+                       genexec(a.values(i), a.pos(i), lb, scalars,
+                               c.values(i), c.pos(i), n, i );
                }
        }
        
-       private void executeSparse(SparseBlock sblock, SideInput[] b, double[] 
scalars, double[] c, int n, int rl, int ru) 
-       {
+       private void executeSparse(SparseBlock a, SideInput[] b, double[] 
scalars, DenseBlock c, int n, int rl, int ru) {
                SideInput[] lb = createSparseSideInputs(b, true);
                SparseRow empty = new SparseRowVector(1);
                for( int i=rl; i<ru; i++ ) {
-                       if( sblock!=null && !sblock.isEmpty(i) ) {
-                               double[] avals = sblock.values(i);
-                               int[] aix = sblock.indexes(i);
-                               int apos = sblock.pos(i);
-                               int alen = sblock.size(i);
-                               
+                       if( a!=null && !a.isEmpty(i) ) {
                                //call generated method
-                               genexec(avals, aix, apos, lb, scalars, c, alen, 
n, i);
+                               genexec(a.values(i), a.indexes(i), a.pos(i), 
lb, scalars,
+                                       c.values(i), c.pos(i), a.size(i), n, i);
                        }
                        else
-                               genexec(empty.values(),
-                                       empty.indexes(), 0, lb, scalars, c, 0, 
n, i);
+                               genexec(empty.values(), empty.indexes(), 0, lb, 
scalars,
+                                       c.values(i), c.pos(i), 0, n, i);
                }
        }
        
-       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, double[] c, int n, int rl, int ru) 
-       {
+       private void executeCompressed(CompressedMatrixBlock a, SideInput[] b, 
double[] scalars, DenseBlock c, int n, int rl, int ru) {
                if( a.isEmptyBlock(false) )
                        return;
-               
                SideInput[] lb = createSparseSideInputs(b, true);
                Iterator<double[]> iter = a.getDenseRowIterator(rl, ru);
                for( int i=rl; iter.hasNext(); i++ ) {
-                       genexec(iter.next(), 0, lb, scalars, c, n, i);
+                       genexec(iter.next(), 0, lb, scalars,
+                               c.values(i), c.pos(i), n, i);
                }
        }
        
        //methods to be implemented by generated operators of type 
SpoofRowAggrgate 
        
        protected abstract void genexec(double[] a, int ai, 
-               SideInput[] b, double[] scalars, double[] c, int len, int 
rowIndex);
+               SideInput[] b, double[] scalars, double[] c, int ci, int len, 
int rowIndex);
        
        protected abstract void genexec(double[] avals, int[] aix, int ai, 
-               SideInput[] b, double[] scalars, double[] c, int alen, int n, 
int rowIndex);
+               SideInput[] b, double[] scalars, double[] c, int ci, int alen, 
int n, int rowIndex);
 
        
        /**
         * Task for multi-threaded column aggregation operations.
         */
-       private class ParColAggTask implements Callable<double[]> 
+       private class ParColAggTask implements Callable<DenseBlock> 
        {
                private final MatrixBlock _a;
                private final SideInput[] _b;
@@ -365,12 +356,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                }
                
                @Override
-               public double[] call() throws DMLRuntimeException {
+               public DenseBlock call() throws DMLRuntimeException {
                        
                        //allocate vector intermediates and partial output
                        if( _reqVectMem > 0 )
                                
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
-                       double[] c = new double[_outLen];
+                       DenseBlock c = DenseBlockFactory.createDenseBlock(1, 
_outLen);
                        
                        if( _a instanceof CompressedMatrixBlock )
                                executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, c, _clen, _rl, _ru);
@@ -417,11 +408,11 @@ public abstract class SpoofRowwise extends SpoofOperator
                                
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
                        
                        if( _a instanceof CompressedMatrixBlock )
-                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, _c.getDenseBlockValues(), _clen, _rl, _ru);
+                               executeCompressed((CompressedMatrixBlock)_a, 
_b, _scalars, _c.getDenseBlock(), _clen, _rl, _ru);
                        else if( !_a.isInSparseFormat() )
-                               executeDense(_a.getDenseBlock(), _b, _scalars, 
_c.getDenseBlockValues(), _clen, _rl, _ru);
+                               executeDense(_a.getDenseBlock(), _b, _scalars, 
_c.getDenseBlock(), _clen, _rl, _ru);
                        else
-                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c.getDenseBlockValues(), _clen, _rl, _ru);
+                               executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c.getDenseBlock(), _clen, _rl, _ru);
                        
                        if( _reqVectMem > 0 )
                                LibSpoofPrimitives.cleanupThreadLocalMemory();

Reply via email to