Repository: systemml Updated Branches: refs/heads/master 131b647b1 -> a918d5778
[SYSTEMML-2046] Large dense blocks in all codegen runtime operations This patch modifies all codegen operations (i.e., cell, magg, row, outer) to support large dense matrix blocks >16GB. In addition, this also fixes an issue with previously modified cumulative aggregates. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/92f3b7fc Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/92f3b7fc Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/92f3b7fc Branch: refs/heads/master Commit: 92f3b7fceff178f275c0586a2ee5d75b40c9c05d Parents: 131b647 Author: Matthias Boehm <[email protected]> Authored: Thu Dec 28 21:40:48 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Dec 28 21:40:48 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/codegen/CodegenUtils.java | 2 +- .../sysml/runtime/codegen/SpoofCellwise.java | 214 +++++++++++++------ .../runtime/codegen/SpoofMultiAggregate.java | 29 ++- .../sysml/runtime/codegen/SpoofOperator.java | 22 +- .../runtime/codegen/SpoofOuterProduct.java | 185 +++++++++------- .../sysml/runtime/codegen/SpoofRowwise.java | 11 +- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 4 +- .../sysml/runtime/util/DataConverter.java | 33 ++- 8 files changed, 324 insertions(+), 176 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java b/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java index 2e84eae..726e267 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/CodegenUtils.java @@ -170,7 +170,7 @@ public 
class CodegenUtils public static SideInput createSideInput(MatrixBlock in) { SideInput ret = (in.isInSparseFormat() || !in.isAllocated()) ? new SideInput(null, in, in.getNumColumns()) : - new SideInput(in.getDenseBlockValues(), null, in.getNumColumns()); + new SideInput(in.getDenseBlock(), null, in.getNumColumns()); return (ret.mdat != null) ? new SideInputSparseCell(ret) : ret; } http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 5eb3b76..7c2ac35 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -295,7 +295,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl MatrixBlock out, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] c = out.getDenseBlockValues(); + DenseBlock c = out.getDenseBlock(); SideInput[] lb = createSparseSideInputs(b); if( _type == CellType.NO_AGG ) { @@ -419,123 +419,172 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl //core operator skeletons for dense, sparse, and compressed private long executeDenseNoAgg(DenseBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.valuesAt(0) : null; - long lnnz = 0; - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) { - double aval = (data != null) ? data[ix] : 0; - if( aval != 0 || !sparseSafe) { - c[ix] = genexec( aval, b, scalars, m, n, i, j); - lnnz += (c[ix]!=0) ? 
1 : 0; + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) { + double[] cvals = c.values(i); + int cix = c.pos(i); + for( int j=0; j<n; j++ ) + lnnz += ((cvals[cix+j] = genexec(0, b, scalars, m, n, i, j))!=0) ? 1 : 0; + } + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + double[] cvals = c.values(i); + int ix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[ix+j]; + if( aval != 0 || !sparseSafe) + lnnz += ((cvals[ix+j] = genexec(aval, b, scalars, m, n, i, j))!=0) ? 1 : 0; } } + } + return lnnz; } private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) { - double[] data = (a != null) ? a.valuesAt(0) : null; - + //note: output always single block + double[] lc = c.valuesAt(0); KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); + long lnnz = 0; - for( int i=rl, ix=rl*n; i<ru; i++ ) { - kbuff.set(0, 0); - for( int j=0; j<n; j++, ix++ ) { - double aval = (data != null) ? data[ix] : 0; - if( aval != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) { + kbuff.set(0, 0); + for( int j=0; j<n; j++ ) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0; + } + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + kbuff.set(0, 0); + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix+j]; + if( aval != 0 || !sparseSafe) + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + } + lnnz += ((lc[i] = kbuff._sum)!=0) ? 1 : 0; } - lnnz += ((c[i] = kbuff._sum)!=0) ? 
1 : 0; } + return lnnz; } private long executeDenseRowAggMxx(DenseBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.valuesAt(0) : null; + double[] lc = c.valuesAt(0); //single block double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; ValueFunction vfun = getAggFunction(); long lnnz = 0; - if( data == null && !sparseSafe ) { //empty + if( a == null && !sparseSafe ) { //empty for( int i=rl; i<ru; i++ ) { double tmp = initialVal; for( int j=0; j<n; j++ ) tmp = vfun.execute(tmp, genexec(0, b, scalars, m, n, i, j)); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + lnnz += ((lc[i] = tmp)!=0) ? 1 : 0; } } - else if( data != null ) { //general case - for( int i=rl, ix=rl*n; i<ru; i++ ) { + else if( a != null ) { //general case + for( int i=rl; i<ru; i++ ) { double tmp = initialVal; - for( int j=0; j<n; j++, ix++ ) - if( data[ix] != 0 || !sparseSafe) - tmp = vfun.execute(tmp, genexec(data[ix], b, scalars, m, n, i, j)); - if( sparseSafe && UtilFunctions.containsZero(data, ix-n, n) ) + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix + j]; + if( aval != 0 || !sparseSafe) + tmp = vfun.execute(tmp, genexec(aval, b, scalars, m, n, i, j)); + } + if( sparseSafe && UtilFunctions.containsZero(avals, aix, n) ) tmp = vfun.execute(tmp, 0); - lnnz += ((c[i] = tmp)!=0) ? 1 : 0; + lnnz += ((lc[i] = tmp)!=0) ? 1 : 0; } } return lnnz; } private long executeDenseColAggSum(DenseBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) { - double[] data = (a != null) ? 
a.valuesAt(0) : null; + double[] lc = c.valuesAt(0); //single block KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); double[] corr = new double[n]; - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) { - double aval = (data != null) ? data[ix] : 0; - if( aval != 0 || !sparseSafe) { - kbuff.set(c[j], corr[j]); - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); - c[j] = kbuff._sum; + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) + for( int j=0; j<n; j++ ) { + kbuff.set(lc[j], corr[j]); + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + lc[j] = kbuff._sum; corr[j] = kbuff._correction; } + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix + j]; + if( aval != 0 || !sparseSafe ) { + kbuff.set(lc[j], corr[j]); + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + lc[j] = kbuff._sum; + corr[j] = kbuff._correction; + } + } } + } + return -1; } private long executeDenseColAggMxx(DenseBlock a, SideInput[] b, double[] scalars, - double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + DenseBlock c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.valuesAt(0) : null; + double[] lc = c.valuesAt(0); //single block double initialVal = (_aggOp==AggOp.MIN) ? 
Double.MAX_VALUE : -Double.MAX_VALUE; ValueFunction vfun = getAggFunction(); - Arrays.fill(c, initialVal); + Arrays.fill(lc, initialVal); - if( data == null && !sparseSafe ) { //empty + if( a == null && !sparseSafe ) { //empty for( int i=rl; i<ru; i++ ) for( int j=0; j<n; j++ ) - c[j] = vfun.execute(c[j], genexec(0, b, scalars, m, n, i, j)); + lc[j] = vfun.execute(lc[j], genexec(0, b, scalars, m, n, i, j)); } - else if( data != null ) { //general case + else if( a != null ) { //general case int[] counts = new int[n]; - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) - if( data[ix] != 0 || !sparseSafe) { - c[j] = vfun.execute(c[j], genexec(data[ix], b, scalars, m, n, i, j)); + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix + j]; + if( aval != 0 || !sparseSafe ) { + lc[j] = vfun.execute(lc[j], genexec(aval, b, scalars, m, n, i, j)); counts[j] ++; } + } + } if( sparseSafe ) for(int j=0; j<n; j++) if( counts[j] != ru-rl ) - c[j] = vfun.execute(c[j], 0); + lc[j] = vfun.execute(lc[j], 0); } return -1; } @@ -544,17 +593,26 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.valuesAt(0) : null; - KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) { - double aval = (data != null) ? 
data[ix] : 0; - if( aval != 0 || !sparseSafe) - kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) + for( int j=0; j<n; j++ ) + kplus.execute2(kbuff, genexec(0, b, scalars, m, n, i, j)); + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix + j]; + if( aval != 0 || !sparseSafe) + kplus.execute2(kbuff, genexec(aval, b, scalars, m, n, i, j)); + } } + } + return kbuff._sum; } @@ -562,19 +620,28 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.valuesAt(0) : null; - //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside double ret = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; ValueFunction vfun = getAggFunction(); - for( int i=rl, ix=rl*n; i<ru; i++ ) - for( int j=0; j<n; j++, ix++ ) { - double aval = (data != null) ? 
data[ix] : 0; - if( aval != 0 || !sparseSafe) - ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j)); + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) + for( int j=0; j<n; j++ ) + ret = vfun.execute(ret, genexec(0, b, scalars, m, n, i, j)); + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) { + double aval = avals[aix + j]; + if( aval != 0 || !sparseSafe) + ret = vfun.execute(ret, genexec(aval, b, scalars, m, n, i, j)); + } } + } + return ret; } @@ -621,9 +688,9 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl { //note: sequential scan algorithm for both sparse-safe and -unsafe //in order to avoid binary search for sparse-unsafe - double[] c = out.getDenseBlockValues(); + DenseBlock c = out.getDenseBlock(); long lnnz = 0; - for(int i=rl, cix=rl*n; i<ru; i++, cix+=n) { + for(int i=rl; i<ru; i++) { int lastj = -1; //handle non-empty rows if( sblock != null && !sblock.isEmpty(i) ) { @@ -631,20 +698,25 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int alen = sblock.size(i); int[] aix = sblock.indexes(i); double[] avals = sblock.values(i); + double[] cvals = c.values(i); + int cix = c.pos(i); for(int k=apos; k<apos+alen; k++) { //process zeros before current non-zero if( !sparseSafe ) for(int j=lastj+1; j<aix[k]; j++) - lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; //process current non-zero lastj = aix[k]; - lnnz += ((c[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; + lnnz += ((cvals[cix+lastj]=genexec(avals[k], b, scalars, m, n, i, lastj))!=0)?1:0; } } //process empty rows or remaining zeros if( !sparseSafe ) - for(int j=lastj+1; j<n; j++) - lnnz += ((c[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + for(int j=lastj+1; j<n; j++) { + double[] cvals = c.values(i); + int cix 
= c.pos(i); + lnnz += ((cvals[cix+j]=genexec(0, b, scalars, m, n, i, j))!=0)?1:0; + } } return lnnz; } http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java index c141b2d..85c894a 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofMultiAggregate.java @@ -40,6 +40,7 @@ import org.apache.sysml.runtime.functionobjects.KahanPlusSq; import org.apache.sysml.runtime.functionobjects.ValueFunction; import org.apache.sysml.runtime.instructions.cp.KahanObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.IJV; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; @@ -94,7 +95,7 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria //result allocation and preparations out.reset(1, _aggOps.length, false); out.allocateDenseBlock(); - double[] c = out.getDenseBlockValues(); + double[] c = out.getDenseBlockValues(); //1x<num_agg> setInitialOutputValues(c); //input preparation @@ -109,8 +110,8 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria if( inputs.get(0) instanceof CompressedMatrixBlock ) executeCompressed((CompressedMatrixBlock)inputs.get(0), b, scalars, c, m, n, 0, m); else if( !inputs.get(0).isInSparseFormat() ) - executeDense(inputs.get(0).getDenseBlockValues(), b, scalars, c, m, n, 0, m); - else + executeDense(inputs.get(0).getDenseBlock(), b, scalars, c, m, n, sparseSafe, 0, m); + else executeSparse(inputs.get(0).getSparseBlock(), b, 
scalars, c, m, n, sparseSafe, 0, m); } else //MULTI-THREADED @@ -144,15 +145,23 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria return out; } - private void executeDense(double[] a, SideInput[] b, double[] scalars, double[] c, int m, int n, int rl, int ru) throws DMLRuntimeException + private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) + throws DMLRuntimeException { SideInput[] lb = createSparseSideInputs(b); //core dense aggregation operation - for( int i=rl, ix=rl*n; i<ru; i++ ) { - for( int j=0; j<n; j++, ix++ ) { - double in = (a != null) ? a[ix] : 0; - genexec( in, lb, scalars, c, m, n, i, j ); + if( a == null && !sparseSafe ) { + for( int i=rl; i<ru; i++ ) + for( int j=0; j<n; j++ ) + genexec( 0, lb, scalars, c, m, n, i, j ); + } + else if( a != null ) { + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<n; j++ ) + genexec( avals[aix+j], lb, scalars, c, m, n, i, j ); } } } @@ -307,8 +316,8 @@ public abstract class SpoofMultiAggregate extends SpoofOperator implements Seria if( _a instanceof CompressedMatrixBlock ) executeCompressed((CompressedMatrixBlock)_a, _b, _scalars, c, _rlen, _clen, _rl, _ru); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlockValues(), _b, _scalars, c, _rlen, _clen, _rl, _ru); - else + executeDense(_a.getDenseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru); + else executeSparse(_a.getSparseBlock(), _b, _scalars, c, _rlen, _clen, _safe, _rl, _ru); return c; } http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java index 2267a47..af0a22b 100644 --- 
a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOperator.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.compress.CompressedMatrixBlock; import org.apache.sysml.runtime.instructions.cp.ScalarObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.LibMatrixReorg; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; @@ -102,7 +103,7 @@ public abstract class SpoofOperator implements Serializable if( in.getNumColumns()==1 && in.isEmptyBlock(false) ) //dense empty b[i-offset] = new SideInput(null, null, clen); else { - b[i-offset] = new SideInput(DataConverter.convertToDoubleVector(in), null, clen); + b[i-offset] = new SideInput(DataConverter.convertToDenseBlock(in, false), null, clen); LOG.warn(getClass().getName()+": Converted "+in.getNumRows()+"x"+in.getNumColumns()+ ", nnz="+in.getNonZeros()+" sideways input matrix from sparse to dense."); } @@ -111,8 +112,7 @@ public abstract class SpoofOperator implements Serializable b[i-offset] = new SideInput(null, in, clen); } else { - b[i-offset] = new SideInput( - in.getDenseBlockValues(), null, clen); + b[i-offset] = new SideInput(in.getDenseBlock(), null, clen); } } @@ -142,8 +142,8 @@ public abstract class SpoofOperator implements Serializable return ret; } - public static double[][] getDenseMatrices(SideInput[] inputs) { - double[][] ret = new double[inputs.length][]; + public static DenseBlock[] getDenseMatrices(SideInput[] inputs) { + DenseBlock[] ret = new DenseBlock[inputs.length]; for( int i=0; i<inputs.length; i++ ) ret[i] = inputs[i].ddat; return ret; @@ -204,7 +204,7 @@ public abstract class SpoofOperator implements Serializable protected static double getValue(SideInput data, int rowIndex) { //note: wrapper sideinput 
guaranteed to exist - return (data.ddat!=null) ? data.ddat[rowIndex] : + return (data.ddat!=null) ? data.ddat.valuesAt(0)[rowIndex] : (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, 0) : 0; } @@ -216,7 +216,7 @@ public abstract class SpoofOperator implements Serializable protected static double getValue(SideInput data, int n, int rowIndex, int colIndex) { //note: wrapper sideinput guaranteed to exist - return (data.ddat!=null) ? data.ddat[rowIndex*n+colIndex] : + return (data.ddat!=null) ? data.ddat.get(rowIndex, colIndex) : (data instanceof SideInputSparseCell) ? ((SideInputSparseCell)data).next(rowIndex, colIndex) : (data.mdat!=null) ? data.mdat.quickGetValue(rowIndex, colIndex) : 0; @@ -235,19 +235,19 @@ public abstract class SpoofOperator implements Serializable } public static class SideInput { - public final double[] ddat; + public final DenseBlock ddat; public final MatrixBlock mdat; public final int clen; - public SideInput(double[] ddata, MatrixBlock mdata, int clength) { + public SideInput(DenseBlock ddata, MatrixBlock mdata, int clength) { ddat = ddata; mdat = mdata; clen = clength; } public int pos(int r) { - return r * clen; + return (ddat!=null) ? ddat.pos(r) : r * clen; } public double[] values(int r) { - return ddat; + return (ddat!=null) ? 
ddat.values(r) : null; } public double getValue(int r, int c) { return SpoofOperator.getValue(this, clen, r, c); http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java index 6919655..e6e93a8 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofOuterProduct.java @@ -33,6 +33,7 @@ import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.compress.CompressedMatrixBlock; import org.apache.sysml.runtime.instructions.cp.DoubleObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; +import org.apache.sysml.runtime.matrix.data.DenseBlock; import org.apache.sysml.runtime.matrix.data.IJV; import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.SparseBlock; @@ -81,7 +82,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator return new DoubleObject(0); //input preparation - double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); + DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); SideInput[] b = prepInputMatrices(inputs, 3, false); double[] scalars = prepInputScalars(scalarObjects); @@ -97,7 +98,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( a instanceof CompressedMatrixBlock ) executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n); else if( !a.isInSparseFormat() ) - executeCellwiseDense(a.getDenseBlockValues(), ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, _outerProductType, 0, m, 0, n); + executeCellwiseDense(a.getDenseBlock(), ab[0], 
ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); return new DoubleObject(out.getDenseBlock().get(0, 0)); @@ -117,7 +118,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator return execute(inputs, scalarObjects); //sequential //input preparation - double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); + DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); SideInput[] b = prepInputMatrices(inputs, 3, false); double[] scalars = prepInputScalars(scalarObjects); @@ -185,7 +186,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator out.allocateBlock(); //input preparation - double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); + DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); SideInput[] b = prepInputMatrices(inputs, 3, false); double[] scalars = prepInputScalars(scalarObjects); @@ -200,19 +201,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator case LEFT_OUTER_PRODUCT: case RIGHT_OUTER_PRODUCT: if( a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out.getDenseBlockValues(), + executeCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, ((CompressedMatrixBlock)a).getNumColGroups()); else if( !a.isInSparseFormat() ) - executeDense(a.getDenseBlockValues(), ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, _outerProductType, 0, m, 0, n); + executeDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else - executeSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); + 
executeSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); break; case CELLWISE_OUTER_PRODUCT: if( a instanceof CompressedMatrixBlock ) executeCellwiseCompressed((CompressedMatrixBlock)a, ab[0], ab[1], b, scalars, out, m, n, k, _outerProductType, 0, m, 0, n); else if( !a.isInSparseFormat() ) - executeCellwiseDense(a.getDenseBlockValues(), ab[0], ab[1], b, scalars, out.getDenseBlockValues(), m, n, k, _outerProductType, 0, m, 0, n); + executeCellwiseDense(a.getDenseBlock(), ab[0], ab[1], b, scalars, out.getDenseBlock(), m, n, k, _outerProductType, 0, m, 0, n); else executeCellwiseSparse(a.getSparseBlock(), ab[0], ab[1], b, scalars, out, m, n, k, a.getNonZeros(), _outerProductType, 0, m, 0, n); break; @@ -267,7 +268,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator return execute(inputs, scalarObjects, out); //sequential //input preparation - double[][] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); + DenseBlock[] ab = getDenseMatrices(prepInputMatrices(inputs, 1, 2, true, false)); SideInput[] b = prepInputMatrices(inputs, 3, false); double[] scalars = prepInputScalars(scalarObjects); @@ -338,8 +339,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator return UtilFunctions.roundToNext(base, k); } - private void executeDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars, - double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) + private void executeDense(DenseBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, + DenseBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), @@ -357,17 +358,23 @@ public abstract class SpoofOuterProduct extends SpoofOperator int bjmin = Math.min(cu, bj+blocksizeIJ); //core 
computation - for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, ix+=n, uix+=k ) - for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k) - if( a[ix+j] != 0 ) { + for( int i=bi; i<bimin; i++ ) { + double[] avals = a.values(i); + double[] uvals = u.values(i); + int aix = a.pos(i), uix = u.pos(i); + for( int j=bj; j<bjmin; j++) + if( avals[aix+j] != 0 ) { + int vix = v.pos(j); cix = (type == OutProdType.LEFT_OUTER_PRODUCT) ? vix : uix; - genexecDense( a[ix+j], u, uix, v, vix, b, scalars, c, cix, m, n, k, i, j); + genexecDense( avals[aix+j], uvals, uix, v.values(j), vix, + b, scalars, c.values(j), cix, m, n, k, i, j); } + } } } - private void executeCellwiseDense(double[] a, double[] u, double[] v, SideInput[] b, double[] scalars, - double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) + private void executeCellwiseDense(DenseBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, + DenseBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { //approach: iterate over non-zeros of w, selective mm computation //cache-conscious blocking: due to blocksize constraint (default 1000), @@ -385,21 +392,32 @@ public abstract class SpoofOuterProduct extends SpoofOperator int bjmin = Math.min(cu, bj+blocksizeIJ); //core computation - for( int i=bi, ix=bi*n, uix=bi*k; i<bimin; i++, ix+=n, uix+=k ) - for( int j=bj, vix=bj*k; j<bjmin; j++, vix+=k) - if( a[ix+j] != 0 ) { - if(type == OutProdType.CELLWISE_OUTER_PRODUCT) - c[ix+j] = genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j ); - else - sum += genexecCellwise( a[ix+j], u, uix, v, vix, b, scalars, m, n, k, i, j); - } + for( int i=bi; i<bimin; i++ ) { + double[] avals = a.values(i); + double[] uvals = u.values(i); + int aix = a.pos(i), uix = u.pos(i); + if(type == OutProdType.CELLWISE_OUTER_PRODUCT) { + double[] cvals = c.values(i); + for( int j=bj; j<bjmin; j++) + if( avals[aix+j] != 0 ) + cvals[aix+j] = genexecCellwise( avals[aix+j], uvals, uix, + 
v.values(j), v.pos(j), b, scalars, m, n, k, i, j ); + } + else { + for( int j=bj; j<bjmin; j++) + if( avals[aix+j] != 0 ) + sum += genexecCellwise( avals[aix+j], uvals, uix, + v.values(j), v.pos(j), b, scalars, m, n, k, i, j); + + } + } } if( type != OutProdType.CELLWISE_OUTER_PRODUCT ) - c[0] = sum; + c.set(0, 0, sum); } - private void executeSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars, - double[] c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) + private void executeSparse(SparseBlock sblock, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, + DenseBlock c, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu) { boolean left = (_outerProductType== OutProdType.LEFT_OUTER_PRODUCT); @@ -414,21 +432,24 @@ public abstract class SpoofOuterProduct extends SpoofOperator //for ultra-sparse matrices, we do not allocate the index array because //its allocation and maintenance can dominate the total runtime. SideInput[] lb = createSparseSideInputs(b); - + //core wdivmm block matrix mult - for( int i=rl, uix=rl*k; i<ru; i++, uix+=k ) { + for( int i=rl; i<ru; i++ ) { if( sblock.isEmpty(i) ) continue; int wpos = sblock.pos(i); int wlen = sblock.size(i); int[] wix = sblock.indexes(i); - double[] wval = sblock.values(i); + double[] wvals = sblock.values(i); + double[] uvals = u.values(i); + int uix = u.pos(i); int index = (cl==0||sblock.isEmpty(i)) ? 0 : sblock.posFIndexGTE(i,cl); index = wpos + ((index>=0) ? index : n); for( ; index<wpos+wlen && wix[index]<cu; index++ ) { - genexecDense(wval[index], u, uix, v, wix[index]*k, lb, scalars, c, - (left ? wix[index]*k : uix), m, n, k, i, wix[index]); + int jix = wix[index]; + genexecDense(wvals[index], uvals, uix, v.values(jix), v.pos(jix), lb, scalars, + c.values(jix), (left ? 
v.pos(jix) : uix), m, n, k, i, wix[index]); } } } @@ -454,18 +475,21 @@ public abstract class SpoofOuterProduct extends SpoofOperator { int bjmin = Math.min(cu, bj+blocksizeJ); //core wdivmm block matrix mult - for( int i=bi, uix=bi*k; i<bimin; i++, uix+=k ) { + for( int i=bi; i<bimin; i++ ) { if( sblock.isEmpty(i) ) continue; int wpos = sblock.pos(i); int wlen = sblock.size(i); int[] wix = sblock.indexes(i); - double[] wval = sblock.values(i); + double[] wvals = sblock.values(i); + double[] uvals = u.values(i); + int uix = u.pos(i); int index = wpos + curk[i-bi]; for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { - genexecDense(wval[index], u, uix, v, wix[index]*k, b, scalars, c, - (left ? wix[index]*k : uix), m, n, k, i, wix[index]); + int jix = wix[index]; + genexecDense(wvals[index], uvals, uix, v.values(jix), v.pos(jix), b, scalars, + c.values(jix), (left ? wix[index]*k : uix), m, n, k, i, wix[index]); } curk[i-bi] = index - wpos; } @@ -474,7 +498,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator } } - private void executeCellwiseSparse(SparseBlock sblock, double[] u, double[] v, SideInput[] b, double[] scalars, + private void executeCellwiseSparse(SparseBlock sblock, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, int k, long nnz, OutProdType type, int rl, int ru, int cl, int cu ) { //NOTE: we don't create sparse side inputs w/ row-major cursors because @@ -485,7 +509,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( !out.isInSparseFormat() ) //DENSE { - double[] c = out.getDenseBlockValues(); + DenseBlock c = out.getDenseBlock(); double tmp = 0; for( int bi=rl; bi<ru; bi+=blocksizeIJ ) { int bimin = Math.min(ru, bi+blocksizeIJ); @@ -494,27 +518,34 @@ public abstract class SpoofOuterProduct extends SpoofOperator //blocked execution over column blocks for( int bj=0; bj<n; bj+=blocksizeIJ ) { int bjmin = Math.min(n, bj+blocksizeIJ); - for( int i=bi, uix=bi*k; i<bimin; i++, 
uix+=k ) { + for( int i=bi; i<bimin; i++ ) { if( sblock.isEmpty(i) ) continue; int wpos = sblock.pos(i); int wlen = sblock.size(i); int[] wix = sblock.indexes(i); - double[] wval = sblock.values(i); + double[] wvals = sblock.values(i); + double[] cvals = c.values(i); + double[] uvals = u.values(i); + int uix = u.pos(i); int index = wpos + curk[i-bi]; if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) - for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) - c[wix[index]] = genexecCellwise( wval[index], - u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index] ); + for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { + int jix = wix[index]; + cvals[jix] = genexecCellwise( wvals[index], + uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index] ); + } else - for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) - tmp += genexecCellwise( wval[index], - u, uix, v, wix[index]*k, b, scalars, m, n, k, i, wix[index]); + for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { + int jix = wix[index]; + tmp += genexecCellwise( wvals[index], + uvals, uix, v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index]); + } curk[i-bi] = index - wpos; } } } if( type != OutProdType.CELLWISE_OUTER_PRODUCT ) - c[0] = tmp; + c.set(0, 0, tmp); } else //SPARSE { @@ -526,16 +557,19 @@ public abstract class SpoofOuterProduct extends SpoofOperator //blocked execution over column blocks for( int bj=0; bj<n; bj+=blocksizeIJ ) { int bjmin = Math.min(n, bj+blocksizeIJ); - for( int i=bi, uix=bi*k; i<bimin; i++, uix+=k ) { + for( int i=bi; i<bimin; i++ ) { if( sblock.isEmpty(i) ) continue; int wpos = sblock.pos(i); int wlen = sblock.size(i); int[] wix = sblock.indexes(i); double[] wval = sblock.values(i); + double[] uvals = u.values(i); + int uix = u.pos(i); int index = wpos + curk[i-bi]; for( ; index<wpos+wlen && wix[index]<bjmin; index++ ) { - c.append(i, wix[index], genexecCellwise( wval[index], u, uix, v, - wix[index]*k, b, scalars, m, n, k, i, wix[index] )); + int jix 
= wix[index]; + c.append(i, wix[index], genexecCellwise( wval[index], uvals, uix, + v.values(jix), v.pos(jix), b, scalars, m, n, k, i, wix[index] )); } curk[i-bi] = index - wpos; } @@ -544,8 +578,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator } } - private void executeCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, - double[] c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) + private void executeCompressed(CompressedMatrixBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, + DenseBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu) { //NOTE: we don't create sparse side inputs w/ row-major cursors because //compressed data is access in a column-major order @@ -556,14 +590,17 @@ public abstract class SpoofOuterProduct extends SpoofOperator a.getIterator(rl, ru, cl, cu, false); //cl/cu -> colgroups while( iter.hasNext() ) { IJV cell = iter.next(); - int uix = cell.getI() * k; - int vix = cell.getJ() * k; - genexecDense(cell.getV(), u, uix, v, vix, b, scalars, c, + double[] uvals = u.values(cell.getI()); + double[] vvals = v.values(cell.getJ()); + double[] cvals = c.values(left?cell.getJ():cell.getI()); + int uix = u.pos(cell.getI()); + int vix = v.pos(cell.getJ()); + genexecDense(cell.getV(), uvals, uix, vvals, vix, b, scalars, cvals, left ? 
vix : uix, m, n, k, cell.getI(), cell.getJ()); } } - private void executeCellwiseCompressed(CompressedMatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, + private void executeCellwiseCompressed(CompressedMatrixBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, MatrixBlock out, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { //NOTE: we don't create sparse side inputs w/ row-major cursors because @@ -576,21 +613,23 @@ public abstract class SpoofOuterProduct extends SpoofOperator Iterator<IJV> iter = a.getIterator(rl, ru, false); while( iter.hasNext() ) { IJV cell = iter.next(); - int uix = cell.getI() * k; - int vix = cell.getJ() * k; + double[] uvals = u.values(cell.getI()); + double[] vvals = v.values(cell.getJ()); + int uix = u.pos(cell.getI()); + int vix = v.pos(cell.getJ()); if( type == OutProdType.CELLWISE_OUTER_PRODUCT ) { if( out.isInSparseFormat() ) { csblock.allocate(cell.getI()); csblock.append(cell.getI(), cell.getJ(), - genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ())); + genexecCellwise(cell.getV(), uvals, uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ())); } else { c[cell.getI()*n+cell.getJ()] = - genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); + genexecCellwise(cell.getV(), uvals, uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); } } else { - c[0] += genexecCellwise(cell.getV(), u, uix, v, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); + c[0] += genexecCellwise(cell.getV(), uvals, uix, vvals, vix, b, scalars, m, n, k, cell.getI(), cell.getJ()); } } } @@ -604,8 +643,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator private class ParExecTask implements Callable<Long> { private final MatrixBlock _a; - private final double[] _u; - private final double[] _v; + private final DenseBlock _u; + private final DenseBlock _v; private final SideInput[] _b; 
private final double[] _scalars; private final MatrixBlock _c; @@ -618,7 +657,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator private final int _cl; private final int _cu; - protected ParExecTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { + protected ParExecTask( MatrixBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars , MatrixBlock c, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { _a = a; _u = u; _v = v; @@ -642,17 +681,17 @@ public abstract class SpoofOuterProduct extends SpoofOperator case LEFT_OUTER_PRODUCT: case RIGHT_OUTER_PRODUCT: if( _a instanceof CompressedMatrixBlock ) - executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + executeCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else if( !_a.isInSparseFormat() ) - executeDense(_a.getDenseBlockValues(), _u, _v, _b, _scalars, _c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + executeDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else - executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlockValues(), _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); + executeSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); break; case CELLWISE_OUTER_PRODUCT: if( _a instanceof CompressedMatrixBlock ) executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, _c, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else if( !_c.isInSparseFormat() ) - executeCellwiseDense(_a.getDenseBlockValues(), _u, _v, _b, _scalars, _c.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, 
_ru, _cl, _cu); + executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, _c.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, _c, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); break; @@ -671,8 +710,8 @@ public abstract class SpoofOuterProduct extends SpoofOperator private class ParOuterProdAggTask implements Callable<Double> { private final MatrixBlock _a; - private final double[] _u; - private final double[] _v; + private final DenseBlock _u; + private final DenseBlock _v; private final SideInput[] _b; private final double[] _scalars; private final int _rlen; @@ -684,7 +723,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator private final int _cl; private final int _cu; - protected ParOuterProdAggTask( MatrixBlock a, double[] u, double[] v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { + protected ParOuterProdAggTask( MatrixBlock a, DenseBlock u, DenseBlock v, SideInput[] b, double[] scalars, int m, int n, int k, OutProdType type, int rl, int ru, int cl, int cu ) { _a = a; _u = u; _v = v; @@ -707,7 +746,7 @@ public abstract class SpoofOuterProduct extends SpoofOperator if( _a instanceof CompressedMatrixBlock ) executeCellwiseCompressed((CompressedMatrixBlock)_a, _u, _v, _b, _scalars, out, _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else if( !_a.isInSparseFormat() ) - executeCellwiseDense(_a.getDenseBlockValues(), _u, _v, _b, _scalars, out.getDenseBlockValues(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); + executeCellwiseDense(_a.getDenseBlock(), _u, _v, _b, _scalars, out.getDenseBlock(), _rlen, _clen, _k, _type, _rl, _ru, _cl, _cu); else executeCellwiseSparse(_a.getSparseBlock(), _u, _v, _b, _scalars, out, _rlen, _clen, _k, _a.getNonZeros(), _type, _rl, _ru, _cl, _cu); return out.quickGetValue(0, 0); 
http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index 39dd2ed..6e098b9 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -289,14 +289,15 @@ public abstract class SpoofRowwise extends SpoofOperator private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) { - double[] data = (a != null) ? a.valuesAt(0) : null; - if( data == null ) + //TODO handle large dense outputs (potentially misaligned) + if( a == null ) return; SideInput[] lb = createSparseSideInputs(b, true); - for( int i=rl, aix=rl*n; i<ru; i++, aix+=n ) { - //call generated method - genexec( data, aix, lb, scalars, c, n, i ); + for( int i=rl; i<ru; i++ ) { + double[] avals = a.values(i); + int aix = a.pos(i); + genexec( avals, aix, lb, scalars, c, n, i ); } } http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index c371c5a..3602534 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -285,7 +285,7 @@ public class LibMatrixAgg //filter empty input blocks (incl special handling for sparse-unsafe operations) if( in.isEmptyBlock(false) ){ return aggregateUnaryMatrixEmpty(in, out, aggtype, null); - } + } //allocate output arrays (if required) out.reset(m2, n2, false); 
//always dense @@ -1499,7 +1499,7 @@ public class LibMatrixAgg final int n = in.clen; DenseBlock da = in.getDenseBlock(); - DenseBlock dc = in.getDenseBlock(); + DenseBlock dc = out.getDenseBlock(); double[] a = in.getDenseBlockValues(); double[] c = out.getDenseBlockValues(); http://git-wip-us.apache.org/repos/asf/systemml/blob/92f3b7fc/src/main/java/org/apache/sysml/runtime/util/DataConverter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index fdc7af0..5c3ad25 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -40,6 +40,8 @@ import org.apache.sysml.runtime.io.MatrixWriterFactory; import org.apache.sysml.runtime.io.ReadProperties; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.data.CTableMap; +import org.apache.sysml.runtime.matrix.data.DenseBlock; +import org.apache.sysml.runtime.matrix.data.DenseBlockFactory; import org.apache.sysml.runtime.matrix.data.FileFormatProperties; import org.apache.sysml.runtime.matrix.data.FrameBlock; import org.apache.sysml.runtime.matrix.data.IJV; @@ -321,8 +323,34 @@ public class DataConverter } return ret; } + + public static DenseBlock convertToDenseBlock(MatrixBlock mb) { + return convertToDenseBlock(mb, true); + } + + public static DenseBlock convertToDenseBlock(MatrixBlock mb, boolean deep) { + int rows = mb.getNumRows(); + int cols = mb.getNumColumns(); + DenseBlock ret = (!mb.isInSparseFormat() && mb.isAllocated() && !deep) ? 
+ mb.getDenseBlock() : DenseBlockFactory.createDenseBlock(rows, cols); //0-initialized + + if( !mb.isEmptyBlock(false) ) { + if( mb.isInSparseFormat() ) { + Iterator<IJV> iter = mb.getSparseBlockIterator(); + while( iter.hasNext() ) { + IJV cell = iter.next(); + ret.set(cell.getI(), cell.getJ(), cell.getV()); + } + } + else if( deep ) { + ret.set(mb.getDenseBlock()); + } + } + + return ret; + } - public static double[] convertToDoubleVector( MatrixBlock mb ) { + public static double[] convertToDoubleVector(MatrixBlock mb) { return convertToDoubleVector(mb, true); } @@ -333,8 +361,7 @@ public class DataConverter double[] ret = (!mb.isInSparseFormat() && mb.isAllocated() && !deep) ? mb.getDenseBlockValues() : new double[rows*cols]; //0-initialized - if( !mb.isEmptyBlock(false) ) - { + if( !mb.isEmptyBlock(false) ) { if( mb.isInSparseFormat() ) { Iterator<IJV> iter = mb.getSparseBlockIterator(); while( iter.hasNext() ) {
