Repository: systemml Updated Branches: refs/heads/master 7b56f6131 -> 0c3a46ba1
[SYSTEMML-2046] Large dense blocks in left/right indexing operations This patch modifies all left/right indexing operations involving dense blocks to properly handle the new dense block abstraction with potentially large blocks. Since left indexing relies on the copy primitives, this modifications also applies to all other code paths that leverage these primitives, such as all readers from HDFS and RDDs. Additionally, this patch also includes (1) extensions to the dense block abstraction, (2) performance improvements for block fill operations, (3) a fix for fill operations on large blocks, (4) fixes for full aggregates over large blocks, (5) a fix for the selection of large sparse update inplace blocks, and (6) a cleanup of misleading statistics maintenance in leftindexing operations. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0c3a46ba Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0c3a46ba Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0c3a46ba Branch: refs/heads/master Commit: 0c3a46ba12d50c220ec29fd5b301f2f23b463aee Parents: 7b56f61 Author: Matthias Boehm <[email protected]> Authored: Fri Dec 29 22:49:46 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Fri Dec 29 22:49:46 2017 -0800 ---------------------------------------------------------------------- .../cp/MatrixIndexingCPInstruction.java | 13 +- .../sysml/runtime/matrix/data/DenseBlock.java | 13 + .../runtime/matrix/data/DenseBlockDRB.java | 19 +- .../runtime/matrix/data/DenseBlockLDRB.java | 24 +- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 12 +- .../sysml/runtime/matrix/data/MatrixBlock.java | 340 ++++++++----------- 6 files changed, 201 insertions(+), 220 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java index 4a636e1..73f7f72 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/MatrixIndexingCPInstruction.java @@ -85,8 +85,7 @@ public final class MatrixIndexingCPInstruction extends IndexingCPInstruction { else if ( opcode.equalsIgnoreCase(LeftIndex.OPCODE)) { UpdateType updateType = mo.getUpdateType(); - if(DMLScript.STATISTICS) - { + if(DMLScript.STATISTICS) { if( updateType.isInPlace() ) Statistics.incrementTotalLixUIP(); Statistics.incrementTotalLix(); @@ -95,19 +94,17 @@ public final class MatrixIndexingCPInstruction extends IndexingCPInstruction { MatrixBlock matBlock = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); MatrixBlock resultBlock = null; - if(input2.getDataType() == DataType.MATRIX) //MATRIX<-MATRIX - { + if(input2.getDataType() == DataType.MATRIX) { //MATRIX<-MATRIX MatrixBlock rhsMatBlock = ec.getMatrixInput(input2.getName(), getExtendedOpcode()); - resultBlock = matBlock.leftIndexingOperations(rhsMatBlock, ixrange, new MatrixBlock(), updateType, getExtendedOpcode()); + resultBlock = matBlock.leftIndexingOperations(rhsMatBlock, ixrange, new MatrixBlock(), updateType); ec.releaseMatrixInput(input2.getName(), getExtendedOpcode()); } - else //MATRIX<-SCALAR - { + else { //MATRIX<-SCALAR if(!ixrange.isScalar()) throw new DMLRuntimeException("Invalid index range of scalar leftindexing: "+ixrange.toString()+"." ); ScalarObject scalar = ec.getScalarInput(input2.getName(), ValueType.DOUBLE, input2.isLiteral()); resultBlock = (MatrixBlock) matBlock.leftIndexingOperations(scalar, - (int)ixrange.rowStart, (int)ixrange.colStart, new MatrixBlock(), updateType); + (int)ixrange.rowStart, (int)ixrange.colStart, new MatrixBlock(), updateType); } //unpin lhs input http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java index cf3f6db..e2177e2 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java @@ -238,6 +238,19 @@ public abstract class DenseBlock implements Serializable public abstract void set(DenseBlock db); /** + * Copy the given dense block into the specified + * index range. + * + * @param rl row lower index + * @param ru row upper index (exclusive) + * @param cl column lower index + * @param cu column upper index (exclusive) + * @param db dense block + */ + public abstract void set(int rl, int ru, int cl, int cu, DenseBlock db); + + + /** * Copy the given kahan object sum and correction. * * @param kbuff kahan object http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java index ae526eb..f1520c0 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java @@ -162,8 +162,11 @@ public class DenseBlockDRB extends DenseBlock @Override public void set(int rl, int ru, int cl, int cu, double v) { - for(int i=rl, ix=rl*clen; i<ru; i++, ix+=clen) - Arrays.fill(data, ix+cl, ix+cu, v); + if( cl==0 && cu == clen ) + Arrays.fill(data, rl*clen, ru*clen, v); + else + for(int i=rl, ix=rl*clen; i<ru; i++, ix+=clen) + Arrays.fill(data, ix+cl, ix+cu, v); } @Override @@ -175,6 +178,18 @@ public class DenseBlockDRB extends DenseBlock public void set(DenseBlock db) { System.arraycopy(db.valuesAt(0), 0, data, 0, rlen*clen); } + + @Override + public void set(int rl, int ru, int cl, int cu, DenseBlock db) { + double[] a = db.valuesAt(0); + if( cl == 0 && cu == clen) + System.arraycopy(a, 0, data, rl*clen+cl, (int)db.size()); + else { + int len = cu - cl; + for(int i=rl, ix1=0, ix2=rl*clen+cl; i<ru; i++, ix1+=len, ix2+=clen) + System.arraycopy(a, ix1, data, ix2, len); + } + } @Override public void set(int r, double[] v) { http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java index cd25516..ff3357b 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java @@ -177,11 +177,17 @@ public class DenseBlockLDRB extends DenseBlock @Override public void set(int rl, int ru, int cl, int cu, double v) { - for(int bi=index(rl); bi<=index(ru); bi++) { - int imin = pos(bi); - int imax = Math.min(ru, (bi+1)*blen)-bi; - for(int i=imin, ix=imin*clen; i<imax; i++, ix+=clen) - Arrays.fill(data, ix+cl, ix+cu, v); + boolean rowBlock = (cl == 0 && cu == clen); + final int bil = index(rl); + final int biu = index(ru-1); + for(int bi=bil; bi<=biu; bi++) { + int lpos = (bi==bil) ? pos(rl) : 0; + int len = (bi==biu) ? pos(ru-1)-lpos+clen : blockSize(bi)*clen; + if( rowBlock ) + Arrays.fill(data[bi], lpos, lpos+len, v); + else + for(int i=lpos; i<lpos+len; i+=clen) + Arrays.fill(data[bi], i+cl, i+cu, v); } } @@ -200,6 +206,14 @@ public class DenseBlockLDRB extends DenseBlock for(int bi=0; bi<numBlocks(); bi++) System.arraycopy(db.valuesAt(bi), 0, data[bi], 0, size(bi)); } + + @Override + public void set(int rl, int ru, int cl, int cu, DenseBlock db) { + for(int i=rl; i<ru; i++) { + System.arraycopy(db.values(i-rl), + db.pos(i-rl), values(i), pos(i, cl), cu-cl); + } + } @Override public double get(int r, int c) { http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java index 3602534..b56397e 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixAgg.java @@ -1639,7 +1639,7 @@ public class LibMatrixAgg final int biu = a.index(ru-1); for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; sum(a.valuesAt(bi), lpos, len, kbuff, kplus); } c.set(kbuff); @@ -1701,7 +1701,7 @@ public class LibMatrixAgg final int biu = a.index(ru-1); for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; sum(a.valuesAt(bi), lpos, len, kbuff, kplusSq); } c.set(kbuff); @@ -1859,7 +1859,7 @@ public class LibMatrixAgg final int biu = a.index(ru-1); for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; tmp = builtin(a.valuesAt(bi), lpos, tmp, len, builtin); } c.set(0, 0, tmp); @@ -1957,7 +1957,7 @@ public class LibMatrixAgg int tlen = 0; for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; mean(a.valuesAt(bi), lpos, len, 0, kbuff, kmean); tlen += len; } @@ -2027,7 +2027,7 @@ public class LibMatrixAgg final int biu = a.index(ru-1); for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; var(a.valuesAt(bi), lpos, len, cbuff, cm); } // store results: { var | mean, count, m2 correction, mean correction } @@ -2109,7 +2109,7 @@ public class LibMatrixAgg double tmp = 1; for(int bi=bil; bi<=biu; bi++) { int lpos = (bi==bil) ? a.pos(rl) : 0; - int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi); + int len = (bi==biu) ? a.pos(ru-1)-lpos+n : a.blockSize(bi)*n; tmp *= product( a.valuesAt(bi), lpos, len ); } c.set(0, 0, tmp); http://git-wip-us.apache.org/repos/asf/systemml/blob/0c3a46ba/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index caf024e..cbeef5e 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -35,7 +35,6 @@ import java.util.stream.LongStream; import org.apache.commons.math3.random.Well1024a; import org.apache.hadoop.io.DataInputBuffer; -import org.apache.sysml.api.DMLScript; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.hops.OptimizerUtils; import org.apache.sysml.lops.MMTSJ.MMTSJType; @@ -61,7 +60,6 @@ import org.apache.sysml.runtime.functionobjects.RevIndex; import org.apache.sysml.runtime.functionobjects.SortIndex; import org.apache.sysml.runtime.functionobjects.SwapIndex; import org.apache.sysml.runtime.instructions.cp.CM_COV_Object; -import org.apache.sysml.runtime.instructions.cp.CPInstruction; import org.apache.sysml.runtime.instructions.cp.KahanObject; import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.io.IOUtilFunctions; @@ -87,11 +85,9 @@ import org.apache.sysml.runtime.util.FastBufferedDataInputStream; import org.apache.sysml.runtime.util.FastBufferedDataOutputStream; import org.apache.sysml.runtime.util.IndexRange; import org.apache.sysml.runtime.util.UtilFunctions; -import org.apache.sysml.utils.GPUStatistics; import org.apache.sysml.utils.NativeHelper; - public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizable { private static final long serialVersionUID = 7319972089143154056L; @@ -1320,19 +1316,17 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return this; } - private void copySparseToSparse(MatrixBlock that) - { + private void copySparseToSparse(MatrixBlock that) { this.nonZeros=that.nonZeros; if( that.isEmptyBlock(false) ) { resetSparse(); return; } - + allocateSparseRowsBlock(false); - for(int i=0; i<Math.min(that.sparseBlock.numRows(), rlen); i++) - { + for(int i=0; i<Math.min(that.sparseBlock.numRows(), rlen); i++) { if(!that.sparseBlock.isEmpty(i)) { - sparseBlock.set(i, that.sparseBlock.get(i), true); + sparseBlock.set(i, that.sparseBlock.get(i), true); } else if(!this.sparseBlock.isEmpty(i)) { this.sparseBlock.reset(i,estimatedNNzsPerRow, clen); @@ -1340,8 +1334,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } } - private void copyDenseToDense(MatrixBlock that) - { + private void copyDenseToDense(MatrixBlock that) { nonZeros = that.nonZeros; //plain reset to 0 for empty input @@ -1351,15 +1344,12 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return; } - //allocate and init dense block (w/o overwriting nnz) + //allocate and copy dense block allocateDenseBlock(false); - - //actual copy - System.arraycopy(that.getDenseBlockValues(), 0, getDenseBlockValues(), 0, rlen*clen); + denseBlock.set(that.denseBlock); } - private void copySparseToDense(MatrixBlock that) - { + private void copySparseToDense(MatrixBlock that) { this.nonZeros=that.nonZeros; if( that.isEmptyBlock(false) ) { if(denseBlock!=null) @@ -1369,41 +1359,41 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab //allocate and init dense block (w/o overwriting nnz) allocateDenseBlock(false); - - int start=0; - double[] c = getDenseBlockValues(); - for(int r=0; r<Math.min(that.sparseBlock.numRows(), rlen); r++, start+=clen) { - if(that.sparseBlock.isEmpty(r)) - continue; - int pos = that.sparseBlock.pos(r); - int len = that.sparseBlock.size(r); - int[] aix = that.sparseBlock.indexes(r); - double[] avals = that.sparseBlock.values(r); - for(int i=pos; i<pos+len; i++) - c[start+aix[i]]=avals[i]; + SparseBlock a = that.getSparseBlock(); + DenseBlock c = getDenseBlock(); + for( int i=0; i<Math.min(a.numRows(), rlen); i++ ) { + if( a.isEmpty(i) ) continue; + int pos = a.pos(i); + int len = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + double[] cvals = c.values(i); + int cix = c.pos(i); + for( int j=pos; j<pos+len; j++ ) + cvals[cix+aix[j]] = avals[j]; } } - private void copyDenseToSparse(MatrixBlock that) - { + private void copyDenseToSparse(MatrixBlock that) { nonZeros = that.nonZeros; if( that.isEmptyBlock(false) ) { resetSparse(); return; } - allocateSparseRowsBlock(false); - - double[] b = that.getDenseBlockValues(); - for(int i=0, ix=0; i<rlen; i++) { - sparseBlock.reset(i, estimatedNNzsPerRow, clen); - for(int j=0; j<clen; j++) { - double val = b[ix++]; - if( val != 0 ) { - //create sparse row only if required - sparseBlock.allocate(i, estimatedNNzsPerRow, clen); - sparseBlock.append(i, j, val); - } + if( !allocateSparseRowsBlock(false) ) + resetSparse(); + DenseBlock a = that.getDenseBlock(); + SparseBlock c = getSparseBlock(); + for(int i=0; i<rlen; i++) { + double[] avals = a.values(i); + int aix = a.pos(i); + for( int j=0; j<clen; j++ ) { + double val = avals[aix+j]; + if( val == 0 ) continue; + //create sparse row only if required + c.allocate(i, estimatedNNzsPerRow, clen); + c.append(i, j, val); } } } @@ -1440,13 +1430,12 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } private void copySparseToSparse(int rl, int ru, int cl, int cu, MatrixBlock src, boolean awareDestNZ) - { + { //handle empty src and dest - if( src.isEmptyBlock(false) ) - { + if( src.isEmptyBlock(false) ) { if( awareDestNZ && sparseBlock != null ) copyEmptyToSparse(rl, ru, cl, cu, true); - return; + return; } if(sparseBlock==null) @@ -1468,14 +1457,12 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab int apos = a.pos(i); int alen = a.size(i); int[] aix = a.indexes(i); - double[] avals = a.values(i); + double[] avals = a.values(i); - if( b.isEmpty(rl+i) ) - { + if( b.isEmpty(rl+i) ) { b.allocate(rl+i, estimatedNNzsPerRow, clen); for( int j=apos; j<apos+alen; j++ ) b.append(rl+i, cl+aix[j], avals[j]); - if( awareDestNZ ) nonZeros += b.size(rl+i); } @@ -1492,13 +1479,13 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab b.set(rl+i, cl+aix[j], avals[j]); } nonZeros += (b.size(rl+i) - lnnz); - } + } else //general case (w/o awareness NNZ) - { - //TODO perf sparse row + { + //TODO perf sparse row for( int j=apos; j<apos+alen; j++ ) b.set(rl+i, cl+aix[j], avals[j]); - } + } } } } @@ -1510,30 +1497,31 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if( src.isEmptyBlock(false) ) { if( awareDestNZ && denseBlock != null ) { nonZeros -= recomputeNonZeros(rl, ru, cl, cu); - copyEmptyToDense(rl, ru, cl, cu); + denseBlock.set(rl, ru+1, cl, cu+1, 0); } - return; + return; } if(denseBlock==null) allocateDenseBlock(); else if( awareDestNZ ) { nonZeros -= recomputeNonZeros(rl, ru, cl, cu); - copyEmptyToDense(rl, ru, cl, cu); + denseBlock.set(rl, ru+1, cl, cu+1, 0); } - + //copy values SparseBlock a = src.sparseBlock; - double[] c = getDenseBlockValues(); - for( int i=0, ix=rl*clen; i<src.rlen; i++, ix+=clen ) { + DenseBlock c = getDenseBlock(); + for( int i=0; i<src.rlen; i++ ) { if( a.isEmpty(i) ) continue; int apos = a.pos(i); int alen = a.size(i); int[] aix = a.indexes(i); double[] avals = a.values(i); + double[] cvals = c.values(rl+i); + int cix = c.pos(rl+i, cl); for( int j=apos; j<apos+alen; j++ ) - c[ix+cl+aix[j]] = avals[j]; - if(awareDestNZ) - nonZeros += alen; + cvals[cix+aix[j]] = avals[j]; + nonZeros += awareDestNZ ? alen : 0; } } @@ -1551,22 +1539,24 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab allocateSparseRowsBlock(false); //copy values - SparseBlock c = sparseBlock; - double[] a = src.getDenseBlockValues(); - for( int i=0, ix=0; i<src.rlen; i++, ix+=src.clen ) + DenseBlock a = src.getDenseBlock(); + SparseBlock c = getSparseBlock(); + for( int i=0; i<src.rlen; i++ ) { int rix = rl + i; + double[] avals = a.values(i); + int aix = a.pos(i); if( c instanceof SparseBlockMCSR && c.isEmpty(rix) ) //special case MCSR append { //count nnz per row (fits likely in L1 cache) - int lnnz = UtilFunctions.computeNnz(a, ix, src.clen); + int lnnz = UtilFunctions.computeNnz(avals, aix, src.clen); //allocate row once and copy values if( lnnz > 0 ) { c.allocate(rix, lnnz); for( int j=0; j<src.clen; j++ ) { - double val = a[ix+j]; + double val = avals[aix+j]; if( val != 0 ) c.append(rix, cl+j, val); } @@ -1574,22 +1564,20 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab nonZeros += lnnz; } } - else if( awareDestNZ ) //general case (w/ awareness NNZ) - { + else if( awareDestNZ ) { //general case (w/ awareness NNZ) int lnnz = c.size(rix); if( cl==cu ) { - double val = a[ix]; + double val = avals[aix]; c.set(rix, cl, val); } else { - c.setIndexRange(rix, cl, cu+1, a, ix, src.clen); + c.setIndexRange(rix, cl, cu+1, avals, aix, src.clen); } nonZeros += (c.size(rix) - lnnz); - } - else //general case (w/o awareness NNZ) - { + } + else { //general case (w/o awareness NNZ) for( int j=0; j<src.clen; j++ ) { - double val = a[ix+j]; + double val = avals[aix+j]; if( val != 0 ) c.set(rix, cl+j, val); } @@ -1604,7 +1592,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if( src.isEmptyBlock(false) ) { if( awareDestNZ && denseBlock != null ) { nonZeros -= recomputeNonZeros(rl, ru, cl, cu); - copyEmptyToDense(rl, ru, cl, cu); + denseBlock.set(rl, ru+1, cl, cu+1, 0); } return; } @@ -1617,23 +1605,14 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab nonZeros = nonZeros - recomputeNonZeros(rl, ru, cl, cu) + src.nonZeros; //copy values - double[] a = src.getDenseBlockValues(); - double[] c = getDenseBlockValues(); - int rowLen = cu-cl+1; - if(clen == src.clen) //optimization for equal width - System.arraycopy(a, 0, c, rl*clen+cl, src.rlen*src.clen); - else - for( int i=0, ix1=0, ix2=rl*clen+cl; i<src.rlen; i++, ix1+=src.clen, ix2+=clen ) { - System.arraycopy(a, ix1, c, ix2, rowLen); - } + DenseBlock a = src.getDenseBlock(); + DenseBlock c = getDenseBlock(); + c.set(rl, ru+1, cl, cu+1, a); } - private void copyEmptyToSparse(int rl, int ru, int cl, int cu, boolean updateNNZ ) - { + private void copyEmptyToSparse(int rl, int ru, int cl, int cu, boolean updateNNZ ) { SparseBlock a = sparseBlock; - - if( cl==cu ) //specific case: column vector - { + if( cl==cu ) { //specific case: column vector for( int i=rl; i<=ru; i++ ) if( !a.isEmpty(i) ) { boolean update = a.set(i, cl, 0); @@ -1641,8 +1620,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab nonZeros -= update ? 1 : 0; } } - else - { + else { for( int i=rl; i<=ru; i++ ) if( !a.isEmpty(i) ) { int lnnz = a.size(i); @@ -1652,20 +1630,8 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } } } - - private void copyEmptyToDense(int rl, int ru, int cl, int cu) { - int rowLen = cu-cl+1; - double[] a = getDenseBlockValues(); - if(clen == rowLen) //optimization for equal width - Arrays.fill(a, rl*clen+cl, ru*clen+cu+1, 0); - else - for( int i=rl, ix2=rl*clen+cl; i<=ru; i++, ix2+=clen ) - Arrays.fill(a, ix2, ix2+rowLen, 0); - } - public void merge(CacheBlock that, boolean appendOnly) - throws DMLRuntimeException - { + public void merge(CacheBlock that, boolean appendOnly) throws DMLRuntimeException { merge((MatrixBlock)that, appendOnly); } @@ -2540,8 +2506,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return est; } - private boolean estimateSparsityOnSlice(int selectRlen, int selectClen, int finalRlen, int finalClen) - { + private boolean estimateSparsityOnSlice(int selectRlen, int selectClen, int finalRlen, int finalClen) { long ennz = (long)((double)nonZeros/rlen/clen*selectRlen*selectClen); return evalSparseFormatInMemory(finalRlen, finalClen, ennz); } @@ -2554,6 +2519,12 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return evalSparseFormatInMemory(rlenm1, clenm1, ennz); } + private boolean requiresInplaceSparseBlockOnLeftIndexing(boolean sparse, UpdateType update, long nnz) { + return sparse && update != UpdateType.INPLACE_PINNED + && !isShallowSerialize() && (nnz <= Integer.MAX_VALUE + || DEFAULT_INPLACE_SPARSEBLOCK==SparseBlock.Type.MCSR); + } + private static boolean estimateSparsityOnGroupedAgg( long rlen, long groups ) { long ennz = Math.min(groups, rlen); return evalSparseFormatInMemory(groups, 1, ennz); @@ -3521,28 +3492,18 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2, k); else LibMatrixMult.matrixMultPermute(this, m2, ret1, ret2); - } - public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, IndexRange ixrange, MatrixBlock ret, UpdateType update) throws DMLRuntimeException { - return leftIndexingOperations(rhsMatrix, ixrange, ret, update, null); - } - - public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, IndexRange ixrange, MatrixBlock ret, UpdateType update, String opcode) - throws DMLRuntimeException + public final MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, + IndexRange ixrange, MatrixBlock ret, UpdateType update) + throws DMLRuntimeException { - return leftIndexingOperations( - rhsMatrix, (int)ixrange.rowStart, (int)ixrange.rowEnd, - (int)ixrange.colStart, (int)ixrange.colEnd, ret, update, opcode); - } - - public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, - int cl, int cu, MatrixBlock ret, UpdateType update) throws DMLRuntimeException { - return leftIndexingOperations(rhsMatrix, rl, ru, cl, cu, ret, update, null); + return leftIndexingOperations(rhsMatrix, (int)ixrange.rowStart, + (int)ixrange.rowEnd, (int)ixrange.colStart, (int)ixrange.colEnd, ret, update); } - - public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, int rl, int ru, - int cl, int cu, MatrixBlock ret, UpdateType update, String opcode) + + public MatrixBlock leftIndexingOperations(MatrixBlock rhsMatrix, + int rl, int ru, int cl, int cu, MatrixBlock ret, UpdateType update) throws DMLRuntimeException { // Check the validity of bounds @@ -3560,22 +3521,19 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } MatrixBlock result = ret; - boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, nonZeros, + boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, nonZeros, rhsMatrix.getNumRows(), rhsMatrix.getNumColumns(), rhsMatrix.getNonZeros()); - if( !update.isInPlace() ) //general case - { + if( !update.isInPlace() ) { //general case if(result==null) result=new MatrixBlock(rlen, clen, sp); else result.reset(rlen, clen, sp); result.copy(this, sp); } - else //update in-place - { + else { //update in-place //use current block as in-place result result = this; - //ensure that the current block adheres to the sparsity estimate //and thus implicitly the memory budget used by the compiler if( result.sparse && !sp ) @@ -3584,10 +3542,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab result.denseToSparse(); //ensure right sparse block representation to prevent serialization - if( result.sparse && update != UpdateType.INPLACE_PINNED ) { + if( requiresInplaceSparseBlockOnLeftIndexing(result.sparse, update, result.nonZeros+rhsMatrix.nonZeros) ) result.sparseBlock = SparseBlockFactory.copySparseBlock( - DEFAULT_INPLACE_SPARSEBLOCK, result.sparseBlock, false); - } + DEFAULT_INPLACE_SPARSEBLOCK, result.sparseBlock, false); } //NOTE conceptually we could directly use a zeroout and copy(..., false) but @@ -3602,33 +3559,25 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab result.quickSetValue(rl, cl, src.quickGetValue(0, 0)); } else { //general case - long t1 = opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0; - boolean isCSRCopy = false; //handle csr sparse blocks separately to avoid repeated shifting on column-wise access if( !result.isEmptyBlock(false) && result.sparse && result.sparseBlock instanceof SparseBlockCSR ) { SparseBlockCSR sblock = (SparseBlockCSR) result.sparseBlock; - if( src.sparse ) + if( src.sparse || src.isEmptyBlock(false) ) sblock.setIndexRange(rl, ru+1, cl, cu+1, src.getSparseBlock()); else { //dense - double[] data = (src.getDenseBlock()!=null) ? - src.getDenseBlockValues() : null; - sblock.setIndexRange(rl, ru+1, cl, cu+1, data, - 0, src.getNumRows()*src.getNumColumns()); + for(int bi=0; bi<src.denseBlock.numBlocks(); bi++) { + int rpos = bi * src.denseBlock.blockSize(); + int blen = src.denseBlock.blockSize(bi); + sblock.setIndexRange(rl+rpos, rl+rpos+blen, cl, cu+1, + src.denseBlock.valuesAt(bi), 0, src.rlen*src.clen); + } } result.nonZeros = sblock.size(); - isCSRCopy = true; } //copy submatrix into result else { result.copy(rl, ru, cl, cu, src, true); } - if(opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS) { - long t2 = System.nanoTime(); - if(isCSRCopy) - GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_CSR_LIX_COPY, t2-t1); - else - GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_LIX_COPY, t2-t1); - } } return result; @@ -3649,36 +3598,33 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab * @return matrix block * @throws DMLRuntimeException if DMLRuntimeException occurs */ - public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, int cl, MatrixBlock ret, UpdateType update) + public MatrixBlock leftIndexingOperations(ScalarObject scalar, int rl, int cl, MatrixBlock ret, UpdateType update) throws DMLRuntimeException { double inVal = scalar.getDoubleValue(); boolean sp = estimateSparsityOnLeftIndexing(rlen, clen, nonZeros, 1, 1, (inVal!=0)?1:0); - if( !update.isInPlace() ) //general case - { + if( !update.isInPlace() ) { //general case if(ret==null) ret=new MatrixBlock(rlen, clen, sp); else ret.reset(rlen, clen, sp); ret.copy(this, sp); - } - else //update in-place - { + else { //update in-place //use current block as in-place result ret = this; - //ensure right sparse block representation to prevent serialization - if( ret.sparse && update != UpdateType.INPLACE_PINNED ) { + if( requiresInplaceSparseBlockOnLeftIndexing(ret.sparse, update, ret.nonZeros+1) ) ret.sparseBlock = SparseBlockFactory.copySparseBlock( - DEFAULT_INPLACE_SPARSEBLOCK, ret.sparseBlock, false); - } + DEFAULT_INPLACE_SPARSEBLOCK, ret.sparseBlock, false); } ret.quickSetValue(rl, cl, inVal); return ret; } + + public MatrixBlock sliceOperations(IndexRange ixrange, MatrixBlock ret) throws DMLRuntimeException { return sliceOperations( @@ -3727,7 +3673,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab // actual slice operation if( rl==0 && ru==rlen-1 && cl==0 && cu==clen-1 ) { // copy if entire matrix required - result.copy( this ); + if( deep ) + result.copy( this ); + else + result = this; } else //general case { @@ -3750,7 +3699,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if( cl==cu ) //COLUMN VECTOR { - //note: always dense dest + //note: always dense single-block dest dest.allocateDenseBlock(); double[] c = dest.getDenseBlockValues(); for( int i=rl; i<=ru; i++ ) { @@ -3772,18 +3721,18 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } else //general case (sparse/dense dest) { - for(int i=rl; i <= ru; i++) - if( !sparseBlock.isEmpty(i)) - { - int apos = sparseBlock.pos(i); - int alen = sparseBlock.size(i); - int[] aix = sparseBlock.indexes(i); - double[] avals = sparseBlock.values(i); - int astart = (cl>0)?sparseBlock.posFIndexGTE(i, cl) : 0; - if( astart != -1 ) - for( int j=apos+astart; j<apos+alen && aix[j] <= cu; j++ ) - dest.appendValue(i-rl, aix[j]-cl, avals[j]); - } + SparseBlock sblock = sparseBlock; + for(int i=rl; i <= ru; i++) { + if( sblock.isEmpty(i) ) continue; + int apos = sblock.pos(i); + int alen = sblock.size(i); + int[] aix = sblock.indexes(i); + double[] avals = sblock.values(i); + int astart = (cl>0)?sblock.posFIndexGTE(i, cl) : 0; + if( astart != -1 ) + for( int j=apos+astart; j<apos+alen && aix[j] <= cu; j++ ) + dest.appendValue(i-rl, aix[j]-cl, avals[j]); + } } } @@ -3796,32 +3745,25 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab dest.allocateDenseBlock(); //indexing operation - if( cl==cu ) //COLUMN INDEXING - { - if( clen==1 ) //vector -> vector - { - System.arraycopy(getDenseBlockValues(), rl, dest.getDenseBlockValues(), 0, ru-rl+1); + if( cl==cu ) { //COLUMN INDEXING + //note: output always single block + if( clen==1 ) { //vector -> vector + System.arraycopy(getDenseBlockValues(), rl, + dest.getDenseBlockValues(), 0, ru-rl+1); } - else //matrix -> vector - { - //IBM JVM bug (JDK7) causes crash for certain cl/cu values (e.g., divide by zero for 4) - //for( int i=rl*clen+cl, ix=0; i<=ru*clen+cu; i+=clen, ix++ ) - // dest.denseBlock[ix] = denseBlock[i]; - int len = clen; - double[] a = getDenseBlockValues(); + else { //matrix -> vector + DenseBlock a = getDenseBlock(); double[] c = dest.getDenseBlockValues(); - for( int i=rl*len+cl, ix=0; i<=ru*len+cu; i+=len, ix++ ) - c[ix] = a[i]; + for( int i=rl; i<=ru; i++ ) + c[i] = a.get(i, cl); } } - else // GENERAL RANGE INDEXING - { - int len1 = clen; - int len2 = dest.clen; - double[] a = getDenseBlockValues(); - double[] c = dest.getDenseBlockValues(); - for(int i = rl, ix1 = rl*len1+cl, ix2=0; i <= ru; i++, ix1+=len1, ix2+=len2) - System.arraycopy(a, ix1, c, ix2, len2); + else { // GENERAL RANGE INDEXING + DenseBlock a = getDenseBlock(); + DenseBlock c = dest.getDenseBlock(); + int len = dest.clen; + for(int i = rl; i <= ru; i++) + System.arraycopy(a.values(i), a.pos(i)+cl, c.values(i-rl), c.pos(i-rl), len); } //compute nnz of output (not maintained due to native calls) @@ -4723,7 +4665,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if( !(op.binaryFn instanceof Multiply && op.aggOp.increOp.fn instanceof Plus) ) { throw new DMLRuntimeException("Unsupported binary aggregate operation: ("+op.binaryFn+", "+op.aggOp+")."); } - + //setup meta data (dimensions, sparsity) int rl = m1.rlen; int cl = m2.clen;
