Repository: systemml Updated Branches: refs/heads/master b152d73b3 -> e25800f04
[SYSTEMML-2054] Performance sparse-sparse left indexing operations This patch significantly improves the performance, and in some cases also the memory efficiency, of sparse-sparse left indexing operations such as X[a:b,c:d] = Y, where both X and Y are sparse. In detail, this includes the following improvements: (1) Shallow copy for sparse rows into empty target rows for c==0 (zero alignment allows for shallow copy because column indexes do not require shifting in this case). (2) Exact preallocation for copying sparse rows into empty target rows in case the indexing range is not zero aligned. (3) New sparse block primitive for inserting a sparse row into populated sparse rows, incl its implementation for the MCSR, CSR, and COO sparse block formats. (4) Avoid unnecessary zero out operations over the target range, which is no longer necessary with the new primitive described in (3). On a scenarios of 100 iterations of sparse-sparse left indexing operations with X being 10K x 15K and Y being 5k x 10K (sparsity 0.1), the cumulative runtime improved as follows: (1) X empty, X[2500:7400, 1:10000] = Y: 22.8s -> 0.089s (2) X empty, X[2500:7400, 5001:15000] = Y: 25.5s -> 5.3s (3) X sparsity 0.01, X[2500:7400, 1:10000] = Y: 57.1s -> 4.9s Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e25800f0 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e25800f0 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e25800f0 Branch: refs/heads/master Commit: e25800f040732c685a64d467782e04b4a04fd2f3 Parents: b152d73 Author: Matthias Boehm <[email protected]> Authored: Sat Dec 30 21:47:15 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Dec 30 22:43:26 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/MatrixBlock.java | 72 ++++++++------------ .../sysml/runtime/matrix/data/SparseBlock.java | 16 ++++- .../runtime/matrix/data/SparseBlockCOO.java | 23 ++++++- .../runtime/matrix/data/SparseBlockCSR.java | 25 ++++++- .../runtime/matrix/data/SparseBlockMCSR.java | 14 +++- .../runtime/matrix/data/SparseRowVector.java | 63 +++++++++++------ 6 files changed, 141 insertions(+), 72 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index aa728b0..b34353f 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -1429,8 +1429,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab copyDenseToDense(rl, ru, cl, cu, src, awareDestNZ); } - private void copySparseToSparse(int rl, int ru, int cl, int cu, MatrixBlock src, boolean awareDestNZ) - { + private void copySparseToSparse(int rl, int ru, int cl, int cu, MatrixBlock src, boolean awareDestNZ) { //handle empty src and dest if( src.isEmptyBlock(false) ) { if( awareDestNZ && sparseBlock != null ) @@ -1438,54 +1437,37 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab return; } - if(sparseBlock==null) - allocateSparseRowsBlock(false); - else if( awareDestNZ ) { - copyEmptyToSparse(rl, ru, cl, cu, true); - //explicit clear if awareDestNZ because more efficient since - //src will have multiple columns and only few overwriting values - } + allocateSparseRowsBlock(false); + //copy values SparseBlock a = src.sparseBlock; SparseBlock b = sparseBlock; - - //copy values - for( int i=0; i<src.rlen; i++ ) - { - if( !a.isEmpty(i) ) - { - int apos = a.pos(i); - int alen = a.size(i); - int[] aix = a.indexes(i); - double[] avals = a.values(i); - - if( b.isEmpty(rl+i) ) { - b.allocate(rl+i, estimatedNNzsPerRow, clen); - for( int j=apos; j<apos+alen; j++ ) - b.append(rl+i, cl+aix[j], avals[j]); - if( awareDestNZ ) - nonZeros += b.size(rl+i); - } - else if( awareDestNZ ) //general case (w/ awareness NNZ) - { - int lnnz = b.size(rl+i); - if( cl==cu && cl==aix[apos] ) { - b.set(rl+i, cl, avals[apos] ); - } - else { - //TODO perf sparse row - b.deleteIndexRange(rl+i, cl, cu+1); - for( int j=apos; j<apos+alen; j++ ) - b.set(rl+i, cl+aix[j], avals[j]); - } - nonZeros += (b.size(rl+i) - lnnz); + for( int i=0; i<src.rlen; i++ ) { + if( a.isEmpty(i) ) { + copyEmptyToSparse(rl+i, rl+i, cl, cu, true); + continue; + } + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + //copy row into empty target row + if( b.isEmpty(rl+i) ) { + if( cl == 0 ) { //no index offset needed + appendRow(rl+i, a.get(i), false); + nonZeros -= alen; //avoid nnz corruption } - else //general case (w/o awareness NNZ) - { - //TODO perf sparse row - for( int j=apos; j<apos+alen; j++ ) - b.set(rl+i, cl+aix[j], avals[j]); + else { + b.allocate(rl+i, alen); + b.setIndexRange(rl+i, cl, cu+1, avals, aix, apos, alen); } + nonZeros += awareDestNZ ? alen : 0; + } + //insert row into non-empty target row + else { //general case + int lnnz = b.size(rl+i); + b.setIndexRange(rl+i, cl, cu+1, avals, aix, apos, alen); + nonZeros += awareDestNZ ? (b.size(rl+i) - lnnz) : 0; } } } http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java index 00b59e0..a9d75b4 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java @@ -323,7 +323,7 @@ public abstract class SparseBlock implements Serializable public abstract void append(int r, int c, double v); /** - * Sets a sorted array of non-zeros values into the column range [cl,cu) + * Sets a dense array of non-zeros values into the column range [cl,cu) * in row r. The passed value array may be larger and the relevant range * is given by [vix,vix+len). * @@ -337,6 +337,20 @@ public abstract class SparseBlock implements Serializable public abstract void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen); /** + * Sets a sparse array of non-zeros values and indexes into the column range [cl,cu) + * in row r. The passed value array may be larger. + * + * @param r row index starting at 0 + * @param cl lower column index starting at 0 + * @param cu upper column index starting at 0 + * @param v value array + * @param vix column index array + * @param vpos start index in value and index arrays + * @param vlen number of relevant values + */ + public abstract void setIndexRange(int r, int cl, int cu, double[] v, int[] vix, int vpos, int vlen); + + /** * Deletes all non-zero values of the given column range [cl,cu) in row r. * * @param r row index starting at 0 http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java index 4f60625..bd59644 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java @@ -344,7 +344,7 @@ public class SparseBlockCOO extends SparseBlock @Override public void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen) { - //delete existing values in range if necessary + //delete existing values in range if necessary deleteIndexRange(r, cl, cu); //determine input nnz @@ -366,6 +366,27 @@ public class SparseBlockCOO extends SparseBlock index++; } } + + @Override + public void setIndexRange(int r, int cl, int cu, double[] v, int[] vix, int vpos, int vlen) { + //delete existing values in range if necessary + deleteIndexRange(r, cl, cu); + + //prepare free space (allocate and shift) + int lsize = _size+vlen; + if( _values.length < lsize ) + resize(lsize); + int index = internPosFIndexGT(r, cl); + shiftRightByN((index>0)?index:pos(r+1), vlen); + + //insert values + for( int i=vpos; i<vpos+vlen; i++ ) { + _rindexes[ index ] = r; + _cindexes[ index ] = cl+vix[i]; + _values[ index ] = v[i]; + index++; + } + } @Override public void deleteIndexRange(int r, int cl, int cu) { http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java index 9019c06..2a2ccef 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java @@ -503,7 +503,7 @@ public class SparseBlockCSR extends SparseBlock @Override public void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen) { - //delete existing values in range if necessary + //delete existing values in range if necessary if( !isEmpty(r) ) deleteIndexRange(r, cl, cu); @@ -528,6 +528,29 @@ public class SparseBlockCSR extends SparseBlock incrPtr(r+1, lnnz); } + @Override + public void setIndexRange(int r, int cl, int cu, double[] v, int[] vix, int vpos, int vlen) { + //delete existing values in range if necessary + if( !isEmpty(r) ) + deleteIndexRange(r, cl, cu); + + //prepare free space (allocate and shift) + int lsize = _size+vlen; + if( _values.length < lsize ) + resize(lsize); + int index = internPosFIndexGT(r, cl); + int index2 = (index>0)?index:pos(r+1); + shiftRightByN(index2, vlen); + + //insert values + for( int i=vpos; i<vpos+vlen; i++ ) { + _indexes[ index2 ] = cl+vix[i]; + _values[ index2 ] = v[i]; + index2++; + } + incrPtr(r+1, vlen); + } + /** * Inserts a sorted row-major array of non-zero values into the row and column * range [rl,ru) and [cl,cu). Note: that this is a CSR-specific method to address http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java index 9aca9e5..73db5ec 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java @@ -268,13 +268,23 @@ public class SparseBlockMCSR extends SparseBlock } @Override - public void setIndexRange(int r, int cl, int cu, double[] v, int vix, int len) { + public void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen) { if( !isAllocated(r) ) _rows[r] = new SparseRowVector(); else if( _rows[r] instanceof SparseRowScalar ) _rows[r] = new SparseRowVector(_rows[r]); //different sparse row semantics: upper bound inclusive - ((SparseRowVector)_rows[r]).setIndexRange(cl, cu-1, v, vix, len); + ((SparseRowVector)_rows[r]).setIndexRange(cl, cu-1, v, vix, vlen); + } + + @Override + public void setIndexRange(int r, int cl, int cu, double[] v, int[] vix, int vpos, int vlen) { + if( !isAllocated(r) ) + _rows[r] = new SparseRowVector(); + else if( _rows[r] instanceof SparseRowScalar ) + _rows[r] = new SparseRowVector(_rows[r]); + //different sparse row semantics: upper bound inclusive + ((SparseRowVector)_rows[r]).setIndexRange(cl, cu-1, v, vix, vpos, vlen); } @Override http://git-wip-us.apache.org/repos/asf/systemml/blob/e25800f0/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRowVector.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRowVector.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRowVector.java index 1510b7d..4f05af0 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRowVector.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRowVector.java @@ -261,35 +261,24 @@ public final class SparseRowVector extends SparseRow implements Serializable size -= (end-start); } - /** - * Inserts a dense vector into a column range; calling this methods helps to - * avoid repeated shifting of remaining values/indexes for every set value. - * - * @param lowerCol lower column index - * @param upperCol upper column index - * @param v dense vector - * @param vix ? - * @param len ? - */ - public void setIndexRange(int lowerCol, int upperCol, double[] v, int vix, int len) - { - int start = searchIndexesFirstGTE(lowerCol); + public void setIndexRange(int cl, int cu, double[] v, int vix, int vlen) { + //handle special cases + int start = searchIndexesFirstGTE(cl); if( start < 0 ) { //nothing to delete/shift - for( int i=vix; i<vix+len; i++ ) - append(lowerCol+i-vix, v[i]); + for( int i=vix; i<vix+vlen; i++ ) + append(cl+i-vix, v[i]); return; } - - int end = searchIndexesFirstGT(upperCol); + int end = searchIndexesFirstGT(cu); if( end < 0 ) { //delete all remaining size = start; - for( int i=vix; i<vix+len; i++ ) - append(lowerCol+i-vix, v[i]); + for( int i=vix; i<vix+vlen; i++ ) + append(cl+i-vix, v[i]); return; } //determine input nnz - int lnnz = UtilFunctions.computeNnz(v, vix, len); + int lnnz = UtilFunctions.computeNnz(v, vix, vlen); //prepare free space (allocate and shift) int lsize = size+lnnz-(end-start); @@ -298,14 +287,44 @@ public final class SparseRowVector extends SparseRow implements Serializable shiftRightByN(end, lnnz-(end-start)); //insert values - for( int i=vix, pos=start; i<vix+len; i++ ) + for( int i=vix, pos=start; i<vix+vlen; i++ ) if( v[i] != 0 ) { values[ pos ] = v[i]; - indexes[ pos ] = lowerCol+i-vix; + indexes[ pos ] = cl+i-vix; pos++; } } + public void setIndexRange(int cl, int cu, double[] v, int[] vix, int vpos, int vlen) { + //handle special cases + int start = searchIndexesFirstGTE(cl); + if( start < 0 ) { //nothing to delete/shift + for( int i=vpos; i<vpos+vlen; i++ ) + append(cl+vix[i], v[i]); + return; + } + int end = searchIndexesFirstGT(cu); + if( end < 0 ) { //delete all remaining + size = start; + for( int i=vpos; i<vpos+vlen; i++ ) + append(cl+vix[i], v[i]); + return; + } + + //prepare free space (allocate and shift) + int lsize = size+vlen-(end-start); + if( values.length < lsize ) + recap(lsize); + shiftRightByN(end, vlen-(end-start)); + + //insert values + for( int i=vpos, pos=start; i<vpos+vlen; i++ ) { + values[ pos ] = v[i]; + indexes[ pos ] = cl+vix[i]; + pos++; + } + } + private void resizeAndInsert(int index, int col, double v) { //allocate new arrays int newCap = newCapacity();
