Repository: incubator-systemml Updated Branches: refs/heads/master 10727e5d0 -> 4288da9ee
[SYSTEMML-1660] Performance sparse removeEmpty/order (shallow copy) Similar to dense vector transpose and dense row-wise reshape, this patch improves performance of sparse removeEmpty(rows) and order via shallow copy of sparse rows, exploiting the fact that removeEmpty(rows) and order do not modify the actual sparse rows. This shallow copy (by reference) is safe due to copy-on-write semantics and safe update-in-place initialization. Note that the recent change of shallow serialize of sparse matrices on buffer pool write (SYSTEMML-1140), enabled this optimization potential, because without shallow serialize we would still effectively copy the entire matrix. Experiments over different matrix shapes and sparsity showed huge improvements of orders of magnitude. For example, here are the results for 20 iterations of the respective single-node in-memory operations: 1) removeEmpty(rows), 20 iterations: 1M x 1K, sparsity=0.1: 10.3s -> 1.4s 100K x 10K, sparsity=0.1: 7.1s -> 160ms 10K x 100K, sparsity=0.1: 6.3s -> 21ms 2) order, 20 iterations: 1M x 1K, sparsity=0.1: 13.4s -> 3.9s 100K x 10K, sparsity=0.1: 7.7s -> 510ms 10K x 100K, sparsity=0.1: 6.5s -> 64ms Note that the improvements increase as the number of columns, or more precisely, the number of non-zeros per row increase because there are additional overheads (such as selection vector extraction or the actual sorting) in the number of rows. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4288da9e Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4288da9e Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4288da9e Branch: refs/heads/master Commit: 4288da9eec006891025379b9fc723cf0e72adeaf Parents: 10727e5 Author: Matthias Boehm <mboe...@gmail.com> Authored: Sat Jun 3 13:11:14 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sat Jun 3 13:11:14 2017 -0700 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixReorg.java | 44 ++++++++++---------- .../sysml/runtime/matrix/data/MatrixBlock.java | 8 +++- 2 files changed, 28 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4288da9e/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java index bfaf4c8..73d50eb 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java @@ -60,9 +60,14 @@ import org.apache.sysml.runtime.util.UtilFunctions; */ public class LibMatrixReorg { - public static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //Min 1M elements - public static final boolean SHALLOW_DENSE_VECTOR_TRANSPOSE = true; - public static final boolean SHALLOW_DENSE_ROWWISE_RESHAPE = true; + //minimum number of elements for multi-threaded execution + public static final long PAR_NUMCELL_THRESHOLD = 1024*1024; //1M + + //allow shallow dense/sparse copy for unchanged data (which is + //safe due to copy-on-write and safe update-in-place handling) + public static final boolean SHALLOW_COPY_REORG = true; + + //allow reuse of temporary blocks for certain operations public static final boolean ALLOW_BLOCK_REUSE = false; private enum ReorgType { @@ -126,7 +131,7 @@ public class LibMatrixReorg //since the physical representation of dense vectors is always the same, //we don't need to create a copy, given our copy on write semantics. //however, note that with update in-place this would be an invalid optimization - if( SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) ) { + if( SHALLOW_COPY_REORG && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) ) { out.denseBlock = in.denseBlock; return out; } @@ -160,7 +165,7 @@ public class LibMatrixReorg { //redirect small or special cases to sequential execution if( in.isEmptyBlock(false) || (in.rlen * in.clen < PAR_NUMCELL_THRESHOLD) || k == 1 - || (SHALLOW_DENSE_VECTOR_TRANSPOSE && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) ) + || (SHALLOW_COPY_REORG && !in.sparse && !out.sparse && (in.rlen==1 || in.clen==1) ) || (in.sparse && !out.sparse && in.rlen==1) || (!in.sparse && out.sparse && in.rlen==1) || (!in.sparse && out.sparse) || !out.isThreadSafe()) { @@ -395,12 +400,11 @@ public class LibMatrixReorg else //SPARSE { out.allocateSparseRowsBlock(false); - for( int i=0; i<rlen; i++ ) { - int ix = vix[i]; - if( !in.sparseBlock.isEmpty(ix) ) { - out.sparseBlock.set(i, in.sparseBlock.get(ix), true); + for( int i=0; i<rlen; i++ ) + if( !in.sparseBlock.isEmpty(vix[i]) ) { + out.sparseBlock.set(i, in.sparseBlock.get(vix[i]), + !SHALLOW_COPY_REORG); //row remains unchanged } - } } } else @@ -1085,7 +1089,7 @@ public class LibMatrixReorg return; //shallow dense by-row reshape (w/o result allocation) - if( SHALLOW_DENSE_ROWWISE_RESHAPE && rowwise ) { + if( SHALLOW_COPY_REORG && rowwise ) { //since the physical representation of dense matrices is always the same, //we don't need to create a copy, given our copy on write semantics. //however, note that with update in-place this would be an invalid optimization @@ -1677,25 +1681,19 @@ public class LibMatrixReorg { SparseBlock a = in.sparseBlock; for ( int i=0; i < m; i++ ) - if ( !a.isEmpty(i) ) { - flags[i] = true; - rlen2++; - } + rlen2 += (flags[i] = !a.isEmpty(i)) ? 1 : 0; } else //DENSE { double[] a = in.denseBlock; - - for(int i=0, aix=0; i<m; i++, aix+=n) { + for(int i=0, aix=0; i<m; i++, aix+=n) for(int j=0; j<n; j++) - if( a[aix+j] != 0 ) - { + if( a[aix+j] != 0 ) { flags[i] = true; rlen2++; //early abort for current row break; } - } } } else { @@ -1716,8 +1714,10 @@ public class LibMatrixReorg { //note: output dense or sparse for( int i=0, cix=0; i<m; i++ ) - if( flags[i] ) - ret.appendRow(cix++, in.sparseBlock.get(i)); + if( flags[i] ) { + ret.appendRow(cix++, in.sparseBlock.get(i), + !SHALLOW_COPY_REORG); + } } else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4288da9e/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index e61c6a2..43adfc6 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -693,7 +693,11 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab } } - public void appendRow(int r, SparseRow row) + public void appendRow(int r, SparseRow row) { + appendRow(r, row, true); + } + + public void appendRow(int r, SparseRow row, boolean deep) { if(row == null) return; @@ -701,7 +705,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab if(sparse) { //allocation on demand allocateSparseRowsBlock(false); - sparseBlock.set(r, row, true); + sparseBlock.set(r, row, deep); nonZeros += row.size(); } else {