[SYSTEMML-1754] Performance removeEmpty (shallow copy if unmodified) This patch improves the performance and memory efficiency of removeEmpty rows/columns for both dense and sparse inputs by using a shallow output copy if the determined output dimensions match the input dimensions.
Basic experiments (cumulative runtime for 100 iterations): 1) removeEmpty rows 10Kx10K, sp=1.0 (dense): 33.9 -> 0.027s 2) removeEmpty rows 100Kx1K, sp=1.0 (dense): 33.2 -> 0.137s 3) removeEmpty cols 10Kx10K, sp=1.0 (dense): 57.8 -> 14.8s 4) removeEmpty cols 1Kx100K, sp=1.0 (dense): 61.5 -> 14.3s 5) removeEmpty rows 10Kx10K, sp=0.1 (sparse): 0.055 -> 0.036s 6) removeEmpty rows 100Kx1K, sp=0.1 (sparse): 0.470 -> 0.127s 7) removeEmpty cols 10Kx10K, sp=0.1 (sparse): 55.3 -> 1.3s 8) removeEmpty cols 1Kx100K, sp=0.1 (sparse): 39.8 -> 1.1s Note that this patch improves performance by orders of magnitude, except for sparse removeEmpty rows (which already used shallow row copy). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b84a4933 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b84a4933 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b84a4933 Branch: refs/heads/master Commit: b84a4933c11a25d064216bc0606938bcf0d792e6 Parents: 73f7c6a Author: Matthias Boehm <mboe...@gmail.com> Authored: Sat Jul 8 21:44:51 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sat Jul 8 22:32:05 2017 -0700 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixReorg.java | 103 +++++++++++-------- 1 file changed, 61 insertions(+), 42 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/b84a4933/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java index 73d50eb..205f787 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java @@ -1710,7 +1710,14 @@ public class LibMatrixReorg if( in.isEmptyBlock(false) ) return ret; - if( in.sparse ) //* <- SPARSE + if( SHALLOW_COPY_REORG && m == rlen2 ) { + ret.sparse = in.sparse; + if( ret.sparse ) + ret.sparseBlock = in.sparseBlock; + else + ret.denseBlock = in.denseBlock; + } + else if( in.sparse ) //* <- SPARSE { //note: output dense or sparse for( int i=0, cix=0; i<m; i++ ) @@ -1798,14 +1805,7 @@ public class LibMatrixReorg clen2 += flags[j] ? 1 : 0; } - //Step 3: create mapping of flags to target indexes - int[] cix = new int[n]; - for( int j=0, pos=0; j<n; j++ ) { - if( flags[j] ) - cix[j] = pos++; - } - - //Step 3: reset result and copy cols + //Step 3: reset result and copy columns //dense stays dense if correct input representation (but robust for any input), // sparse might be dense/sparse clen2 = Math.max(clen2, 1); //ensure valid output @@ -1814,42 +1814,61 @@ public class LibMatrixReorg if( in.isEmptyBlock(false) ) return ret; - if( in.sparse ) //* <- SPARSE - { - //note: output dense or sparse - SparseBlock a = in.sparseBlock; - - for( int i=0; i<m; i++ ) - if ( !a.isEmpty(i) ) { - int apos = a.pos(i); - int alen = a.size(i); - int[] aix = a.indexes(i); - double[] avals = a.values(i); - for( int j=apos; j<apos+alen; j++ ) - if( flags[aix[j]] ) - ret.appendValue(i, cix[aix[j]], avals[j]); - } - } - else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE - { - ret.allocateDenseBlock(); - double[] a = in.denseBlock; - double[] c = ret.denseBlock; - - for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2) - for(int j=0; j<n; j++, aix++) - if( flags[j] ) - c[ lcix+cix[j] ] = a[aix]; + if( SHALLOW_COPY_REORG && n == clen2 ) { + //quick path: shallow copy if unmodified + ret.sparse = in.sparse; + if( ret.sparse ) + ret.sparseBlock = in.sparseBlock; + else + ret.denseBlock = in.denseBlock; } - else //SPARSE <- DENSE + else { - ret.allocateSparseRowsBlock(); - double[] a = in.denseBlock; + //create mapping of flags to target indexes + int[] cix = new int[n]; + for( int j=0, pos=0; j<n; j++ ) { + if( flags[j] ) + cix[j] = pos++; + } - for(int i=0, aix=0; i<m; i++) - for(int j=0; j<n; j++, aix++) - if( flags[j] && a[aix]!=0 ) - ret.appendValue(i, cix[j], a[aix]); + //deep copy of modified outputs + if( in.sparse ) //* <- SPARSE + { + //note: output dense or sparse + SparseBlock a = in.sparseBlock; + + for( int i=0; i<m; i++ ) + if ( !a.isEmpty(i) ) { + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) + if( flags[aix[j]] ) + ret.appendValue(i, cix[aix[j]], avals[j]); + } + } + else if( !in.sparse && !ret.sparse ) //DENSE <- DENSE + { + ret.allocateDenseBlock(); + double[] a = in.denseBlock; + double[] c = ret.denseBlock; + + for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2) + for(int j=0; j<n; j++, aix++) + if( flags[j] ) + c[ lcix+cix[j] ] = a[aix]; + } + else //SPARSE <- DENSE + { + ret.allocateSparseRowsBlock(); + double[] a = in.denseBlock; + + for(int i=0, aix=0; i<m; i++) + for(int j=0; j<n; j++, aix++) + if( flags[j] && a[aix]!=0 ) + ret.appendValue(i, cix[j], a[aix]); + } } //check sparsity