[SYSTEMML-1754] Performance removeEmpty (shallow copy if unmodified)

This patch improves the performance and memory efficiency of removeEmpty
rows/columns for both dense and sparse inputs by using a shallow output
copy if the determined output dimensions match the input dimensions. 

Basic experiments (cumulative runtime for 100 iterations):
1) removeEmpty rows 10Kx10K, sp=1.0 (dense): 33.9 -> 0.027s
2) removeEmpty rows 100Kx1K, sp=1.0 (dense): 33.2 -> 0.137s
3) removeEmpty cols 10Kx10K, sp=1.0 (dense): 57.8 -> 14.8s
4) removeEmpty cols 1Kx100K, sp=1.0 (dense): 61.5 -> 14.3s
5) removeEmpty rows 10Kx10K, sp=0.1 (sparse): 0.055 -> 0.036s
6) removeEmpty rows 100Kx1K, sp=0.1 (sparse): 0.470 -> 0.127s
7) removeEmpty cols 10Kx10K, sp=0.1 (sparse): 55.3 -> 1.3s
8) removeEmpty cols 1Kx100K, sp=0.1 (sparse): 39.8 -> 1.1s

Note that this patch improves performance by orders of magnitude, except
for  sparse removeEmpty rows (which already used shallow row copy).


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/b84a4933
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/b84a4933
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/b84a4933

Branch: refs/heads/master
Commit: b84a4933c11a25d064216bc0606938bcf0d792e6
Parents: 73f7c6a
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sat Jul 8 21:44:51 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sat Jul 8 22:32:05 2017 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 103 +++++++++++--------
 1 file changed, 61 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/b84a4933/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index 73d50eb..205f787 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -1710,7 +1710,14 @@ public class LibMatrixReorg
                if( in.isEmptyBlock(false) )
                        return ret;
                
-               if( in.sparse ) //* <- SPARSE
+               if( SHALLOW_COPY_REORG && m == rlen2 ) {
+                       ret.sparse = in.sparse;
+                       if( ret.sparse )
+                               ret.sparseBlock = in.sparseBlock;
+                       else
+                               ret.denseBlock = in.denseBlock;
+               }
+               else if( in.sparse ) //* <- SPARSE
                {
                        //note: output dense or sparse
                        for( int i=0, cix=0; i<m; i++ )
@@ -1798,14 +1805,7 @@ public class LibMatrixReorg
                        clen2 += flags[j] ? 1 : 0;
                }
                
-               //Step 3: create mapping of flags to target indexes
-               int[] cix = new int[n];
-               for( int j=0, pos=0; j<n; j++ ) {
-                       if( flags[j] )
-                               cix[j] = pos++; 
-               }
-               
-               //Step 3: reset result and copy cols
+               //Step 3: reset result and copy columns
                //dense stays dense if correct input representation (but robust 
for any input), 
                // sparse might be dense/sparse
                clen2 = Math.max(clen2, 1); //ensure valid output
@@ -1814,42 +1814,61 @@ public class LibMatrixReorg
                if( in.isEmptyBlock(false) )
                        return ret;
                
-               if( in.sparse ) //* <- SPARSE 
-               {
-                       //note: output dense or sparse
-                       SparseBlock a = in.sparseBlock;
-                       
-                       for( int i=0; i<m; i++ ) 
-                               if ( !a.isEmpty(i) ) {
-                                       int apos = a.pos(i);
-                                       int alen = a.size(i);
-                                       int[] aix = a.indexes(i);
-                                       double[] avals = a.values(i);
-                                       for( int j=apos; j<apos+alen; j++ )
-                                               if( flags[aix[j]] )
-                                                       ret.appendValue(i, 
cix[aix[j]], avals[j]);
-                               }
-               }
-               else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
-               {
-                       ret.allocateDenseBlock();
-                       double[] a = in.denseBlock;
-                       double[] c = ret.denseBlock;
-                       
-                       for(int i=0, aix=0, lcix=0; i<m; i++, lcix+=clen2)
-                               for(int j=0; j<n; j++, aix++)
-                                       if( flags[j] )
-                                                c[ lcix+cix[j] ] = a[aix];     
+               if( SHALLOW_COPY_REORG && n == clen2 ) {
+                       //quick path: shallow copy if unmodified
+                       ret.sparse = in.sparse;
+                       if( ret.sparse )
+                               ret.sparseBlock = in.sparseBlock;
+                       else
+                               ret.denseBlock = in.denseBlock;
                }
-               else //SPARSE <- DENSE
+               else
                {
-                       ret.allocateSparseRowsBlock();
-                       double[] a = in.denseBlock;
+                       //create mapping of flags to target indexes
+                       int[] cix = new int[n];
+                       for( int j=0, pos=0; j<n; j++ ) {
+                               if( flags[j] )
+                                       cix[j] = pos++;
+                       }
                        
-                       for(int i=0, aix=0; i<m; i++)
-                               for(int j=0; j<n; j++, aix++)
-                                       if( flags[j] && a[aix]!=0 )
-                                                ret.appendValue(i, cix[j], 
a[aix]);    
+                       //deep copy of modified outputs
+                       if( in.sparse ) //* <- SPARSE
+                       {
+                               //note: output dense or sparse
+                               SparseBlock a = in.sparseBlock;
+                               
+                               for( int i=0; i<m; i++ )
+                                       if ( !a.isEmpty(i) ) {
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);
+                                               for( int j=apos; j<apos+alen; 
j++ )
+                                                       if( flags[aix[j]] )
+                                                               
ret.appendValue(i, cix[aix[j]], avals[j]);
+                                       }
+                       }
+                       else if( !in.sparse && !ret.sparse )  //DENSE <- DENSE
+                       {
+                               ret.allocateDenseBlock();
+                               double[] a = in.denseBlock;
+                               double[] c = ret.denseBlock;
+                               
+                               for(int i=0, aix=0, lcix=0; i<m; i++, 
lcix+=clen2)
+                                       for(int j=0; j<n; j++, aix++)
+                                               if( flags[j] )
+                                                        c[ lcix+cix[j] ] = 
a[aix];     
+                       }
+                       else //SPARSE <- DENSE
+                       {
+                               ret.allocateSparseRowsBlock();
+                               double[] a = in.denseBlock;
+                               
+                               for(int i=0, aix=0; i<m; i++)
+                                       for(int j=0; j<n; j++, aix++)
+                                               if( flags[j] && a[aix]!=0 )
+                                                        ret.appendValue(i, 
cix[j], a[aix]);
+                       }
                }
                
                //check sparsity

Reply via email to