[SYSTEMML-1956] Performance sparse N:1 matrix-matrix reshape

This patch improves performance for the special case of reshaping a
sparse m x n matrix into m2 x n2 where n%n2==0. In this case, we can
easily determine the number of non-zeros per target row from the input
row block, which allows to allocate the output once without unnecessary
repeated allocations. 

On a 58825360 x 300 sparse input, this patch improved performance from
79s to 30.6s.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/92bad9ee
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/92bad9ee
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/92bad9ee

Branch: refs/heads/master
Commit: 92bad9ee3fc5389124faad0eb61eda748871e102
Parents: 0b5480b
Author: Matthias Boehm <[email protected]>
Authored: Wed Oct 11 15:39:41 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu Oct 12 01:13:06 2017 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixReorg.java     | 24 ++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/92bad9ee/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
index c64bbc6..0bd4402 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java
@@ -1161,7 +1161,7 @@ public class LibMatrixReorg
                        // * vector-matrix not really different from general
                        // * clen=1 and cols=1 will never be sparse.
                        
-                       if( rows==1 ) //MATRIX->VECTOR  
+                       if( rows==1 ) //MATRIX->VECTOR
                        {
                                //note: cache-friendly on a and c; append-only
                                c.allocate(0, estnnz, cols);
@@ -1177,6 +1177,26 @@ public class LibMatrixReorg
                                        }
                                }
                        }
+                       else if( cols%clen==0 ) { //SPECIAL N:1 MATRIX->MATRIX
+                               int n = cols/clen;
+                               for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) {
+                                       //allocate output row once (w/o 
re-allocations)
+                                       long lnnz = a.size(bi, bi+n);
+                                       c.allocate(ci, (int)lnnz);
+                                       //copy N input rows into output row
+                                       for( int i=bi, cix=0; i<bi+n; i++, 
cix+=clen ) {
+                                               if(a.isEmpty(i)) continue;
+                                               int apos = a.pos(i);
+                                               int alen = a.size(i);
+                                               int[] aix = a.indexes(i);
+                                               double[] avals = a.values(i);
+                                               for( int j=apos; j<apos+alen; 
j++ ) {
+                                                       int cj = 
(int)((cix+aix[j])%cols);
+                                                       c.append(ci, cj, 
avals[j]);
+                                               }
+                                       }
+                               }
+                       }
                        else //GENERAL CASE: MATRIX->MATRIX
                        {
                                //note: cache-friendly on a but not c; 
append-only
@@ -1193,7 +1213,7 @@ public class LibMatrixReorg
                                                for( int j=apos; j<apos+alen; 
j++ )
                                                {
                                                        int ci = 
(int)((cix+aix[j])/cols);
-                                                       int cj = 
(int)((cix+aix[j])%cols);       
+                                                       int cj = 
(int)((cix+aix[j])%cols);
                                                        c.allocate(ci, estnnz, 
cols);
                                                        c.append(ci, cj, 
avals[j]);
                                                }

Reply via email to