[SYSTEMML-1956] Performance sparse N:1 matrix-matrix reshape This patch improves performance for the special case of reshaping a sparse m x n matrix into m2 x n2 where n%n2==0. In this case, we can easily determine the number of non-zeros per target row from the input row block, which allows to allocate the output once without unnecessary repeated allocations.
On a 58825360 x 300 sparse input, this patch improved performance from 79s to 30.6s. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/92bad9ee Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/92bad9ee Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/92bad9ee Branch: refs/heads/master Commit: 92bad9ee3fc5389124faad0eb61eda748871e102 Parents: 0b5480b Author: Matthias Boehm <[email protected]> Authored: Wed Oct 11 15:39:41 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Thu Oct 12 01:13:06 2017 -0700 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixReorg.java | 24 ++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/92bad9ee/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java index c64bbc6..0bd4402 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixReorg.java @@ -1161,7 +1161,7 @@ public class LibMatrixReorg // * vector-matrix not really different from general // * clen=1 and cols=1 will never be sparse. - if( rows==1 ) //MATRIX->VECTOR + if( rows==1 ) //MATRIX->VECTOR { //note: cache-friendly on a and c; append-only c.allocate(0, estnnz, cols); @@ -1177,6 +1177,26 @@ public class LibMatrixReorg } } } + else if( cols%clen==0 ) { //SPECIAL N:1 MATRIX->MATRIX + int n = cols/clen; + for(int bi=0, ci=0; bi<rlen; bi+=n, ci++) { + //allocate output row once (w/o re-allocations) + long lnnz = a.size(bi, bi+n); + c.allocate(ci, (int)lnnz); + //copy N input rows into output row + for( int i=bi, cix=0; i<bi+n; i++, cix+=clen ) { + if(a.isEmpty(i)) continue; + int apos = a.pos(i); + int alen = a.size(i); + int[] aix = a.indexes(i); + double[] avals = a.values(i); + for( int j=apos; j<apos+alen; j++ ) { + int cj = (int)((cix+aix[j])%cols); + c.append(ci, cj, avals[j]); + } + } + } + } else //GENERAL CASE: MATRIX->MATRIX { //note: cache-friendly on a but not c; append-only @@ -1193,7 +1213,7 @@ public class LibMatrixReorg for( int j=apos; j<apos+alen; j++ ) { int ci = (int)((cix+aix[j])/cols); - int cj = (int)((cix+aix[j])%cols); + int cj = (int)((cix+aix[j])%cols); c.allocate(ci, estnnz, cols); c.append(ci, cj, avals[j]); }
