Repository: incubator-systemml
Updated Branches:
  refs/heads/master d6990dccd -> b62a67c0e
[SYSTEMML-850] Cache-conscious sparse-dense wcemm block operations

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b62a67c0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b62a67c0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b62a67c0

Branch: refs/heads/master
Commit: b62a67c0e658f1f93146a9b6609d9b0447f311d1
Parents: d6990dc
Author: Matthias Boehm <[email protected]>
Authored: Sun Aug 7 17:51:58 2016 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Sun Aug 7 17:51:58 2016 -0700

----------------------------------------------------------------------
 .../runtime/matrix/data/LibMatrixMult.java | 36 ++++++++++++++------
 1 file changed, 26 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b62a67c0/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 302a1df..9d878be 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -2943,19 +2943,35 @@ public class LibMatrixMult
     SparseBlock w = mW.sparseBlock;
     double[] u = mU.denseBlock;
     double[] v = mV.denseBlock;
+    final int n = mW.clen;
     final int cd = mU.clen;
     double wceval = 0;
 
-    // approach: iterate over all cells of X and
-    for( int i=rl, uix=rl*cd; i<ru; i++, uix+=cd ) {
-      if( !w.isEmpty(i) ) {
-        int wpos = w.pos(i);
-        int wlen = w.size(i);
-        int[] wix = w.indexes(i);
-        double[] wval = w.values(i);
-        for( int k=wpos; k<wpos+wlen; k++ ) {
-          double uvij = dotProduct(u, v, uix, wix[k]*cd, cd);
-          wceval += wval[k] * FastMath.log(uvij + eps);
+    // approach: iterate over W, point-wise in order to exploit sparsity
+    // blocked over ij, while maintaining front of column indexes, where the
+    // blocksize is chosen such that we reuse each vector on average 8 times.
+    final int blocksizeIJ = (int) (8L*mW.rlen*mW.clen/mW.nonZeros);
+    int[] curk = new int[blocksizeIJ];
+
+    for( int bi=rl; bi<ru; bi+=blocksizeIJ ) {
+      int bimin = Math.min(ru, bi+blocksizeIJ);
+      //prepare starting indexes for block row
+      Arrays.fill(curk, 0);
+      //blocked execution over column blocks
+      for( int bj=0; bj<n; bj+=blocksizeIJ ) {
+        int bjmin = Math.min(n, bj+blocksizeIJ);
+        for( int i=bi, uix=bi*cd; i<bimin; i++, uix+=cd ) {
+          if( w.isEmpty(i) ) continue;
+          int wpos = w.pos(i);
+          int wlen = w.size(i);
+          int[] wix = w.indexes(i);
+          double[] wval = w.values(i);
+          int k = wpos + curk[i-bi];
+          for( ; k<wpos+wlen && wix[k]<bjmin; k++ ) {
+            double uvij = dotProduct(u, v, uix, wix[k]*cd, cd);
+            wceval += wval[k] * FastMath.log(uvij + eps);
+          }
+          curk[i-bi] = k - wpos;
         }
       }
     }
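
For context, the following is a minimal, self-contained sketch of the same blocking idea, not SystemML's code: the class BlockedWcemmSketch, the wce/dot helpers, and the CSR inputs (rowPtr/colIdx/wval) are hypothetical stand-ins for the SparseBlock/dotProduct machinery in LibMatrixMult. It tiles the sparse iteration over row/column blocks and keeps a per-row front (curk) into the column-index array, so the dense rows of U and V touched by one tile are reused while still cache-resident.

import java.util.Arrays;

// Hypothetical standalone sketch of the cache-blocked sparse-dense wce pattern,
// assuming W is given in CSR form instead of SystemML's SparseBlock.
public class BlockedWcemmSketch {

  public static double wce(int m, int n, int cd,
      int[] rowPtr, int[] colIdx, double[] wval,  // sparse W (m x n), CSR
      double[] u, double[] v, double eps) {       // dense U (m x cd), V (n x cd), row-major
    long nnz = Math.max(rowPtr[m], 1);
    // block size chosen so that each dense row vector is reused ~8 times on average
    // (capped at max(m, n) in this sketch to avoid degenerate sizes for nearly empty W)
    int blocksizeIJ = (int) Math.min((long) Math.max(m, n), Math.max(1L, 8L * m * n / nnz));
    int[] curk = new int[blocksizeIJ];  // per-row fronts into colIdx for the current row block
    double wceval = 0;

    for (int bi = 0; bi < m; bi += blocksizeIJ) {
      int bimin = Math.min(m, bi + blocksizeIJ);
      Arrays.fill(curk, 0);  // reset fronts at the start of each row block
      for (int bj = 0; bj < n; bj += blocksizeIJ) {
        int bjmin = Math.min(n, bj + blocksizeIJ);
        for (int i = bi; i < bimin; i++) {
          int wpos = rowPtr[i];
          int wlen = rowPtr[i + 1] - wpos;
          int k = wpos + curk[i - bi];
          // consume only the nonzeros of row i whose columns fall into [bj, bjmin)
          for (; k < wpos + wlen && colIdx[k] < bjmin; k++) {
            double uvij = dot(u, i * cd, v, colIdx[k] * cd, cd);
            wceval += wval[k] * Math.log(uvij + eps);
          }
          curk[i - bi] = k - wpos;  // remember where this row stopped
        }
      }
    }
    return wceval;
  }

  private static double dot(double[] a, int ai, double[] b, int bi, int len) {
    double sum = 0;
    for (int j = 0; j < len; j++)
      sum += a[ai + j] * b[bi + j];
    return sum;
  }
}

Blocking over both row and column ranges means one (bi, bj) tile repeatedly touches only the rows of v with column index in [bj, bjmin) plus the current row block of u, which is where the intended ~8x reuse comes from; the curk front avoids re-scanning each sparse row from its beginning for every column block. As a rough example of the block-size formula, a 10,000 x 10,000 W with 12.5M nonzeros gives blocksizeIJ = 8 * 10^8 / 1.25 * 10^7 = 64.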
