[SYSTEMML-382] Fix LibMatrixMult sparse block runtime integration. The issue was caused by an existing vectMultiplyAdd primitive that takes a start offset but had different semantics for its last argument (maxpos instead of len). For our default MCSR the behavior is equivalent, but for CSR and COO this led to incorrect results.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/c359e6c2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/c359e6c2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/c359e6c2 Branch: refs/heads/master Commit: c359e6c21fd7cdd77726e54fa736f29581c1dcc0 Parents: da7edc9 Author: Matthias Boehm <[email protected]> Authored: Thu Jan 21 08:57:37 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Jan 21 08:59:14 2016 -0800 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixMult.java | 22 +++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c359e6c2/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 01dd8c0..f81ea44 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -1862,7 +1862,7 @@ public class LibMatrixMult double val = avals[i]; if( val != 0 ) { int ix2 = aix[i]*n; - vectMultiplyAdd(val, avals, c, aix, i, ix2, alen); + vectMultiplyAdd(val, avals, c, aix, i, ix2, alen-i); } } } @@ -1925,7 +1925,7 @@ public class LibMatrixMult double val = avals[i]; if( val != 0 ) { int ix2 = aix[i]*m; - vectMultiplyAdd(val, avals, c, aix, i, ix2, alen); + vectMultiplyAdd(val, avals, c, aix, i, ix2, alen-i); } } } @@ -3277,16 +3277,26 @@ public class LibMatrixMult } } + /** + * + * @param aval + * @param b + * @param c + * @param bix + * @param bi + * @param ci + * @param len + */ 
private static void vectMultiplyAdd( final double aval, double[] b, double[] c, int[] bix, final int bi, final int ci, final int len ) { - final int bn = (len-bi)%8; + final int bn = len%8; //rest, not aligned to 8-blocks for( int j = bi; j < bi+bn; j++ ) c[ ci + bix[j] ] += aval * b[ j ]; //unrolled 8-block (for better instruction-level parallelism) - for( int j = bi+bn; j < len; j+=8 ) + for( int j = bi+bn; j < bi+len; j+=8 ) { //read 64B cacheline of b //read 64B of c via 'gather' @@ -3302,9 +3312,7 @@ public class LibMatrixMult c[ ci+bix[j+7] ] += aval * b[ j+7 ]; } } - - - + /** * * @param aval
