Repository: systemml Updated Branches: refs/heads/master 2cf78819d -> 26d63806e
[SYSTEMML-2426] Fix integer overflow in tsmm over dense blocks >16GB. This patch fixes a severe integer overflow in dense transpose-self matrix multiplications (tsmm) over large dense blocks >16GB (scale-up). Specifically, the start index computation for the 4-way vector-multiply-add was done in terms of overall matrix indexes (which can overflow for large dense blocks) instead of chunk-wise index offsets. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/26d63806 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/26d63806 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/26d63806 Branch: refs/heads/master Commit: 26d63806e663c6e5c08143bc944bcd26fac9431c Parents: 2cf7881 Author: Matthias Boehm <[email protected]> Authored: Wed Jun 27 22:30:38 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Wed Jun 27 22:30:38 2018 -0700 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/LibMatrixMult.java | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/26d63806/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index c6189ab..1b7b8f4 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -1077,7 +1077,7 @@ public class LibMatrixMult int bkpos = b.pos(bk, bj); //determine nnz of a (for sparsity-aware skipping of rows) - int knnz = copyNonZeroElements(avals, aixi, bkpos, bj, n, ta, tbi, bklen); + int knnz = copyNonZeroElements(avals, 
aixi, bkpos, n, ta, tbi, bklen); //rest not aligned to blocks of 4 rows final int bn = knnz % 4; @@ -1743,9 +1743,10 @@ public class LibMatrixMult if( a.isContiguous(bk, bkmin-1) ) { double[] avals = a.values(bk); int aixi = a.pos(bk, i); + int bkpos = a.pos(bk, bj); //determine nnz of a (for sparsity-aware skipping of rows) - int knnz = copyNonZeroElements(avals, aixi, bk, bj, n, nx, ta, tbi, bklen); + int knnz = copyNonZeroElements(avals, aixi, bkpos, n, nx, ta, tbi, bklen); //rest not aligned to blocks of 4 rows final int bn = knnz % 4; @@ -3797,8 +3798,8 @@ public class LibMatrixMult return ret; } - private static int copyNonZeroElements( double[] a, final int aixi, final int bixk, final int bj, final int n, double[] tmpa, int[] tmpbi, final int bklen ) - { + //cp non-zeros for dense-dense mm + private static int copyNonZeroElements( double[] a, final int aixi, final int bixk, final int n, double[] tmpa, int[] tmpbi, final int bklen ) { int knnz = 0; for( int k = 0; k < bklen; k++ ) if( a[ aixi+k ] != 0 ) { @@ -3806,20 +3807,18 @@ public class LibMatrixMult tmpbi[ knnz ] = bixk + k*n; knnz ++; } - return knnz; } - private static int copyNonZeroElements( double[] a, int aixi, final int bk, final int bj, final int n, final int nx, double[] tmpa, int[] tmpbi, final int bklen ) - { + //cp non-zeros for dense tsmm + private static int copyNonZeroElements( double[] a, int aixi, int bixk, final int n, final int nx, double[] tmpa, int[] tmpbi, final int bklen ) { int knnz = 0; - for( int k = 0; k < bklen; k++, aixi+=n ) + for( int k = 0; k < bklen; k++, aixi+=n, bixk+=nx ) if( a[ aixi ] != 0 ) { tmpa[ knnz ] = a[ aixi ]; - tmpbi[ knnz ] = (bk+k) * nx + bj; //scan index on b + tmpbi[ knnz ] = bixk; knnz ++; } - return knnz; }
