Repository: systemml
Updated Branches:
  refs/heads/master 2cf78819d -> 26d63806e


[SYSTEMML-2426] Fix integer overflow in tsmm over dense blocks >16GB.

This patch fixes a severe integer overflow in dense transpose-self
matrix multiplications (tsmm) over large dense blocks >16GB (scale-up).
Specifically, the start index computation for the 4-way vector-multiply-add
was done in terms of overall matrix indexes (which can overflow for
large dense blocks) instead of chunk-wise index offsets.
 

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/26d63806
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/26d63806
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/26d63806

Branch: refs/heads/master
Commit: 26d63806e663c6e5c08143bc944bcd26fac9431c
Parents: 2cf7881
Author: Matthias Boehm <[email protected]>
Authored: Wed Jun 27 22:30:38 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Wed Jun 27 22:30:38 2018 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/LibMatrixMult.java | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/26d63806/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index c6189ab..1b7b8f4 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1077,7 +1077,7 @@ public class LibMatrixMult
                                                        int bkpos = b.pos(bk, 
bj);
                                                        
                                                        //determine nnz of a 
(for sparsity-aware skipping of rows)
-                                                       int knnz = 
copyNonZeroElements(avals, aixi, bkpos, bj, n, ta, tbi, bklen);
+                                                       int knnz = 
copyNonZeroElements(avals, aixi, bkpos, n, ta, tbi, bklen);
                                                        
                                                        //rest not aligned to 
blocks of 4 rows
                                                        final int bn = knnz % 4;
@@ -1743,9 +1743,10 @@ public class LibMatrixMult
                                                                        if( 
a.isContiguous(bk, bkmin-1) ) {
                                                                                
double[] avals = a.values(bk);
                                                                                
int aixi = a.pos(bk, i);
+                                                                               
int bkpos = a.pos(bk, bj);
                                                                                
                                                                                
//determine nnz of a (for sparsity-aware skipping of rows)
-                                                                               
int knnz = copyNonZeroElements(avals, aixi, bk, bj, n, nx, ta, tbi, bklen);
+                                                                               
int knnz = copyNonZeroElements(avals, aixi, bkpos, n, nx, ta, tbi, bklen);
                                                                                
                                                                                
//rest not aligned to blocks of 4 rows
                                                                                
final int bn = knnz % 4;
@@ -3797,8 +3798,8 @@ public class LibMatrixMult
                return ret;
        }
 
-       private static int copyNonZeroElements( double[] a, final int aixi, 
final int bixk, final int bj, final int n, double[] tmpa, int[] tmpbi, final 
int bklen )
-       {
+       //cp non-zeros for dense-dense mm
+       private static int copyNonZeroElements( double[] a, final int aixi, 
final int bixk, final int n, double[] tmpa, int[] tmpbi, final int bklen ) {
                int knnz = 0;
                for( int k = 0; k < bklen; k++ )
                        if( a[ aixi+k ] != 0 ) {
@@ -3806,20 +3807,18 @@ public class LibMatrixMult
                                tmpbi[ knnz ] = bixk + k*n;
                                knnz ++;
                        }
-               
                return knnz;
        }
 
-       private static int copyNonZeroElements( double[] a, int aixi, final int 
bk, final int bj, final int n, final int nx, double[] tmpa, int[] tmpbi, final 
int bklen )
-       {
+       //cp non-zeros for dense tsmm
+       private static int copyNonZeroElements( double[] a, int aixi, int bixk, 
final int n, final int nx, double[] tmpa, int[] tmpbi, final int bklen ) {
                int knnz = 0;
-               for( int k = 0; k < bklen; k++, aixi+=n )
+               for( int k = 0; k < bklen; k++, aixi+=n, bixk+=nx )
                        if( a[ aixi ] != 0 ) {
                                tmpa[ knnz ] = a[ aixi ];
-                               tmpbi[ knnz ] = (bk+k) * nx + bj; //scan index 
on b
+                               tmpbi[ knnz ] = bixk;
                                knnz ++;
                        }
-               
                return knnz;
        }
        

Reply via email to