[SYSTEMML-382] Fix performance of CSR sparse block matrix-vector multiply. On LinregCG with 5M x 100K, sp=0.2 inputs, the new CSR sparse block showed a performance regression compared to the plain MCSR sparse block (without serialization). Matrices in both formats fit in aggregated memory, and MCSR was consumed in deserialized form. Interestingly, this regression only showed up on OpenJDK, not on IBM JDK. The issue boiled down to a JIT-related difference: because the MCSR-to-CSR conversion happens on checkpoints, two subclasses of SparseBlock are loaded into the JVM at the same time, which makes it harder to inline virtual function calls at runtime. By refactoring the code in LibMatrixMult (for the important matrix-vector and vector-matrix cases), we were able to overcome this effect.
This patch also does some code cleanup wrt partially allocated sparse block (which can never happen in the current code base). Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/281a32e4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/281a32e4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/281a32e4 Branch: refs/heads/master Commit: 281a32e46165cd13776742b06122a8d2508e9d24 Parents: e50a1cd Author: Matthias Boehm <[email protected]> Authored: Sat Mar 5 02:42:43 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Mon Mar 7 12:22:51 2016 -0800 ---------------------------------------------------------------------- .../runtime/matrix/data/LibMatrixMult.java | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/281a32e4/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 682f272..14024b3 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -1255,12 +1255,9 @@ public class LibMatrixMult } else if( n==1 ) //MATRIX-VECTOR { - for( int i=rl; i<Math.min(ru, a.numRows()); i++ ) - { - if( !a.isEmpty(i) ) { + for( int i=rl; i<ru; i++ ) + if( !a.isEmpty(i) ) c[i] = dotProduct(a.values(i), b, a.indexes(i), a.pos(i), 0, a.size(i)); - } - } } else if( pm2 && m==1 ) //VECTOR-MATRIX { @@ -1283,7 +1280,8 @@ public class LibMatrixMult } else if( pm2 && m<=16 ) //MATRIX-MATRIX (short lhs) { - for( int i=0, cix=0; i<a.numRows(); i++, 
cix+=n ) + int arlen = a.numRows(); + for( int i=0, cix=0; i<arlen; i++, cix+=n ) if( !a.isEmpty(i) ) { int apos = a.pos(i); @@ -1313,7 +1311,7 @@ public class LibMatrixMult } else //MATRIX-MATRIX { - for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<ru; i++, cix+=n ) { if( !a.isEmpty(i) ) { @@ -1350,7 +1348,7 @@ public class LibMatrixMult else { SparseBlock a = m1.sparseBlock; - for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<ru; i++, cix+=n ) { if( !a.isEmpty(i) ) { @@ -1413,7 +1411,7 @@ public class LibMatrixMult } else //MATRIX-MATRIX { - for( int i=rl, cix=rl*n; i<Math.min(ru, a.numRows()); i++, cix+=n ) + for( int i=rl, cix=rl*n; i<ru; i++, cix+=n ) { if( !a.isEmpty(i) ) { @@ -1847,7 +1845,8 @@ public class LibMatrixMult //algorithm: scan rows, foreach row self join (KIJ) if( LOW_LEVEL_OPTIMIZATION ) { - for( int r=0; r<a.numRows(); r++ ) + int arlen = a.numRows(); + for( int r=0; r<arlen; r++ ) if( !a.isEmpty(r) ) { int apos = a.pos(r); @@ -1910,7 +1909,8 @@ public class LibMatrixMult //algorithm: scan rows, foreach row self join (KIJ) if( LOW_LEVEL_OPTIMIZATION ) { - for( int r=0; r<a.numRows(); r++ ) + int arlen = a.numRows(); + for( int r=0; r<arlen; r++ ) if( !a.isEmpty(r) ) { int apos = a.pos(r);
