Repository: incubator-systemml Updated Branches: refs/heads/master 2a0e41029 -> 35f7d4140
[SYSTEMML-641] Fix select column-parallel matrix-matrix block multiply This patch fixes the codepath selection for column-wise parallelization over wide rhs matrices by taking the decision on row parallelization of rhs matrices into account. With this modification, we now also apply column-wise parallelization if the basic row parallelization constraints are met but row parallelization is rejected due to memory overhead. In a scenario of [4x1K] x [1Kx1M], the observed improvement was 2-3x. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2a241c9c Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2a241c9c Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2a241c9c Branch: refs/heads/master Commit: 2a241c9cf632bef34cd91f6126399f7cd7f49e58 Parents: 2a0e410 Author: Matthias Boehm <[email protected]> Authored: Sun Jun 5 20:49:56 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Jun 5 23:05:52 2016 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/runtime/matrix/data/LibMatrixMult.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2a241c9c/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 782987a..e54abec 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -178,7 +178,7 @@ public class LibMatrixMult //prepare row-upper for special cases of vector-matrix / matrix-matrix boolean pm2r = 
checkParMatrixMultRightInputRows(m1, m2, k); - boolean pm2c = checkParMatrixMultRightInputCols(m1, m2, k); + boolean pm2c = checkParMatrixMultRightInputCols(m1, m2, k, pm2r); int num = pm2r ? m2.rlen : pm2c ? m2.clen : m1.rlen; //core multi-threaded matrix mult computation @@ -3978,10 +3978,10 @@ public class LibMatrixMult * @param k * @return */ - private static boolean checkParMatrixMultRightInputCols( MatrixBlock m1, MatrixBlock m2, int k ) { + private static boolean checkParMatrixMultRightInputCols( MatrixBlock m1, MatrixBlock m2, int k, boolean pm2r ) { //parallelize over cols in rhs matrix if dense, number of cols in rhs is large, and lhs fits in l2 return (LOW_LEVEL_OPTIMIZATION && !m1.sparse && !m2.sparse - && m2.clen > k * 1024 && m1.rlen < k * 32 && m1.rlen > 16 && m1.clen > 1 + && m2.clen > k * 1024 && m1.rlen < k * 32 && !pm2r && 8*m1.rlen*m1.clen < 256*1024 ); //lhs fits in L2 cache }
