Repository: systemml Updated Branches: refs/heads/master d1bf80ac5 -> 09b1533de
[SYSTEMML-2255] Improved heuristics for calls to native BLAS libraries This patch fine-tunes the heuristics for deciding upon forwarding classes of matrix multiplications to native BLAS libraries. So far we excluded matrix-vector multiplications because these are usually memory-bandwidth-bound. However, on modern processors with wide SIMD registers and fused-multiply-add, this is counterproductive in certain cases. We have now extended the heuristics by allowing native BLAS calls for matrix-vector multiplications whose input sizes fit into typical L3 cache sizes. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/02e5ba56 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/02e5ba56 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/02e5ba56 Branch: refs/heads/master Commit: 02e5ba5649b23470bb6fc1726604badde5f5da8e Parents: d1bf80a Author: Matthias Boehm <[email protected]> Authored: Wed Apr 18 17:38:48 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Wed Apr 18 21:40:15 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java | 3 ++- .../org/apache/sysml/runtime/matrix/data/LibMatrixNative.java | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/02e5ba56/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java index 91adc93..d9e741a 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java @@ -59,7 +59,8 @@ public class 
LibMatrixMult private static final long MEM_OVERHEAD_THRESHOLD = 2L*1024*1024; //MAX 2 MB private static final long PAR_MINFLOP_THRESHOLD1 = 2L*1024*1024; //MIN 2 MFLOP private static final long PAR_MINFLOP_THRESHOLD2 = 128L*1024; //MIN 2 MFLOP - private static final int L2_CACHESIZE = 256 *1024; //256KB (common size) + public static final int L2_CACHESIZE = 256 * 1024; //256KB (common size) + public static final int L3_CACHESIZE = 16 * 1024 * 1024; //16MB (common size) private LibMatrixMult() { //prevent instantiation via private constructor http://git-wip-us.apache.org/repos/asf/systemml/blob/02e5ba56/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java index c0922da..1d46927 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java @@ -49,7 +49,9 @@ public class LibMatrixNative // We could encapsulate heuristics in this function // For now, we only consider matrix-vector operation to be memory bound private static boolean isMatMultMemoryBound(int m1Rlen, int m1Clen, int m2Clen) { - return m1Rlen == 1 || m1Clen == 1 || m2Clen == 1; + return (m1Rlen == 1 || m1Clen == 1 || m2Clen == 1) + && (8L*m1Rlen*m1Clen > 16 * LibMatrixMult.L3_CACHESIZE + || 8L*m1Clen*m2Clen > 16 * LibMatrixMult.L3_CACHESIZE); } /**
