Repository: systemml
Updated Branches:
  refs/heads/master d1bf80ac5 -> 09b1533de


[SYSTEMML-2255] Improved heuristics for calls to native BLAS libraries

This patch fine-tunes the heuristics for deciding whether to forward
classes of matrix multiplications to native BLAS libraries. So far, we
excluded matrix-vector multiplications because these are usually
memory-bandwidth-bound. However, on modern processors with wide SIMD
registers and fused multiply-add, this is counterproductive in certain
cases. We now extend the heuristics by allowing native BLAS calls for
matrix-vector multiplications whose input sizes fit into typical L3
cache sizes.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/02e5ba56
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/02e5ba56
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/02e5ba56

Branch: refs/heads/master
Commit: 02e5ba5649b23470bb6fc1726604badde5f5da8e
Parents: d1bf80a
Author: Matthias Boehm <[email protected]>
Authored: Wed Apr 18 17:38:48 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Wed Apr 18 21:40:15 2018 -0700

----------------------------------------------------------------------
 .../java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java | 3 ++-
 .../org/apache/sysml/runtime/matrix/data/LibMatrixNative.java    | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/02e5ba56/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 91adc93..d9e741a 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -59,7 +59,8 @@ public class LibMatrixMult
        private static final long MEM_OVERHEAD_THRESHOLD = 2L*1024*1024; //MAX 
2 MB
        private static final long PAR_MINFLOP_THRESHOLD1 = 2L*1024*1024; //MIN 
2 MFLOP
        private static final long PAR_MINFLOP_THRESHOLD2 = 128L*1024; //MIN 2 
MFLOP
-       private static final int L2_CACHESIZE = 256 *1024; //256KB (common size)
+       public static final int L2_CACHESIZE = 256 * 1024; //256KB (common size)
+       public static final int L3_CACHESIZE = 16 * 1024 * 1024; //16MB (common 
size)
        
        private LibMatrixMult() {
                //prevent instantiation via private constructor

http://git-wip-us.apache.org/repos/asf/systemml/blob/02e5ba56/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index c0922da..1d46927 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -49,7 +49,9 @@ public class LibMatrixNative
        // We could encapsulate heuristics in this function
        // For now, we only consider matrix-vector operation to be memory bound
        private static boolean isMatMultMemoryBound(int m1Rlen, int m1Clen, int 
m2Clen) {
-               return m1Rlen == 1 || m1Clen == 1 || m2Clen == 1;
+               return (m1Rlen == 1 || m1Clen == 1 || m2Clen == 1)
+                       && (8L*m1Rlen*m1Clen > 16 * LibMatrixMult.L3_CACHESIZE 
+                               || 8L*m1Clen*m2Clen > 16 * 
LibMatrixMult.L3_CACHESIZE);
        }
 
        /**

Reply via email to