[SYSTEMML-2260] New native tsmm matrix mult and its integration

This patch fixes the existing native tsmm implementation to properly use
BLAS dsyrk instead of BLAS dgemm, so that the computation is performed
only for the upper triangle of the output matrix. Subsequently, we use
the existing cache-conscious primitive to copy the upper triangle to the
lower triangle.

Furthermore, this patch also integrates the native tsmm implementation,
which was not used at all so far.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/2b8161db
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/2b8161db
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/2b8161db

Branch: refs/heads/master
Commit: 2b8161db97b55b612e70280d45fcec53421dc813
Parents: fde708f
Author: Matthias Boehm <[email protected]>
Authored: Thu Apr 19 22:22:07 2018 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Thu Apr 19 22:22:07 2018 -0700

----------------------------------------------------------------------
 .../cpp/lib/libsystemml_mkl-Linux-x86_64.so     | Bin 32168 -> 32208 bytes
 .../lib/libsystemml_openblas-Linux-x86_64.so    | Bin 31240 -> 31288 bytes
 src/main/cpp/libmatrixmult.cpp                  |  10 +++----
 src/main/cpp/libmatrixmult.h                    |   2 +-
 src/main/cpp/systemml.cpp                       |   4 +--
 .../runtime/matrix/data/LibMatrixNative.java    |  27 +++++++++++++++++++
 .../sysml/runtime/matrix/data/MatrixBlock.java  |   4 ++-
 .../org/apache/sysml/utils/NativeHelper.java    |   2 +-
 8 files changed, 38 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so 
b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so
index faaf5f4..adc3bbe 100755
Binary files a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so and 
b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so differ

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so
----------------------------------------------------------------------
diff --git a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so 
b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so
index 16b0b5d..0b39eaa 100755
Binary files a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so and 
b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so differ

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/cpp/libmatrixmult.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixmult.cpp b/src/main/cpp/libmatrixmult.cpp
index 3c669b6..773a85a 100644
--- a/src/main/cpp/libmatrixmult.cpp
+++ b/src/main/cpp/libmatrixmult.cpp
@@ -51,11 +51,9 @@ void smatmult(float* m1Ptr, float* m2Ptr, float* retPtr, int 
m, int k, int n, in
   cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1, m1Ptr, k, 
m2Ptr, n, 0, retPtr, n);
 }
 
-void tsmm(double* m1Ptr, double* retPtr, int m1rlen, int m1clen, bool 
isLeftTranspose, int numThreads) {
-  int m = isLeftTranspose ? m1clen : m1rlen;
-  int n = isLeftTranspose ? m1clen : m1rlen;
-  int k = isLeftTranspose ? m1rlen : m1clen;
-  
+void tsmm(double* m1Ptr, double* retPtr, int m1rlen, int m1clen, bool 
isLeftTrans, int numThreads) {
+  int n = isLeftTrans ? m1clen : m1rlen;
+  int k = isLeftTrans ? m1rlen : m1clen;
   setNumThreadsForBLAS(numThreads);
-  cblas_dgemm(CblasRowMajor, isLeftTranspose ? CblasTrans : CblasNoTrans, 
isLeftTranspose ? CblasNoTrans : CblasTrans, m, n, k, 1, m1Ptr, k, m1Ptr, n, 0, 
retPtr, n);
+  cblas_dsyrk(CblasRowMajor, CblasUpper, isLeftTrans ? CblasTrans : 
CblasNoTrans, n, k, 1, m1Ptr, n, 0, retPtr, n);
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/cpp/libmatrixmult.h
----------------------------------------------------------------------
diff --git a/src/main/cpp/libmatrixmult.h b/src/main/cpp/libmatrixmult.h
index b6ea1c4..1c7fcd9 100644
--- a/src/main/cpp/libmatrixmult.h
+++ b/src/main/cpp/libmatrixmult.h
@@ -56,6 +56,6 @@ void setNumThreadsForBLAS(int numThreads);
 void dmatmult(double* m1Ptr, double* m2Ptr, double* retPtr, int m, int k, int 
n, int numThreads);
 void smatmult(float* m1Ptr, float* m2Ptr, float* retPtr, int m, int k, int n, 
int numThreads);
 
-void tsmm(double* m1Ptr, double* retPtr, int m1rlen, int m1clen, bool 
isLeftTranspose,  int numThreads);
+void tsmm(double* m1Ptr, double* retPtr, int m1rlen, int m1clen, bool 
isLeftTrans, int numThreads);
 
 #endif

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/cpp/systemml.cpp
----------------------------------------------------------------------
diff --git a/src/main/cpp/systemml.cpp b/src/main/cpp/systemml.cpp
index b404cc9..fae0c1e 100644
--- a/src/main/cpp/systemml.cpp
+++ b/src/main/cpp/systemml.cpp
@@ -109,13 +109,13 @@ JNIEXPORT jboolean JNICALL 
Java_org_apache_sysml_utils_NativeHelper_smmdd(
 }
 
 JNIEXPORT jboolean JNICALL Java_org_apache_sysml_utils_NativeHelper_tsmm
-  (JNIEnv * env, jclass cls, jdoubleArray m1, jdoubleArray ret, jint m1rlen, 
jint m1clen, jboolean isLeftTranspose, jint numThreads) {
+  (JNIEnv * env, jclass cls, jdoubleArray m1, jdoubleArray ret, jint m1rlen, 
jint m1clen, jboolean leftTrans, jint numThreads) {
   double* m1Ptr = GET_DOUBLE_ARRAY(env, m1, numThreads);
   double* retPtr = GET_DOUBLE_ARRAY(env, ret, numThreads);
   if(m1Ptr == NULL || retPtr == NULL)
        return (jboolean) false;
 
-  tsmm(m1Ptr, retPtr, (int) m1rlen, (int) m1clen, (bool) isLeftTranspose, 
(int) numThreads);
+  tsmm(m1Ptr, retPtr, (int)m1rlen, (int)m1clen, (bool)leftTrans, 
(int)numThreads);
   
   RELEASE_INPUT_ARRAY(env, m1, m1Ptr, numThreads);
   RELEASE_ARRAY(env, ret, retPtr, numThreads);

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
index eade43f..cf4501f 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java
@@ -118,6 +118,33 @@ public class LibMatrixNative
                        LibMatrixMult.matrixMult(m1, m2, ret, k);
        }
        
+       public static void tsmm(MatrixBlock m1, MatrixBlock ret, boolean 
leftTrans, int k) {
+               if( m1.isEmptyBlock(false) )
+                       return;
+               if( NativeHelper.isNativeLibraryLoaded() && ret.clen > 1 
+                       && (!m1.sparse && m1.getDenseBlock().isContiguous() ) ) 
{
+                       ret.sparse = false;
+                       ret.allocateDenseBlock();
+                       if( NativeHelper.tsmm(m1.getDenseBlockValues(), 
+                               ret.getDenseBlockValues(), m1.rlen, m1.clen, 
leftTrans, k) ) 
+                       {
+                               long nnz = (ret.clen==1) ? 
ret.recomputeNonZeros() :
+                                       
LibMatrixMult.copyUpperToLowerTriangle(ret);
+                               ret.setNonZeros(nnz);
+                               ret.examSparsity();
+                               return;
+                       }
+                       else {
+                               Statistics.incrementNativeFailuresCounter();
+                               //fallback to default java implementation
+                       }
+               }
+               if( k > 1 )
+                       LibMatrixMult.matrixMultTransposeSelf(m1, ret, 
leftTrans, k);
+               else
+                       LibMatrixMult.matrixMultTransposeSelf(m1, ret, 
leftTrans);
+       }
+       
        /**
         * This method performs convolution (i.e. cross-correlation) operation 
on input
         * 

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index f306a7a..97f883b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -3434,7 +3434,9 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                        out.reset(dim, dim, false);
                
                //compute matrix mult
-               if( k > 1 )
+               if( NativeHelper.isNativeLibraryLoaded() )
+                       LibMatrixNative.tsmm(this, out, leftTranspose, k);
+               else if( k > 1 )
                        LibMatrixMult.matrixMultTransposeSelf(this, out, 
leftTranspose, k);
                else
                        LibMatrixMult.matrixMultTransposeSelf(this, out, 
leftTranspose);

http://git-wip-us.apache.org/repos/asf/systemml/blob/2b8161db/src/main/java/org/apache/sysml/utils/NativeHelper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/NativeHelper.java 
b/src/main/java/org/apache/sysml/utils/NativeHelper.java
index 86d849f..1a02e00 100644
--- a/src/main/java/org/apache/sysml/utils/NativeHelper.java
+++ b/src/main/java/org/apache/sysml/utils/NativeHelper.java
@@ -322,7 +322,7 @@ public class NativeHelper {
        //single-precision matrix multiply dense-dense
        public static native boolean smmdd(FloatBuffer m1, FloatBuffer m2, 
FloatBuffer ret, int m1rlen, int m1clen, int m2clen, int numThreads);
        //transpose-self matrix multiply
-       private static native boolean tsmm(double [] m1, double [] ret, int 
m1rlen, int m1clen, boolean isLeftTranspose, int numThreads);
+       public static native boolean tsmm(double[] m1, double[] ret, int 
m1rlen, int m1clen, boolean leftTrans, int numThreads);
 
        // 
----------------------------------------------------------------------------------------------------------------
        // LibMatrixDNN operations:

Reply via email to