This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 00ddcf4  support for lapack functions with mkl (#8577)
00ddcf4 is described below

commit 00ddcf44299a1f88c8d6448f30cf002c8942a2fa
Author: moin <asmushet...@yahoo.de>
AuthorDate: Sat Nov 11 00:37:17 2017 +0100

    support for lapack functions with mkl (#8577)
---
 make/config.mk              |  6 ++++
 make/osx.mk                 |  6 ++++
 src/operator/c_lapack_api.h | 81 ++++++++++++++++++++++++++++++++++++++++++---
 src/operator/linalg_impl.h  | 22 ++++++------
 4 files changed, 100 insertions(+), 15 deletions(-)

diff --git a/make/config.mk b/make/config.mk
index d47d4d6..a4774f0 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -105,6 +105,12 @@ USE_LAPACK = 1
 # path to lapack library in case of a non-standard installation
 USE_LAPACK_PATH =
 
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
 # add path to intel library, you may need it for MKL, if you did not add the path
 # to environment variable
 USE_INTEL_PATH = NONE
diff --git a/make/osx.mk b/make/osx.mk
index d9ce6f2..b17b04c 100644
--- a/make/osx.mk
+++ b/make/osx.mk
@@ -67,6 +67,12 @@ USE_BLAS = apple
 # only effective when compiled with blas versions openblas/apple/atlas/mkl
 USE_LAPACK = 1
 
+# by default, disable lapack when using MKL
+# switch on when there is a full installation of MKL available (not just MKL2017/MKL_ML)
+ifeq ($(USE_BLAS), mkl)
+USE_LAPACK = 0
+endif
+
 # add path to intel library, you may need it for MKL, if you did not add the path
 # to environment variable
 USE_INTEL_PATH = NONE
diff --git a/src/operator/c_lapack_api.h b/src/operator/c_lapack_api.h
index 53b0bf2..293c3f2 100644
--- a/src/operator/c_lapack_api.h
+++ b/src/operator/c_lapack_api.h
@@ -70,6 +70,9 @@
 
 using namespace mshadow;
 
+// Will cause clash with MKL fortran layer headers
+#if MSHADOW_USE_MKL == 0
+
 extern "C" {
 
   // Fortran signatures
@@ -118,15 +121,14 @@ extern "C" {
   MXNET_LAPACK_FSIG_SYEVD(dsyevd, double)
 }
 
-#define MXNET_LAPACK_ROW_MAJOR 101
-#define MXNET_LAPACK_COL_MAJOR 102
+#endif  // MSHADOW_USE_MKL == 0
+
 
 #define CHECK_LAPACK_UPLO(a) \
   CHECK(a == 'U' || a == 'L') << "neither L nor U specified as triangle in lapack call";
 
 inline char loup(char uplo, bool invert) { return invert ? (uplo == 'U' ? 'L' : 'U') : uplo; }
-
 
 /*!
  * \brief Transpose matrix data in memory
  *
@@ -160,7 +162,75 @@ inline void flip<cpu, double>(int m, int n,
 }
 
 
-#if MXNET_USE_LAPACK
+#if (MSHADOW_USE_MKL && MXNET_USE_LAPACK)
+
+  // We interface with the C-interface of MKL
+  // as this is the preferred way.
+  #include <mkl_lapacke.h>
+
+  #define MXNET_LAPACK_ROW_MAJOR LAPACK_ROW_MAJOR
+  #define MXNET_LAPACK_COL_MAJOR LAPACK_COL_MAJOR
+
+  // These functions already have matching signatures.
+  #define MXNET_LAPACK_spotrf LAPACKE_spotrf
+  #define MXNET_LAPACK_dpotrf LAPACKE_dpotrf
+  #define MXNET_LAPACK_spotri LAPACKE_spotri
+  #define MXNET_LAPACK_dpotri LAPACKE_dpotri
+  #define mxnet_lapack_sposv LAPACKE_sposv
+  #define mxnet_lapack_dposv LAPACKE_dposv
+
+  // The following functions differ in signature from the
+  // MXNET_LAPACK-signature and have to be wrapped.
+  #define MXNET_LAPACK_CWRAP_GELQF(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##gelqf(int matrix_layout, int m, int n, \
+                                          dtype *a, int lda, dtype* tau, \
+                                          dtype* work, int lwork) { \
+    if (lwork != -1) { \
+      return LAPACKE_##prefix##gelqf(matrix_layout, m, n, a, lda, tau); \
+    } \
+    *work = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_GELQF(s, float)
+  MXNET_LAPACK_CWRAP_GELQF(d, double)
+
+  #define MXNET_LAPACK_CWRAP_ORGLQ(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##orglq(int matrix_layout, int m, int n, \
+                                          dtype *a, int lda, dtype* tau, \
+                                          dtype* work, int lwork) { \
+    if (lwork != -1) { \
+      return LAPACKE_##prefix##orglq(matrix_layout, m, n, m, a, lda, tau); \
+    } \
+    *work = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_ORGLQ(s, float)
+  MXNET_LAPACK_CWRAP_ORGLQ(d, double)
+
+  // This has to be called internally in COL_MAJOR format even when matrix_layout
+  // is row-major as otherwise the eigenvectors would be returned as cols in a
+  // row-major matrix layout (see MKL documentation).
+  // We also have to allocate at least one DType element as workspace as the
+  // calling code assumes that the workspace has at least that size.
+  #define MXNET_LAPACK_CWRAP_SYEVD(prefix, dtype) \
+  inline int MXNET_LAPACK_##prefix##syevd(int matrix_layout, char uplo, int n, dtype *a, \
+                                          int lda, dtype *w, dtype *work, int lwork, \
+                                          int *iwork, int liwork) { \
+    if (lwork != -1) { \
+      char o(loup(uplo, (matrix_layout == MXNET_LAPACK_ROW_MAJOR))); \
+      return LAPACKE_##prefix##syevd(LAPACK_COL_MAJOR, 'V', o, n, a, lda, w); \
+    } \
+    *work = 1; \
+    *iwork = 0; \
+    return 0; \
+  }
+  MXNET_LAPACK_CWRAP_SYEVD(s, float)
+  MXNET_LAPACK_CWRAP_SYEVD(d, double)
+
+#elif MXNET_USE_LAPACK
+
+  #define MXNET_LAPACK_ROW_MAJOR 101
+  #define MXNET_LAPACK_COL_MAJOR 102
 
   // These functions can be called with either row- or col-major format.
   #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
@@ -271,6 +341,9 @@ inline void flip<cpu, double>(int m, int n,
      " Ensure that lapack library is installed and build with USE_LAPACK=1 to get lapack" \
      " functionalities.")
 
+  #define MXNET_LAPACK_ROW_MAJOR 101
+  #define MXNET_LAPACK_COL_MAJOR 102
+
   // Define compilable stubs.
   #define MXNET_LAPACK_CWRAPPER1(func, dtype) \
   inline int MXNET_LAPACK_##func(int matrix_layout, char uplo, int n, dtype* a, int lda) { \
diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h
index b1b35cf..b3e6573 100644
--- a/src/operator/linalg_impl.h
+++ b/src/operator/linalg_impl.h
@@ -56,7 +56,7 @@ inline void check_gemm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for gemm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_GEMM(fname, DType) \
 template<> inline \
@@ -98,7 +98,7 @@ void linalg_batch_gemm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
   LOG(FATAL) << "linalg_batch_gemm not implemented by mxnet for cpu, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 LINALG_CPU_GEMM(sgemm, float)
 LINALG_CPU_GEMM(dgemm, double)
@@ -253,7 +253,7 @@ inline void check_trsm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for trsm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_TRSM(fname, DType) \
 template<> inline \
@@ -292,7 +292,7 @@ void linalg_batch_trsm<cpu, DType>(const Tensor<cpu, 3, DType>& A, const Tensor<
   LOG(FATAL) << "linalg_batch_trsm not implemented, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 LINALG_CPU_TRSM(strsm, float)
 LINALG_CPU_TRSM(dtrsm, double)
@@ -389,7 +389,7 @@ inline void linalg_gemm(const Tensor<xpu, 2, DType>& A,
   }
 }
 
-#if MSHADOW_USE_CBLAS == 0
+#if (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
 
 // A template for a cpu linalg_gemm implementation using mshadow::dot()
 #define LINALG_CPU_GEMM_NO_CBLAS(DType) \
@@ -443,7 +443,7 @@ void linalg_gemm<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
 LINALG_CPU_GEMM_NO_CBLAS(float)
 LINALG_CPU_GEMM_NO_CBLAS(double)
 
-#endif  // (MSHADOW_USE_CBLAS == 0)
+#endif  // (MSHADOW_USE_CBLAS == 0 && MSHADOW_USE_MKL == 0)
 
 //////////////////////////////// TRMM ////////////////////////////////////////////
 
@@ -463,7 +463,7 @@ inline void check_trmm(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DTyp
     << "Non compatible matrix dimensions between inputs A and B for trmm";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_TRMM(fname, DType) \
 template<> inline \
@@ -485,7 +485,7 @@ void linalg_trmm<cpu, DType>(const Tensor<cpu, 2, DType>& A, const Tensor<cpu, 2
   LOG(FATAL) << "linalg_trmm not implemented, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 #define LINALG_XPU_BATCH_TRMM(xpu, DType) \
 template<> inline \
@@ -735,7 +735,7 @@ void check_syrk(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 2, DType>& B,
     << "Non compatible matrix dimensions between inputs A and B for syrk";
 }
 
-#if MSHADOW_USE_CBLAS == 1
+#if (MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1)
 
 #define LINALG_CPU_SYRK(fname, DType) \
 template<> inline \
@@ -758,7 +758,7 @@ void linalg_syrk<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
   LOG(FATAL) << "linalg_syrk not implemented by mxnet for cpu, needs cblas!"; \
 }
 
-#endif  // MSHADOW_USE_CBLAS == 1
+#endif  // MSHADOW_USE_CBLAS == 1 || MSHADOW_USE_MKL == 1
 
 #define LINALG_XPU_BATCH_SYRK(xpu, DType) \
 template<> inline \
@@ -811,7 +811,7 @@ void check_gelqf(const Tensor<xpu, 2, DType>& A,
   // Any checking that helps user debug potential problems.
   CHECK_LE(A.size(0), A.size(1))
     << "A must have num(rows) <= num(columns)";
-  CHECK_LT(A.size(0), work.size(0))
+  CHECK_LE(A.size(0), work.size(0))
     << "Size of work is too small";
 }
--
To stop receiving notification emails like this one, please contact
"comm...@mxnet.apache.org" <comm...@mxnet.apache.org>.
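A note for readers on the workspace convention the new MKL wrappers emulate: in the classic LAPACK interface, calling a routine with lwork == -1 performs a workspace-size query only, whereas MKL's LAPACKE C interface allocates workspace internally, so the wrappers above answer the query with a dummy size and otherwise forward the call. Below is a minimal, self-contained sketch of the resulting two-phase calling pattern; the names are hypothetical and it is not code from this commit.

#include <cstdio>
#include <vector>

// Hypothetical stand-in for a wrapped routine such as MXNET_LAPACK_sgelqf.
// Mirrors the wrapper pattern in the patch: lwork == -1 means "workspace
// query"; since LAPACKE manages its own workspace, the query only reports
// a dummy size instead of calling into LAPACK.
int hypothetical_gelqf(int m, int n, float* a, int lda,
                       float* tau, float* work, int lwork) {
  if (lwork == -1) {
    *work = 0;  // dummy workspace size, as in the patch
    return 0;
  }
  // A real wrapper would forward to the corresponding LAPACKE_* routine here.
  return 0;
}

int main() {
  const int m = 2, n = 3;
  std::vector<float> a(m * n, 1.0f), tau(m);
  float query = 0.0f;
  // Phase 1: workspace-size query (lwork == -1).
  hypothetical_gelqf(m, n, a.data(), n, tau.data(), &query, -1);
  // Phase 2: allocate at least the reported size (plus one element, since
  // some callers assume a non-empty workspace) and do the actual work.
  std::vector<float> work(static_cast<int>(query) + 1);
  int info = hypothetical_gelqf(m, n, a.data(), n, tau.data(),
                                work.data(), static_cast<int>(work.size()));
  std::printf("info = %d\n", info);
  return 0;
}

This is also why the syevd wrapper in the patch reports a workspace size of 1 rather than 0: as its comment notes, the calling code assumes the workspace holds at least one DType element.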