This is an automated email from the ASF dual-hosted git repository.
haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new c437d5b use mkl sparse matrix to improve performance (#14492)
c437d5b is described below
commit c437d5b789c9d9892522a9c32ef6e6f20785a3a2
Author: triplekings <[email protected]>
AuthorDate: Sat Apr 13 12:16:53 2019 +0800
use mkl sparse matrix to improve performance (#14492)
* use mkl sparse matrix to improve performance
* fix build fail issue
* add 3rdparty/sparse matrix in Makefile
* add macro for variable
* fix lib not find error
* fix gpu R test error
* fix Mac build error
* add lib/libsparse_matrix.so to CI
* fix indentation
* retrigger CI
---
3rdparty/sparse-matrix/Makefile | 21 +++++++++++++++
3rdparty/sparse-matrix/sparse_matrix.cc | 45 +++++++++++++++++++++++++++++++
3rdparty/sparse-matrix/sparse_matrix.h | 48 +++++++++++++++++++++++++++++++++
Makefile | 34 +++++++++++++++++++++++
ci/jenkins/Jenkins_steps.groovy | 2 +-
src/operator/tensor/dot-inl.h | 28 +++++++++++++++++--
6 files changed, 175 insertions(+), 3 deletions(-)
diff --git a/3rdparty/sparse-matrix/Makefile b/3rdparty/sparse-matrix/Makefile
new file mode 100644
index 0000000..214312f
--- /dev/null
+++ b/3rdparty/sparse-matrix/Makefile
@@ -0,0 +1,21 @@
+CC = g++
+C = gcc
+MKLROOT = /opt/intel/mkl
+
+ifneq ($(USE_INTEL_PATH),)
+ MKLROOT = $(USE_INTEL_PATH)/mkl
+endif
+
+CFLAGS = -fpic -O2 -I/opt/intel/mkl/include -c -Wall -Werror -DMKL_ILP64
-m64 -std=c++11
+LDFLAGS = -Wl,--start-group -L${MKLROOT}/../compiler/lib/intel64
${MKLROOT}/lib/intel64/libmkl_intel_ilp64.a
${MKLROOT}/lib/intel64/libmkl_intel_thread.a
${MKLROOT}/lib/intel64/libmkl_core.a -Wl,--end-group -liomp5 -lpthread -lm -ldl
+
+default: libsparse_matrix.so
+
+libsparse_matrix.so: sparse_matrix.o
+ $(CC) -shared -o libsparse_matrix.so sparse_matrix.o $(LDFLAGS)
+
+sparse_matrix.o: sparse_matrix.cc sparse_matrix.h
+ $(CC) $(CFLAGS) sparse_matrix.cc
+
+clean:
+ $(RM) libsparse_matrix.so *.o *~
diff --git a/3rdparty/sparse-matrix/sparse_matrix.cc
b/3rdparty/sparse-matrix/sparse_matrix.cc
new file mode 100644
index 0000000..fa362f0
--- /dev/null
+++ b/3rdparty/sparse-matrix/sparse_matrix.cc
@@ -0,0 +1,45 @@
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <mkl_spblas.h>
+#include "sparse_matrix.h"
+
+
+
+bool mkl_DotCsrDnsDns(SP_INT64* rows_start, SP_INT64* col_indx,
+ float* values, float* X, float* y,
+ int rows, int cols, int X_columns)
+{
+
+ sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;
+ sparse_status_t status;
+ sparse_matrix_t A = NULL;
+ sparse_layout_t layout = SPARSE_LAYOUT_ROW_MAJOR;
+ float one, zero;
+ one = (float)1.0;
+ zero = (float)0.0;
+
+ MKL_INT* rows_end = rows_start + 1;
+ status = mkl_sparse_s_create_csr(&A, indexing, rows, cols, rows_start,
rows_end, col_indx, values);
+
+ if (status != SPARSE_STATUS_SUCCESS)
+ {
+ std::cout << "mkl_sparse_s_create_csr status :" << status << std::endl;
+ return false;
+ }
+ sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE;
+ struct matrix_descr descrA;
+ descrA.type = SPARSE_MATRIX_TYPE_GENERAL;
+
+ status = mkl_sparse_s_mm(operation, one, A, descrA, layout, X,
X_columns, X_columns, zero, y, X_columns);
+ if (status != SPARSE_STATUS_SUCCESS)
+ {
+    std::cout << "mkl_sparse_s_mm status :" << status << std::endl;
+ return false;
+ }
+
+ mkl_sparse_destroy(A);
+
+ return true;
+
+}
diff --git a/3rdparty/sparse-matrix/sparse_matrix.h
b/3rdparty/sparse-matrix/sparse_matrix.h
new file mode 100644
index 0000000..93054a8
--- /dev/null
+++ b/3rdparty/sparse-matrix/sparse_matrix.h
@@ -0,0 +1,48 @@
+#ifndef MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
+#define MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
+
+
+#if (!defined(__INTEL_COMPILER)) & defined(_MSC_VER)
+#define SP_INT64 __int64
+#define SP_UINT64 unsigned __int64
+#else
+#define SP_INT64 long long int
+#define SP_UINT64 unsigned long long int
+#endif
+
+
+#if defined _WIN32 || defined __CYGWIN__
+ #ifdef BUILDING_DLL
+ #ifdef __GNUC__
+ #define SPM_API_PUBLIC __attribute__ ((dllexport))
+ #else
+      #define SPM_API_PUBLIC __declspec(dllexport) // Note: actually gcc seems
to also support this syntax.
+ #endif
+ #else
+ #ifdef __GNUC__
+ #define SPM_API_PUBLIC __attribute__ ((dllimport))
+ #else
+      #define SPM_API_PUBLIC __declspec(dllimport) // Note: actually gcc seems
to also support this syntax.
+ #endif
+ #endif
+ #define SPM_API_LOCAL
+#else
+ #if __GNUC__ >= 4
+ #define SPM_API_PUBLIC __attribute__ ((visibility ("default")))
+ #define SPM_API_LOCAL __attribute__ ((visibility ("hidden")))
+ #else
+ #define SPM_API_PUBLIC
+ #define SPM_API_LOCAL
+ #endif
+#endif
+
+
+
+extern "C"
+{
+ extern SPM_API_PUBLIC bool mkl_DotCsrDnsDns(SP_INT64* rows_start,
SP_INT64* col_indx,
+ float* values, float* X, float* y, int rows, int cols, int
X_columns);
+
+}
+
+#endif //MXNET_OPERATOR_SPARSE_MATRIX_INL_H_
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 43d212e..53998ac 100644
--- a/Makefile
+++ b/Makefile
@@ -144,6 +144,7 @@ ifeq ($(USE_MKLDNN), 1)
LDFLAGS += -L$(MKLDNNROOT)/lib -lmkldnn -Wl,-rpath,'$${ORIGIN}'
endif
+
# setup opencv
ifeq ($(USE_OPENCV), 1)
CFLAGS += -DMXNET_USE_OPENCV=1
@@ -410,6 +411,14 @@ ifeq ($(USE_DIST_KVSTORE), 1)
LDFLAGS += $(PS_LDFLAGS_A)
endif
+#sparse-matrix
+ifeq ($(USE_BLAS), mkl)
+ SPARSE_MATRIX_DIR = $(ROOTDIR)/3rdparty/sparse-matrix
+ LIB_DEP += $(SPARSE_MATRIX_DIR)/libsparse_matrix.so
+ CFLAGS += -I$(SPARSE_MATRIX_DIR)
+ LDFLAGS += -L$(SPARSE_MATRIX_DIR) -lsparse_matrix
-Wl,-rpath,'$${ORIGIN}'
+endif
+
.PHONY: clean all extra-packages test lint docs clean_all rcpplint rcppexport
roxygen\
cython2 cython3 cython cyclean
@@ -547,11 +556,30 @@ ifeq ($(UNAME_S), Darwin)
endif
endif
+ifeq ($(USE_BLAS), mkl)
+ifeq ($(UNAME_S), Darwin)
+ install_name_tool -change '@rpath/libsparse_matrix.dylib'
'@loader_path/libsparse_matrix.dylib' $@
+endif
+endif
+
$(PS_PATH)/build/libps.a: PSLITE
PSLITE:
$(MAKE) CXX="$(CXX)" DEPS_PATH="$(DEPS_PATH)" -C $(PS_PATH) ps
+ifeq ($(USE_BLAS), mkl)
+$(SPARSE_MATRIX_DIR)/libsparse_matrix.so: SPARSE_MATRIX
+
+SPARSE_MATRIX:
+ifeq ($(USE_INTEL_PATH), NONE)
+ $(MAKE) -C $(SPARSE_MATRIX_DIR)
+else
+ $(MAKE) -C $(SPARSE_MATRIX_DIR) USE_INTEL_PATH=$(USE_INTEL_PATH)
+endif
+ mkdir -p $(ROOTDIR)/lib
+ cp $(SPARSE_MATRIX_DIR)/libsparse_matrix.so $(ROOTDIR)/lib/
+endif
+
$(DMLC_CORE)/libdmlc.a: DMLCCORE
DMLCCORE:
@@ -628,6 +656,10 @@ rpkg:
cp -rf lib/libmklml_intel.so R-package/inst/libs; \
fi
+ if [ -e "lib/libsparse_matrix.so" ]; then \
+ cp -rf lib/libsparse_matrix.so R-package/inst/libs; \
+ fi
+
mkdir -p R-package/inst/include
cp -rl include/* R-package/inst/include
Rscript -e "if(!require(devtools)){install.packages('devtools', repo =
'https://cloud.r-project.org/')}"
@@ -673,6 +705,7 @@ clean: rclean cyclean $(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
+ cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
$(RM) -r $(patsubst %, %/*.d, $(EXTRA_OPERATORS)) $(patsubst %,
%/*/*.d, $(EXTRA_OPERATORS))
@@ -683,6 +716,7 @@ clean: rclean mkldnn_clean cyclean testclean
$(EXTRA_PACKAGES_CLEAN)
(cd scala-package && mvn clean) || true
cd $(DMLC_CORE); $(MAKE) clean; cd -
cd $(PS_PATH); $(MAKE) clean; cd -
+ cd $(SPARSE_MATRIX_DIR); $(MAKE) clean; cd -
cd $(NNVM_PATH); $(MAKE) clean; cd -
cd $(AMALGAMATION_PATH); $(MAKE) clean; cd -
endif
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index e34b25d..5b9ad47 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -33,7 +33,7 @@ mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/li
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static
library by default.
mx_cmake_lib_debug = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests'
mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a,
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests,
build/3rdparty/openmp/runtime/src/libomp.so,
build/3rdparty/mkldnn/src/libmkldnn.so.0'
-mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so,
lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a,
3rdparty/tvm/nnvm/lib/libnnvm.a'
+mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so,
lib/libmkldnn.so.0, lib/libmklml_intel.so, lib/libsparse_matrix.so,
3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a'
mx_tensorrt_lib = 'build/libmxnet.so, lib/libnvonnxparser_runtime.so.0,
lib/libnvonnxparser.so.0, lib/libonnx_proto.so, lib/libonnx.so'
mx_lib_cpp_examples = 'lib/libmxnet.so, lib/libmxnet.a,
3rdparty/dmlc-core/libdmlc.a, 3rdparty/tvm/nnvm/lib/libnnvm.a,
3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a,
build/cpp-package/example/*'
mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/cpp-package/example/*'
diff --git a/src/operator/tensor/dot-inl.h b/src/operator/tensor/dot-inl.h
index 163b442..8a1eda0 100644
--- a/src/operator/tensor/dot-inl.h
+++ b/src/operator/tensor/dot-inl.h
@@ -38,7 +38,9 @@
#ifdef __CUDACC__
#include "./dot-inl.cuh"
#endif // __CUDACC__
-
+#if (MSHADOW_USE_MKL == 1)
+#include "sparse_matrix.h"
+#endif
namespace mxnet {
namespace op {
@@ -775,13 +777,35 @@ inline void DotCsrDnsDnsImpl(const OpContext& ctx,
}
using nnvm::dim_t;
-
+#if (MSHADOW_USE_MKL == 1)
+ TShape lhs_shape = lhs.shape();
+ TShape rhs_shape = rhs.shape_;
+#endif
const TBlob data_l = lhs.data();
const TBlob indptr_l = lhs.aux_data(csr::kIndPtr);
const TBlob col_idx_l = lhs.aux_data(csr::kIdx);
const TBlob& data_r = rhs;
const TBlob data_out = *ret;
+#if (MSHADOW_USE_MKL == 1)
+ if (data_l.type_flag_ == mshadow::kFloat32
+ && indptr_l.type_flag_ == mshadow::kInt64
+ && col_idx_l.type_flag_ == mshadow::kInt64
+ && !trans_lhs) {
+ bool ret = mkl_DotCsrDnsDns(static_cast<SP_INT64*>(indptr_l.dptr_),
+ static_cast<SP_INT64*>(col_idx_l.dptr_),
+ data_l.dptr<float>(),
+ data_r.dptr<float>(),
+ data_out.dptr<float>(),
+ lhs_shape[0],
+ lhs_shape[1],
+ rhs_shape[1]);
+ if (ret) {
+ return;
+ }
+ }
+#endif
+
MSHADOW_SGL_DBL_TYPE_SWITCH(data_l.type_flag_, DType, { // data type
MSHADOW_IDX_TYPE_SWITCH(indptr_l.type_flag_, IType, { // indptr type
MSHADOW_IDX_TYPE_SWITCH(col_idx_l.type_flag_, CType, { // col idx type