SINGA-181 Add NVCC supporting for .cu files Use nvcc to compile math_kernel.cu. The output file is cuda_compile_generated_math_kernel.cu.o, which is linked to libsinga_core.so later. Also fix some bugs/typos in source code.
fix bugs from kernel functions by using std::sqrt to differentiate with cuda::sqrt. error in linking libsinga_core.so, the kernel functions are not included in the link arg list Fix compilation bugs. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/668ae167 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/668ae167 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/668ae167 Branch: refs/heads/dev Commit: 668ae1679be1975fd153c30b522797988863dff7 Parents: d680079 Author: xiezl <[email protected]> Authored: Wed May 25 23:10:43 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Thu May 26 14:11:18 2016 +0800 ---------------------------------------------------------------------- CMakeLists.txt | 2 +- src/CMakeLists.txt | 12 ++++++++++-- src/core/tensor/math_kernel.cu | 19 +++++++++++-------- src/core/tensor/math_kernel.h | 7 ++++++- src/model/metric/accuracy.h | 1 + test/singa/test_cudnn_dropout.cc | 4 ++-- 6 files changed, 31 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cb42fb..e08fb98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,6 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6) PROJECT(singa) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") -#message(STATUS "${CMAKE_CXX_FLAGS}") LIST(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Thirdparty) #message(STATUS "module path: ${CMAKE_MODULE_PATH}") @@ -12,6 +11,7 @@ IF(UNIX OR APPLE) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall") ENDIF() +#message(STATUS "${CMAKE_CXX_FLAGS}") SET(SINGA_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include;${PROJECT_BINARY_DIR}") #message(STATUS "include path: ${SINGA_INCLUDE_DIR}") INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR}) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 92e7fe5..df8b22b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,8 +18,16 @@ AUX_SOURCE_DIRECTORY(core/device core_source) AUX_SOURCE_DIRECTORY(core/memory core_source) AUX_SOURCE_DIRECTORY(core/scheduler core_source) AUX_SOURCE_DIRECTORY(core/tensor core_source) -#message(STATUS "CORE ${core_source}") -ADD_LIBRARY(singa_core SHARED ${core_source}) +FILE(GLOB_RECURSE cuda_source core "*.cu") +set(FLAGS_BACKUP ${CMAKE_CXX_FLAGS}) +set(CMAKE_CXX_FLAGS "") +CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC") +#message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}") +#message(STATUS "CORE ${cuda_source}") +#message(STATUS "OBJ ${cuda_objs}") +include_directories("${CMAKE_CURRENT_SOURCE_DIR}/core/tensor") +set(CMAKE_CXX_FLAGS ${FLAGS_BACKUP}) +ADD_LIBRARY(singa_core SHARED ${core_source} ${cuda_objs}) TARGET_LINK_LIBRARIES(singa_core ${SINGA_LINKER_LIBS}) LIST(APPEND SINGA_LINKER_LIBS singa_core) #MESSAGE(STATUS "link libs " ${SINGA_LINKER_LIBS}) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/core/tensor/math_kernel.cu ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu index 585d65d..30863a1 100644 --- a/src/core/tensor/math_kernel.cu +++ b/src/core/tensor/math_kernel.cu @@ -19,9 +19,11 @@ * *************************************************************/ +#include "singa_config.h" #ifdef USE_CUDA #include <cmath> #include <algorithm> +#include <cfloat> #include "./math_kernel.h" #define CU2DBLOCK_X 32 @@ -30,6 +32,7 @@ #define CU1DBLOCK 1024 #define CU1DBLOCKF 1024.0 +namespace singa{ // Cuda Kernel Functions namespace cuda { __global__ void kernel_softmax_loss(const float *prob, const int *label, @@ -38,7 +41,7 @@ __global__ void kernel_softmax_loss(const float *prob, const int *label, int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { float prob_of_truth = prob[index * dim + label[index]]; - loss[index] -= log(max(prob_of_truth, FLT_MIN)); + loss[index] -= std::log(max(prob_of_truth, FLT_MIN)); } } @@ -52,7 +55,7 @@ __global__ void kernel_softmax_gradient(float *grad, const int *label, int n, } } -__global__ void kernel_sum_vec(float *data, float *sum, int n) { +__global__ void kernel_sum_vec(const float *data, float *sum, int n) { int THREADS = blockDim.x; __shared__ float aux[CU1DBLOCK]; @@ -149,7 +152,7 @@ __global__ void kernel_exp(const float *src_data, float *des_data, int n) { int index = blockIdx.x * blockDim.x + threadIdx.x; int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { - des_data[index] = exp(src_data[index]); + des_data[index] = std::exp(src_data[index]); } } @@ -157,7 +160,7 @@ __global__ void kernel_log(const float *src_data, float *des_data, int n) { int index = blockIdx.x * blockDim.x + threadIdx.x; int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { - des_data[index] = log(src_data[index]); + des_data[index] = std::log(src_data[index]); } } @@ -242,7 +245,7 @@ __global__ void kernel_square_grad(const float *src_data, float *des_data, int index = blockIdx.x * blockDim.x + threadIdx.x; int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { - des_data[index] = 2 * sqrt(src_data[index]); + des_data[index] = 2 * src_data[index]; } } @@ -250,7 +253,7 @@ __global__ void kernel_sqrt(const float *src_data, float *des_data, int n) { int index = blockIdx.x * blockDim.x + threadIdx.x; int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { - des_data[index] = sqrt(src_data[index]); + des_data[index] = std::sqrt(src_data[index]); } } @@ -259,7 +262,7 @@ __global__ void kernel_pow(const float *src_data_a, const float *src_data_b, int index = blockIdx.x * blockDim.x + threadIdx.x; int num_threads = blockDim.x * gridDim.x; for (; index < n; index += num_threads) { - des_data[index] = pow(src_data_a[index], src_data_b[index]); + des_data[index] = std::pow(src_data_a[index], src_data_b[index]); } } @@ -331,7 +334,7 @@ void sum_col(int rows, int cols, int stride, const float *in, float *out) { int threads_per_block = cols > CU1DBLOCK ? CU1DBLOCK : cols; int num_blocks = rows; - kernel_sum_col<<<num_blocks, threads_per_block>>>(src_mat_data, dst_vec_data, + kernel_sum_col<<<num_blocks, threads_per_block>>>(in, out, rows, cols, stride); } void add_row(int rows, int cols, int stride, const float *in_row, http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/core/tensor/math_kernel.h ---------------------------------------------------------------------- diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h index 7629ac8..f5da772 100644 --- a/src/core/tensor/math_kernel.h +++ b/src/core/tensor/math_kernel.h @@ -21,8 +21,11 @@ #ifndef SRC_CORE_TENSOR__MATH_KERNEL_H_ #define SRC_CORE_TENSOR__MATH_KERNEL_H_ -namespace singa { +#include "singa_config.h" +#ifdef USE_CUDA + +namespace singa { /* void softmaxloss_forward(int n, int dim, const float *prob, const int *label, float *loss); @@ -77,6 +80,8 @@ void set_value(int n, float v, float *out); void threshold(int n, float alpha, const float *in, float *out); } // cuda + } // namespace singa +#endif #endif // SRC_CORE_TENSOR__MATH_KERNEL_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/src/model/metric/accuracy.h ---------------------------------------------------------------------- diff --git a/src/model/metric/accuracy.h b/src/model/metric/accuracy.h index 05c1643..fb23634 100644 --- a/src/model/metric/accuracy.h +++ b/src/model/metric/accuracy.h @@ -19,6 +19,7 @@ #ifndef SINGA_MODEL_METRIC_ACCURACY_H_ #define SINGA_MODEL_METRIC_ACCURACY_H_ #include "singa/model/metric.h" +#include <algorithm> namespace singa { /// Compute the accuray of the prediction, which is matched against the http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/668ae167/test/singa/test_cudnn_dropout.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_cudnn_dropout.cc b/test/singa/test_cudnn_dropout.cc index 393d555..e1a6333 100644 --- a/test/singa/test_cudnn_dropout.cc +++ b/test/singa/test_cudnn_dropout.cc @@ -21,7 +21,7 @@ #include "../src/model/layer/cudnn_dropout.h" #ifdef USE_CUDNN // cudnn dropout is added in cudnn 5 -//#if CUDNN_MAJOR_VERSION >= 5 +#if CUDNN_MAJOR_VERSION >= 5 #include "gtest/gtest.h" @@ -123,5 +123,5 @@ TEST(CudnnDropout, Backward) { EXPECT_FLOAT_EQ(dx[1], dy[1] * GetBitValue(mptr, 1) * scale); EXPECT_FLOAT_EQ(dx[7], dy[7] * GetBitValue(mptr, 7) * scale); } -//#endif // CUDNN_VERSION_MAJOR>=5 +#endif // CUDNN_VERSION_MAJOR>=5 #endif // USE_CUDNN
