Patches imported from SINGA-257 to update the OpenCL API.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6b70dfc8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6b70dfc8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6b70dfc8

Branch: refs/heads/master
Commit: 6b70dfc8ad5d188859973cff9b4cdeb270f8b68c
Parents: 2d5f696
Author: Tan Li Boon <[email protected]>
Authored: Mon Jan 23 00:13:49 2017 +0800
Committer: root <wangwei>
Committed: Mon Jan 23 09:43:24 2017 +0000

----------------------------------------------------------------------
 cmake/Dependencies.cmake              |  7 ++--
 include/singa/core/device.h           | 45 +++++++++++++++----------
 src/api/config.i.in                   |  1 +
 src/api/core_device.i                 | 17 ++++++++--
 src/core/device/platform.cc           | 53 ++++++++++++++++++++++++++----
 src/core/tensor/tensor_math_opencl.h  | 45 ++++++++-----------------
 src/model/layer/opencl_convolution.cc |  2 +-
 src/model/layer/opencl_pooling.cc     |  2 +-
 8 files changed, 107 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index d5bfbd9..708628b 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -91,12 +91,11 @@ ENDIF()
 
 IF(USE_OPENCL)
   FIND_PACKAGE(OpenCL REQUIRED)
-  IF(NOT OpenCL_FOUND)
+  IF(NOT OPENCL_FOUND)
     MESSAGE(SEND_ERROR "OpenCL was requested, but not found.")
   ELSE()
-    #MESSAGE(STATUS "Found OpenCL headers at ${OpenCL_INCLUDE_DIRS}")
-    INCLUDE_DIRECTORIES(SYSTEM ${OpenCL_INCLUDE_DIR})
-    LIST(APPEND SINGA_LINKER_LIBS ${OpenCL_LIBRARIES})
+    INCLUDE_DIRECTORIES(SYSTEM ${OPENCL_INCLUDE_DIR})
+    LIST(APPEND SINGA_LINKER_LIBS ${OPENCL_LIBRARIES})
     FIND_PACKAGE(ViennaCL REQUIRED)
     IF(NOT ViennaCL_FOUND)
       MESSAGE(SEND_ERROR "ViennaCL is required if OpenCL is enabled.")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 0fecc6d..f6a490c 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -262,7 +262,7 @@ private:
 class Platform {
 public:
 
-  /// Return the defualt host device
+  /// Return the default host device
   static std::shared_ptr<Device> GetDefaultDevice() {
     return defaultDevice;
   }
@@ -290,23 +290,7 @@ public:
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
-#endif // USE_CUDA
-
-  /// Create a \p num_devices set of valid OpenCL devices, regardless of
-  /// platforms. If there are fewer valid devices than requested, then this
-  /// method will return as many as possible.If OpenCL is not in use, this
-  /// method will return an empty array.
-  const std::vector<std::shared_ptr<Device> > CreateOpenclDevices(
-      const size_t num_devices);
-
-  /// Create a set of valid OpenCL devices, regardless of platforms, assigning
-  /// \p id to each device in sequence.
-  /// If there are fewer valid devices than requested, then this method will
-  /// return as many as possible.
-  /// If OpenCL is not in use, this method will return an empty array.
-  const std::vector<std::shared_ptr<Device> >
-  CreateOpenclDevices(const vector<int> &id);
-
+  /// This function is implementd by Caffe (http://caffe.berkeleyvision.org/).
   /// This function checks the availability of GPU #device_id.
   /// It attempts to create a context on the device by calling cudaFree(0).
@@ -322,6 +306,31 @@ public:
   /// the permission. cudaFree(0) is one of those with no side effect,
   /// except the context initialization.
   static bool CheckDevice(const int device_id);
+#endif // USE_CUDA
+
+#ifdef USE_OPENCL
+
+  const int GetNumOpenclPlatforms();
+
+  const int GetNumOpenclDevices();
+
+  static const std::shared_ptr<Device> GetDefaultOpenclDevice();
+
+  /// Create a \p num_devices set of valid OpenCL devices, regardless of
+  /// platforms. If there are fewer valid devices than requested, then this
+  /// method will return as many as possible. If OpenCL is not in use, this
+  /// method will return an empty array.
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const size_t num_devices);
+
+  /// Create a set of valid OpenCL devices, regardless of platforms, assigning
+  /// \p id to each device in sequence.
+  /// If there are fewer valid devices than requested, then this method will
+  /// return as many as possible.
+  /// If OpenCL is not in use, this method will return an empty array.
+//  const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const vector<int> &id);
+#endif // USE_OPENCL
 };

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/api/config.i.in
----------------------------------------------------------------------
diff --git a/src/api/config.i.in b/src/api/config.i.in
index cea3517..05ddf6e 100644
--- a/src/api/config.i.in
+++ b/src/api/config.i.in
@@ -1,6 +1,7 @@
 // Pass in cmake configurations to swig
 #cmakedefine01 USE_CUDA
 #cmakedefine01 USE_CUDNN
+#cmakedefine01 USE_OPENCL
 #cmakedefine01 USE_PYTHON
 #cmakedefine01 USE_JAVA
 #cmakedefine CUDNN_VERSION ${CUDNN_VERSION}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/api/core_device.i
----------------------------------------------------------------------
diff --git a/src/api/core_device.i b/src/api/core_device.i
index a9bb840..a5b7de6 100644
--- a/src/api/core_device.i
+++ b/src/api/core_device.i
@@ -44,7 +44,7 @@ namespace std{
 namespace singa{
 
 class Device {
-  public:
+ public:
   virtual void SetRandSeed(unsigned seed) = 0;
   std::shared_ptr<Device> host();
   int id() const;
@@ -58,11 +58,24 @@ class Platform {
   static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
   static const std::vector<std::pair<size_t, size_t>> GetGPUMemSize();
   static const std::string DeviceQuery(int id, bool verbose = false);
-  static const std::vector<std::shared_ptr<Device> >
+  static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
 #endif // USE_CUDA
+
+#if USE_OPENCL
+
+  const int GetNumOpenclPlatforms();
+  const int GetNumOpenclDevices();
+  static const std::shared_ptr<Device> GetDefaultOpenclDevice();
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const size_t num_devices);
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices();
+
+#endif // USE_OPENCL
+
   static std::shared_ptr<Device> GetDefaultDevice();
 };

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/core/device/platform.cc
----------------------------------------------------------------------
diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index eb02c5b..8ae15f8 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -19,11 +19,12 @@
 
 #include "singa/core/device.h"
 #include "singa/singa_config.h"
-
-#ifdef USE_CUDA
+#include "singa/utils/opencl_utils.h"
 
 namespace singa {
 
+#ifdef USE_CUDA
+
 int Platform::GetNumGPUs() {
   int count;
   CUDA_CHECK(cudaGetDeviceCount(&count));
@@ -109,7 +110,7 @@ const string Platform::DeviceQuery(int device, bool verbose) {
   return out.str();
 }
 
-const vector<shared_ptr<Device> >
+const vector<shared_ptr<Device>>
 Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   const vector<int> gpus = GetGPUIDs();
   CHECK_LE(num_devices, gpus.size());
@@ -117,7 +118,7 @@ Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   return CreateCudaGPUsOn(use_gpus, init_size);
 }
 
-const vector<shared_ptr<Device> >
+const vector<shared_ptr<Device>>
 Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
   MemPoolConf conf;
   if (init_size > 0)
@@ -137,8 +138,46 @@ Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
   return ret;
 }
 
-} // namespace singa
-
 #endif // USE_CUDA
-#endif
\ No newline at end of file
+#ifdef USE_OPENCL
+
+const int Platform::GetNumOpenclPlatforms() {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  return (int)all_platforms.size();
+}
+
+const int Platform::GetNumOpenclDevices() {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  unsigned int total_num_devices = 0;
+  for (auto plat : all_platforms) {
+    auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
+    total_num_devices += all_devices.size();
+  }
+  return (int)total_num_devices;
+}
+
+const std::shared_ptr<Device> Platform::GetDefaultOpenclDevice() {
+  return std::make_shared<OpenclDevice>();
+}
+/*
+static const std::vector<std::shared_ptr<Device>>
+Platform::CreateOpenclDevices(const size_t num_devices) {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  for (auto plat : all_platforms) {
+    auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
+    total_num_devices += all_devices.size();
+  }
+  return (int)total_num_devices;
+}
+
+static const std::vector<std::shared_ptr<Device>>
+Platform::CreateOpenclDevices(const std::vector<int> &id) {
+
+}
+*/
+#endif // USE_OPENCL
+
+} // namespace singa
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/core/tensor/tensor_math_opencl.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_opencl.h b/src/core/tensor/tensor_math_opencl.h
index bc876b3..55acb09 100644
--- a/src/core/tensor/tensor_math_opencl.h
+++ b/src/core/tensor/tensor_math_opencl.h
@@ -440,36 +440,17 @@ void Amin<float, lang::Opencl>(const size_t num, const Block* in, size_t* out, C
   out[0] = temp[0];
   delete temp;
 }
-
-
+*/
+
 template<>
 void Asum<float, lang::Opencl>(const size_t num, const Block* in, float* out, Context* ctx) {
-  cl_int status = CL_SUCCESS;
-
-  std::string kname = "clkernel_asum";
-  auto kernel = ctx->kernels->at(kname);
-
-  cl::Buffer inbuf = *(static_cast<cl::Buffer*>(in->mutable_data()));
-
-  size_t size = sizeof(float) * num;
-  cl::Buffer outval(ctx->ocl_ctx, CL_MEM_WRITE_ONLY, size, nullptr, &status);
-  OCL_CHECK(status, "Failed to create buffer!");
-
-  kernel.setArg(0, (cl_int)num);
-  kernel.setArg(1, inbuf);
-  kernel.setArg(2, outval);
-  kernel.setArg(3, cl::Local(size));
+  viennacl::vector<float> v_in((const cl_mem)in->data(), num);
 
-  status = ctx->ocl_cmdq.enqueueNDRangeKernel(kernel, cl::NDRange(0), cl::NDRange(num));
-  OCL_CHECK(status, "Failed to enqueue kernel function!");
+  viennacl::vector<float> temp = viennacl::linalg::element_fabs(v_in);
 
-  float* temp = new float[num];
-  status = ctx->ocl_cmdq.enqueueReadBuffer(outval, CL_TRUE, 0, size, temp);
-  OCL_CHECK(status, "Failed to read from buffer!");
-  out[0] = temp[0];
-  delete temp;
+  out[0] = viennacl::linalg::sum(temp);
 }
-*/
+
 
 /// out = alpha * in + out
 template<>
 void Axpy<float, lang::Opencl>(const size_t num, const float alpha, const Block* in, Block* out, Context* ctx) {
@@ -528,7 +509,7 @@ void GEMV<float, lang::Opencl>(bool trans, const size_t m, const size_t n, const
 }
 
 /// multiply a matrix with a diagonal matrix constructed using values from 'v'.
-/// if matrix_lef_side is true, do M*v; else do v*M
+/// if matrix_left_side is true, do M*v; else do v*M
 template<>
 void DGMM<float, lang::Opencl>(bool side_right,
                                const size_t nrow, const size_t ncol,
@@ -541,9 +522,9 @@ void DGMM<float, lang::Opencl>(bool side_right,
   auto diag = viennacl::diag(v_buf);
 
   if (side_right) {
-    out_buf = viennacl::linalg::prod(diag, M_buf);
-  } else {
     out_buf = viennacl::linalg::prod(M_buf, diag);
+  } else {
+    out_buf = viennacl::linalg::prod(diag, M_buf);
   }
 }
 
@@ -577,9 +558,9 @@ void GEMM<float, lang::Opencl>(const bool transA, const bool transB,
 
 
 template <>
-void ComputeCrossEntropy<float, lang::Opencl>(const size_t batchsize, const size_t dim,
-                                              const Block *p, const Block *t, Block *loss,
-                                              Context *ctx) {
+void ComputeCrossEntropy<float, lang::Opencl>(bool int_target, const size_t batchsize,
+                                              const size_t dim, const Block *p, const Block *t,
+                                              Block *loss, Context *ctx) {
   auto ocl_ctx = get_context(ctx->vcl_ctx_id);
   auto kernel = ocl_ctx.get_kernel("tensor_math_opencl.cl", "clkernel_crossentropy");
@@ -592,7 +573,7 @@ void ComputeCrossEntropy<float, lang::Opencl>(const size_t batchsize, const size
 
 
 template <>
-void SoftmaxCrossEntropyBwd<float, lang::Opencl>(const size_t batchsize, const size_t dim,
+void SoftmaxCrossEntropyBwd<float, lang::Opencl>(bool int_target, const size_t batchsize, const size_t dim,
                                                  const Block *p, const Block *t, Block *grad,
                                                  Context *ctx) {
   auto ocl_ctx = get_context(ctx->vcl_ctx_id);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/model/layer/opencl_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/opencl_convolution.cc b/src/model/layer/opencl_convolution.cc
index c43719f..4b70a71 100644
--- a/src/model/layer/opencl_convolution.cc
+++ b/src/model/layer/opencl_convolution.cc
@@ -22,7 +22,7 @@
 
 namespace singa {
 
-RegisterLayerClass(opencl_convolution, OpenclConvolution);
+RegisterLayerClass(singacl_convolution, OpenclConvolution);
 
 /// \copydoc Layer::Forward(int flag, const Tensor&)
 const Tensor OpenclConvolution::Forward(int flag, const Tensor &input) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/model/layer/opencl_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/opencl_pooling.cc b/src/model/layer/opencl_pooling.cc
index 2e35330..f123270 100644
--- a/src/model/layer/opencl_pooling.cc
+++ b/src/model/layer/opencl_pooling.cc
@@ -22,7 +22,7 @@
 
 namespace singa {
 
-RegisterLayerClass(opencl_pooling, OpenclPooling);
+RegisterLayerClass(singacl_pooling, OpenclPooling);
 
 const Tensor OpenclPooling::Forward(int flag, const Tensor &input) {
   CHECK(buf_.empty());
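----------------------------------------------------------------------

The patch gives Platform a small OpenCL query surface (GetNumOpenclPlatforms,
GetNumOpenclDevices, GetDefaultOpenclDevice), implemented in
src/core/device/platform.cc on top of viennacl::ocl::get_platforms(), and
mirrors the same three declarations in src/api/core_device.i so SWIG picks
them up once USE_OPENCL is propagated through src/api/config.i.in. Below is a
minimal C++ usage sketch, assuming a build configured with USE_OPENCL=ON; note
that the two counting methods are declared non-static in this patch, so the
sketch calls them through an instance.

// Usage sketch only, not part of the patch. Assumes SINGA was built with
// USE_OPENCL=ON and that an OpenCL runtime is installed.
#include <iostream>
#include <memory>

#include "singa/core/device.h"

int main() {
  singa::Platform platform;  // the two counters are non-static in this patch
  std::cout << "OpenCL platforms: " << platform.GetNumOpenclPlatforms() << "\n"
            << "OpenCL devices:   " << platform.GetNumOpenclDevices() << "\n";

  // GetDefaultOpenclDevice() is static and returns a freshly constructed
  // OpenclDevice wrapped in a shared_ptr (see platform.cc above).
  std::shared_ptr<singa::Device> dev = singa::Platform::GetDefaultOpenclDevice();
  std::cout << "Default OpenCL device id: " << dev->id() << "\n";
  return 0;
}

Everything in the sketch beyond the standard headers comes from the diff: the
three Platform calls and Device::id() are exactly the declarations added or
already exposed in include/singa/core/device.h and src/api/core_device.i.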
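The rewritten Asum in src/core/tensor/tensor_math_opencl.h drops the
hand-written clkernel_asum path (buffer allocation, setArg calls,
enqueueNDRangeKernel, and the read-back into a temporary host array) in favour
of two ViennaCL expressions over the block's cl_mem. The same reduction
pattern is shown below in isolation on host data; the include list here is an
assumption, since the SINGA header reaches ViennaCL through its own utility
headers.

// Standalone sketch of the ViennaCL pattern used by the new Asum:
// sum(|x_i|) == viennacl::linalg::sum(viennacl::linalg::element_fabs(x)).
// Requires an OpenCL runtime at run time; the include paths are assumptions.
#include <iostream>
#include <vector>

#include <viennacl/vector.hpp>
#include <viennacl/linalg/sum.hpp>
#include <viennacl/linalg/vector_operations.hpp>

int main() {
  std::vector<float> host = {1.0f, -2.0f, 3.0f, -4.0f};

  viennacl::vector<float> v(host.size());
  viennacl::copy(host, v);  // host -> device transfer

  // Same two calls as the patched Asum, minus the cl_mem wrapping of a Block.
  viennacl::vector<float> absv = viennacl::linalg::element_fabs(v);
  float asum = viennacl::linalg::sum(absv);

  std::cout << "asum = " << asum << std::endl;  // expected: 10
  return 0;
}

Compared with the removed code path there is no explicit command queue and no
intermediate host buffer left in Asum; ViennaCL schedules the element-wise
fabs kernel and the final reduction itself.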
