Patches imported from SINGA-257 to update the OpenCL API.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6b70dfc8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6b70dfc8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6b70dfc8

Branch: refs/heads/master
Commit: 6b70dfc8ad5d188859973cff9b4cdeb270f8b68c
Parents: 2d5f696
Author: Tan Li Boon <[email protected]>
Authored: Mon Jan 23 00:13:49 2017 +0800
Committer: root <wangwei>
Committed: Mon Jan 23 09:43:24 2017 +0000

----------------------------------------------------------------------
 cmake/Dependencies.cmake              |  7 ++--
 include/singa/core/device.h           | 45 +++++++++++++++----------
 src/api/config.i.in                   |  1 +
 src/api/core_device.i                 | 17 ++++++++--
 src/core/device/platform.cc           | 53 ++++++++++++++++++++++++++----
 src/core/tensor/tensor_math_opencl.h  | 45 ++++++++-----------------
 src/model/layer/opencl_convolution.cc |  2 +-
 src/model/layer/opencl_pooling.cc     |  2 +-
 8 files changed, 107 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index d5bfbd9..708628b 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -91,12 +91,11 @@ ENDIF()
 
 IF(USE_OPENCL)
     FIND_PACKAGE(OpenCL REQUIRED)
-    IF(NOT OpenCL_FOUND)
+    IF(NOT OPENCL_FOUND)
         MESSAGE(SEND_ERROR "OpenCL was requested, but not found.")
     ELSE()
-        #MESSAGE(STATUS "Found OpenCL headers at ${OpenCL_INCLUDE_DIRS}")
-        INCLUDE_DIRECTORIES(SYSTEM ${OpenCL_INCLUDE_DIR})
-        LIST(APPEND SINGA_LINKER_LIBS ${OpenCL_LIBRARIES})
+        INCLUDE_DIRECTORIES(SYSTEM ${OPENCL_INCLUDE_DIR})
+        LIST(APPEND SINGA_LINKER_LIBS ${OPENCL_LIBRARIES})
         FIND_PACKAGE(ViennaCL REQUIRED)
         IF(NOT ViennaCL_FOUND)
             MESSAGE(SEND_ERROR "ViennaCL is required if OpenCL is enabled.")

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 0fecc6d..f6a490c 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -262,7 +262,7 @@ private:
 class Platform {
 public:
 
-  /// Return the defualt host device
+  /// Return the default host device
   static std::shared_ptr<Device> GetDefaultDevice() {
     return defaultDevice;
   }
@@ -290,23 +290,7 @@ public:
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
-#endif // USE_CUDA
-
-  /// Create a \p num_devices set of valid OpenCL devices, regardless of
-  /// platforms.  If there are fewer valid devices than requested, then this
-  /// method will return as many as possible.If OpenCL is not in use, this
-  /// method will return an empty array.
-  const std::vector<std::shared_ptr<Device> > CreateOpenclDevices(
-             const size_t num_devices);
-
-  /// Create a set of valid OpenCL devices, regardless of platforms, assigning
-  /// \p id to each device in sequence.
-  /// If there are fewer valid devices than requested, then this method will
-  /// return as many as possible.
-  /// If OpenCL is not in use, this method will return an empty array.
-  const std::vector<std::shared_ptr<Device> >
-  CreateOpenclDevices(const vector<int> &id);
-
+  
   /// This function is implementd by Caffe (http://caffe.berkeleyvision.org/).
   /// This function checks the availability of GPU #device_id.
   /// It attempts to create a context on the device by calling cudaFree(0).
@@ -322,6 +306,31 @@ public:
   /// the permission. cudaFree(0) is one of those with no side effect,
   /// except the context initialization.
   static bool CheckDevice(const int device_id);
+#endif // USE_CUDA
+
+#ifdef USE_OPENCL
+
+  const int GetNumOpenclPlatforms();
+  
+  const int GetNumOpenclDevices();
+  
+  static const std::shared_ptr<Device> GetDefaultOpenclDevice();
+
+  /// Create a \p num_devices set of valid OpenCL devices, regardless of
+  /// platforms.  If there are fewer valid devices than requested, then this
+  /// method will return as many as possible. If OpenCL is not in use, this
+  /// method will return an empty array.
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const size_t num_devices);
+
+  /// Create a set of valid OpenCL devices, regardless of platforms, assigning
+  /// \p id to each device in sequence.
+  /// If there are fewer valid devices than requested, then this method will
+  /// return as many as possible.
+  /// If OpenCL is not in use, this method will return an empty array.
+//  const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const vector<int> &id);
+#endif // USE_OPENCL
 
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/api/config.i.in
----------------------------------------------------------------------
diff --git a/src/api/config.i.in b/src/api/config.i.in
index cea3517..05ddf6e 100644
--- a/src/api/config.i.in
+++ b/src/api/config.i.in
@@ -1,6 +1,7 @@
 // Pass in cmake configurations to swig
 #cmakedefine01 USE_CUDA
 #cmakedefine01 USE_CUDNN
+#cmakedefine01 USE_OPENCL
 #cmakedefine01 USE_PYTHON
 #cmakedefine01 USE_JAVA
 #cmakedefine CUDNN_VERSION ${CUDNN_VERSION}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/api/core_device.i
----------------------------------------------------------------------
diff --git a/src/api/core_device.i b/src/api/core_device.i
index a9bb840..a5b7de6 100644
--- a/src/api/core_device.i
+++ b/src/api/core_device.i
@@ -44,7 +44,7 @@ namespace std{
 namespace singa{
 
 class Device {
-  public:
+ public:
   virtual void SetRandSeed(unsigned seed) = 0;
   std::shared_ptr<Device> host();
   int id() const;
@@ -58,11 +58,24 @@ class Platform {
   static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
   static const std::vector<std::pair<size_t, size_t>> GetGPUMemSize();
   static const std::string DeviceQuery(int id, bool verbose = false);
-  static const std::vector<std::shared_ptr<Device> >
+  static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
 #endif // USE_CUDA
+
+#if USE_OPENCL
+
+  const int GetNumOpenclPlatforms();
+  const int GetNumOpenclDevices();
+  static const std::shared_ptr<Device> GetDefaultOpenclDevice();
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices(const size_t num_devices);
+//  static const std::vector<std::shared_ptr<Device>>
+//  CreateOpenclDevices();
+
+#endif // USE_OPENCL
+
   static std::shared_ptr<Device> GetDefaultDevice();
 };
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/core/device/platform.cc
----------------------------------------------------------------------
diff --git a/src/core/device/platform.cc b/src/core/device/platform.cc
index eb02c5b..8ae15f8 100644
--- a/src/core/device/platform.cc
+++ b/src/core/device/platform.cc
@@ -19,11 +19,12 @@
 
 #include "singa/core/device.h"
 #include "singa/singa_config.h"
-
-#ifdef USE_CUDA
+#include "singa/utils/opencl_utils.h"
 
 namespace singa {
 
+#ifdef USE_CUDA
+
 int Platform::GetNumGPUs() {
   int count;
   CUDA_CHECK(cudaGetDeviceCount(&count));
@@ -109,7 +110,7 @@ const string Platform::DeviceQuery(int device, bool verbose) {
   return out.str();
 }
 
-const vector<shared_ptr<Device> >
+const vector<shared_ptr<Device>>
 Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   const vector<int> gpus = GetGPUIDs();
   CHECK_LE(num_devices, gpus.size());
@@ -117,7 +118,7 @@ Platform::CreateCudaGPUs(const size_t num_devices, size_t init_size) {
   return CreateCudaGPUsOn(use_gpus, init_size);
 }
 
-const vector<shared_ptr<Device> >
+const vector<shared_ptr<Device>>
 Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
   MemPoolConf conf;
   if (init_size > 0)
@@ -137,8 +138,46 @@ Platform::CreateCudaGPUsOn(const vector<int> &devices, size_t init_size) {
   return ret;
 }
 
-}  // namespace singa
-
 #endif  // USE_CUDA
 
-#endif
\ No newline at end of file
+#ifdef USE_OPENCL
+
+const int Platform::GetNumOpenclPlatforms() {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  return (int)all_platforms.size();
+}
+
+const int Platform::GetNumOpenclDevices() {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  unsigned int total_num_devices = 0;
+  for (auto plat : all_platforms) {
+    auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
+    total_num_devices += all_devices.size();
+  }
+  return (int)total_num_devices;
+}
+
+const std::shared_ptr<Device> Platform::GetDefaultOpenclDevice() {
+  return std::make_shared<OpenclDevice>();
+}
+/*
+static const std::vector<std::shared_ptr<Device>>
+Platform::CreateOpenclDevices(const size_t num_devices) {
+  auto all_platforms = viennacl::ocl::get_platforms();
+  for (auto plat : all_platforms) {
+    auto all_devices = plat.devices(CL_DEVICE_TYPE_ALL);
+    total_num_devices += all_devices.size();
+  }
+  return (int)total_num_devices;
+}
+
+static const std::vector<std::shared_ptr<Device>>
+Platform::CreateOpenclDevices(const std::vector<int> &id) {
+
+}
+*/
+#endif // USE_OPENCL
+
+}  // namespace singa
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/core/tensor/tensor_math_opencl.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_opencl.h b/src/core/tensor/tensor_math_opencl.h
index bc876b3..55acb09 100644
--- a/src/core/tensor/tensor_math_opencl.h
+++ b/src/core/tensor/tensor_math_opencl.h
@@ -440,36 +440,17 @@ void Amin<float, lang::Opencl>(const size_t num, const Block* in, size_t* out, C
   out[0] = temp[0];
   delete temp;
 }
-
-
+*/
+       
 template<>
 void Asum<float, lang::Opencl>(const size_t num, const Block* in, float* out, Context* ctx) {
-  cl_int status = CL_SUCCESS;
-
-  std::string kname = "clkernel_asum";
-  auto kernel = ctx->kernels->at(kname);
-
-  cl::Buffer inbuf = *(static_cast<cl::Buffer*>(in->mutable_data()));
-
-  size_t size = sizeof(float) * num;
-  cl::Buffer outval(ctx->ocl_ctx, CL_MEM_WRITE_ONLY, size, nullptr, &status);
-  OCL_CHECK(status, "Failed to create buffer!");
-
-  kernel.setArg(0, (cl_int)num);
-  kernel.setArg(1, inbuf);
-  kernel.setArg(2, outval);
-  kernel.setArg(3, cl::Local(size));
+  viennacl::vector<float> v_in((const cl_mem)in->data(), num);
 
-  status = ctx->ocl_cmdq.enqueueNDRangeKernel(kernel, cl::NDRange(0), cl::NDRange(num));
-  OCL_CHECK(status, "Failed to enqueue kernel function!");
+  viennacl::vector<float> temp = viennacl::linalg::element_fabs(v_in);
 
-  float* temp = new float[num];
-  status = ctx->ocl_cmdq.enqueueReadBuffer(outval, CL_TRUE, 0, size, temp);
-  OCL_CHECK(status, "Failed to read from buffer!");
-  out[0] = temp[0];
-  delete temp;
+  out[0] = viennacl::linalg::sum(temp);
 }
-*/
+
 /// out = alpha * in + out
 template<>
 void Axpy<float, lang::Opencl>(const size_t num, const float alpha, const Block* in, Block* out, Context* ctx) {
@@ -528,7 +509,7 @@ void GEMV<float, lang::Opencl>(bool trans, const size_t m, const size_t n, const
 }
 
 /// multiply a matrix with a diagonal matrix constructed using values from 'v'.
-/// if matrix_lef_side is true, do M*v; else do v*M
+/// if matrix_left_side is true, do M*v; else do v*M
 template<>
 void DGMM<float, lang::Opencl>(bool side_right,
                  const size_t nrow, const size_t ncol,
@@ -541,9 +522,9 @@ void DGMM<float, lang::Opencl>(bool side_right,
   auto diag = viennacl::diag(v_buf);
 
   if (side_right) {
-    out_buf = viennacl::linalg::prod(diag, M_buf);
-  } else {
     out_buf = viennacl::linalg::prod(M_buf, diag);
+  } else {
+    out_buf = viennacl::linalg::prod(diag, M_buf);
   }
 }
 
@@ -577,9 +558,9 @@ void GEMM<float, lang::Opencl>(const bool transA, const bool transB,
 
 
 template <>
-void ComputeCrossEntropy<float, lang::Opencl>(const size_t batchsize, const size_t dim,
-                         const Block *p, const Block *t, Block *loss,
-                         Context *ctx) {
+void ComputeCrossEntropy<float, lang::Opencl>(bool int_target, const size_t batchsize,
+                         const size_t dim, const Block *p, const Block *t,
+                         Block *loss, Context *ctx) {
   auto ocl_ctx = get_context(ctx->vcl_ctx_id);
   auto kernel = ocl_ctx.get_kernel("tensor_math_opencl.cl", "clkernel_crossentropy");
 
@@ -592,7 +573,7 @@ void ComputeCrossEntropy<float, lang::Opencl>(const size_t batchsize, const size
 
 
 template <>
-void SoftmaxCrossEntropyBwd<float, lang::Opencl>(const size_t batchsize, const size_t dim,
+void SoftmaxCrossEntropyBwd<float, lang::Opencl>(bool int_target, const size_t batchsize, const size_t dim,
                             const Block *p, const Block *t, Block *grad,
                             Context *ctx) {
   auto ocl_ctx = get_context(ctx->vcl_ctx_id);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/model/layer/opencl_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/opencl_convolution.cc b/src/model/layer/opencl_convolution.cc
index c43719f..4b70a71 100644
--- a/src/model/layer/opencl_convolution.cc
+++ b/src/model/layer/opencl_convolution.cc
@@ -22,7 +22,7 @@
 
 namespace singa {
 
-RegisterLayerClass(opencl_convolution, OpenclConvolution);
+RegisterLayerClass(singacl_convolution, OpenclConvolution);
 
 /// \copydoc Layer::Forward(int flag, const Tensor&)
 const Tensor OpenclConvolution::Forward(int flag, const Tensor &input) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6b70dfc8/src/model/layer/opencl_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/opencl_pooling.cc b/src/model/layer/opencl_pooling.cc
index 2e35330..f123270 100644
--- a/src/model/layer/opencl_pooling.cc
+++ b/src/model/layer/opencl_pooling.cc
@@ -22,7 +22,7 @@
 
 namespace singa {
 
-RegisterLayerClass(opencl_pooling, OpenclPooling);
+RegisterLayerClass(singacl_pooling, OpenclPooling);
 
 const Tensor OpenclPooling::Forward(int flag, const Tensor &input) {
   CHECK(buf_.empty());

Reply via email to