SINGA-172 OpenCL device support and implementation. Move opencl_device.h into device.h. Remove the option for building the OpenCL tests.
One test of OpenCL failed, TensorMult. Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/464dcda6 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/464dcda6 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/464dcda6 Branch: refs/heads/dev Commit: 464dcda634f6198fe74d8932ac999e538e5f4065 Parents: 3f6b5e3 Author: Wei Wang <[email protected]> Authored: Sat Jul 30 13:09:05 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Sat Jul 30 13:09:05 2016 +0800 ---------------------------------------------------------------------- CMakeLists.txt | 2 +- include/singa/core/common.h | 6 +- include/singa/core/device.h | 101 +++++++++++++++- include/singa/core/opencl_device.h | 132 --------------------- src/core/device/opencl_device.cc | 20 ++-- test/CMakeLists.txt | 4 +- test/singa/test_opencl.cc | 198 ++++++++++++++++---------------- 7 files changed, 215 insertions(+), 248 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f862f0..23f8ef6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ OPTION(USE_OPENCV "Use opencv" OFF) OPTION(USE_LMDB "Use LMDB libs" OFF) OPTION(USE_PYTHON "Generate py wrappers" ON) OPTION(USE_OPENCL "Use OpenCL" OFF) -OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF) +#OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF) INCLUDE("cmake/Dependencies.cmake") INCLUDE("cmake/Utils.cmake") http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/include/singa/core/common.h ---------------------------------------------------------------------- diff --git a/include/singa/core/common.h b/include/singa/core/common.h index 9586286..caa7c67 100644 --- 
a/include/singa/core/common.h +++ b/include/singa/core/common.h @@ -39,8 +39,8 @@ #define CL_HPP_MINIMUM_OPENCL_VERSION 120 #define CL_HPP_TARGET_OPENCL_VERSION 120 #include <CL/cl2.hpp> -#include <map> -#endif +#include <unordered_map> +#endif // USE_OPENCL using std::atomic; @@ -99,7 +99,7 @@ typedef struct _Context { #endif // USE_CUDA #ifdef USE_OPENCL - std::shared_ptr<std::map<std::string, cl::Kernel>> kernels; + std::shared_ptr<std::unordered_map<std::string, cl::Kernel>> kernels; cl::CommandQueue ocl_cmdq; cl::Context ocl_ctx; #endif http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/include/singa/core/device.h ---------------------------------------------------------------------- diff --git a/include/singa/core/device.h b/include/singa/core/device.h index 36c9dc2..cd9a811 100644 --- a/include/singa/core/device.h +++ b/include/singa/core/device.h @@ -36,6 +36,12 @@ #endif // USE_CUDA #ifdef USE_OPENCL +// http://github.khronos.org/OpenCL-CLHPP/ +// cl2.hpp includes cl.h, do not re-include. +#define CL_HPP_MINIMUM_OPENCL_VERSION 120 +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#include <unordered_map> +#include <CL/cl2.hpp> #include "singa/utils/opencl_utils.h" #endif // USE_OPENCL @@ -198,12 +204,103 @@ class CudaGPU : public Device { #endif // USE_CUDA +#ifdef USE_OPENCL +// Implement Device using OpenCL libs. +class OpenclDevice : public singa::Device { +public: + + // TODO: Constructor arguments to consider: + // Path to kernel sources? + // Select only certain device types? + OpenclDevice(int id = 0, int num_executors = 1); + ~OpenclDevice(); + + /// Get the specified kernel. + cl::Kernel GetKernel(const std::string& kname, cl_int* status = nullptr); + + /// Get the command queue associated with this device. + cl::CommandQueue GetCmdQ() { return cmdq; } + + /// Prints information about all Devices in each Platform. + void PrintAllDeviceInfo(); + + /// Prints status about CL source code builds. 
+ void PrintClBuildInfo(cl::Program &p); + +// Overridden, inherited methods + void SetRandSeed(unsigned seed) override; + + void CopyDataToFrom(Block* dst, Block* src, size_t nBytes, + CopyDirection direction, int dst_offset = 0, + int src_offset = 0); +/* + void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes = 0, + size_t dst_offset = 0) override;*/ + +protected: + /// The OpenCL device that this object represents. + /// Each OpenclDevice contains exactly one cl::Device for the lifetime of the + /// object. + cl::Device this_device; + + /// Each OpenclDevice has one OpenCL context. It is created along with the + /// creation of this object. + cl::Context ocl_ctx; + + /// The CommandQueue that is associated with this device. + /// Since each OpenclDevice contains only one cl::Device and one cl::Context, + /// it naturally also contains one cl::CommandQueue that is associated + /// with said Device and Context. + cl::CommandQueue cmdq; + + /// A list of kernels that has been compiled on this device. + std::shared_ptr<std::unordered_map<std::string, cl::Kernel>> kernels; + /// Searches the given paths for all .cl files and builds + /// OpenCL programs, then stores them in the Kernels map. + void BuildPrograms(const std::string &kdir = cl_src_path); + +// Overridden, inherited methods. + + void DoExec(function<void(Context*)>&& fn, int executor) override; + + void CopyToFrom(void* dst, const void* src, size_t nBytes, + CopyDirection direction, Context* ctx = nullptr) override; + + /// Allocates memory on this OpenCL device + /// by creating and returning an empty cl::Buffer object. + /// with the indicated size. + void* Malloc(int size) override; + + /// Converts the void pointer into a Buffer object, then deletes the object. + /// This has the effect of freeing up device memory. + void Free(void* ptr) override; + +private: + + /// Copies a data block from host to device. + /// src: a pointer to an array of data. 
+ /// dst: a pointer to a cl::Buffer object. + void WriteToDevice(cl::Buffer* dst, const void* src, const size_t size); + + /// Reads a data block from device to host. + /// src: a pointer to an cl::Buffer object. + /// dst: a pointer to an malloc'ed empty array. + void ReadFromDevice(void* dst, const cl::Buffer* src, const size_t size); + + /// Duplicates a block of data on the device. + /// src: a pointer to the original cl::Buffer object. + /// dst: a pointer to the new cl::Buffer object to copy the data into. + void CopyDeviceBuffer(cl::Buffer* dst, const cl::Buffer* src, const size_t size); + + static const std::string cl_src_path; +}; +#endif // USE_OPENCL /// This class queries all available calculating devices on a given machine /// grouped according to manufacturer or device drivers. All methods should be static. /// If CUDA or OPENCL are not enabled, then the respective related methods should -/// return something that indicates their absence (for example, 0 devices); +/// return something that indicates their absence (for example, 0 devices); /// however they should always be available regardless of compile-time switches. class Platform { public: @@ -261,7 +358,7 @@ public: private: #ifdef USE_OPENCL cl::Platform clPlatform; -#endif +#endif // USE_OPENCL }; } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/include/singa/core/opencl_device.h ---------------------------------------------------------------------- diff --git a/include/singa/core/opencl_device.h b/include/singa/core/opencl_device.h deleted file mode 100644 index 14b6fe7..0000000 --- a/include/singa/core/opencl_device.h +++ /dev/null @@ -1,132 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef SINGA_CORE_OPENCL_DEVICE_H_ -#define SINGA_CORE_OPENCL_DEVICE_H_ - -#include "singa/core/device.h" - -#ifdef USE_OPENCL -// http://github.khronos.org/OpenCL-CLHPP/ -// cl2.hpp includes cl.h, do not re-include. -#define CL_HPP_MINIMUM_OPENCL_VERSION 120 -#define CL_HPP_TARGET_OPENCL_VERSION 120 -#include <map> -#include <memory> -#include <CL/cl2.hpp> - -#include "singa/utils/opencl_utils.h" - -namespace singa { - -// Implement Device using OpenCL libs. -class OpenclDevice : public singa::Device { -public: - - // TODO: Constructor arguments to consider: - // Path to kernel sources? - // Select only certain device types? - OpenclDevice(int id = 0, int num_executors = 1); - ~OpenclDevice(); - - /// Get the specified kernel. - cl::Kernel GetKernel(const std::string& kname, cl_int* status = nullptr); - - /// Get the command queue associated with this device. - cl::CommandQueue GetCmdQ() { return cmdq; } - - /// Prints information about all Devices in each Platform. - void PrintAllDeviceInfo(); - - /// Prints status about CL source code builds. 
- void PrintClBuildInfo(cl::Program &p); - -// Overridden, inherited methods - void SetRandSeed(unsigned seed) override; - - void CopyDataToFrom(Block* dst, Block* src, size_t nBytes, - CopyDirection direction, int dst_offset = 0, - int src_offset = 0); -/* - void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes = 0, - size_t dst_offset = 0) override;*/ - -protected: - /// The OpenCL device that this object represents. - /// Each OpenclDevice contains exactly one cl::Device for the lifetime of the - /// object. - cl::Device this_device; - - /// Each OpenclDevice has one OpenCL context. It is created along with the - /// creation of this object. - cl::Context ocl_ctx; - - /// The CommandQueue that is associated with this device. - /// Since each OpenclDevice contains only one cl::Device and one cl::Context, - /// it naturally also contains one cl::CommandQueue that is associated - /// with said Device and Context. - cl::CommandQueue cmdq; - - /// A list of kernels that has been compiled on this device. - std::shared_ptr<std::map<std::string, cl::Kernel>> kernels; - - /// Searches the given paths for all .cl files and builds - /// OpenCL programs, then stores them in the Kernels map. - void BuildPrograms(const std::string &kdir = cl_src_path); - -// Overridden, inherited methods. - - void DoExec(function<void(Context*)>&& fn, int executor) override; - - void CopyToFrom(void* dst, const void* src, size_t nBytes, - CopyDirection direction, Context* ctx = nullptr) override; - - /// Allocates memory on this OpenCL device - /// by creating and returning an empty cl::Buffer object. - /// with the indicated size. - void* Malloc(int size) override; - - /// Converts the void pointer into a Buffer object, then deletes the object. - /// This has the effect of freeing up device memory. - void Free(void* ptr) override; - -private: - - /// Copies a data block from host to device. - /// src: a pointer to an array of data. - /// dst: a pointer to a cl::Buffer object. 
- void WriteToDevice(cl::Buffer* dst, const void* src, const size_t size); - - /// Reads a data block from device to host. - /// src: a pointer to an cl::Buffer object. - /// dst: a pointer to an malloc'ed empty array. - void ReadFromDevice(void* dst, const cl::Buffer* src, const size_t size); - - /// Duplicates a block of data on the device. - /// src: a pointer to the original cl::Buffer object. - /// dst: a pointer to the new cl::Buffer object to copy the data into. - void CopyDeviceBuffer(cl::Buffer* dst, const cl::Buffer* src, const size_t size); - - static const std::string cl_src_path; -}; - -} // namespace singa - -#endif // USE_OPENCL - -#endif // SINGA_CORE_OPENCL_DEVICE_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/src/core/device/opencl_device.cc ---------------------------------------------------------------------- diff --git a/src/core/device/opencl_device.cc b/src/core/device/opencl_device.cc index d4d1fe5..b941cd2 100644 --- a/src/core/device/opencl_device.cc +++ b/src/core/device/opencl_device.cc @@ -21,7 +21,7 @@ #include <sstream> #include <string> -#include "singa/core/opencl_device.h" +#include "singa/core/device.h" #include "singa/utils/tinydir.h" #ifdef USE_OPENCL @@ -32,11 +32,11 @@ namespace singa { const string OpenclDevice::cl_src_path = "../src/core/tensor"; -OpenclDevice::OpenclDevice(int id, int num_executors) +OpenclDevice::OpenclDevice(int id, int num_executors) : Device(id, num_executors) { lang_ = kOpencl; - this->kernels = std::make_shared<std::map<std::string, cl::Kernel>>(); - + this->kernels = std::make_shared<std::unordered_map<string, cl::Kernel>>(); + // Create the OpenCL Device, Context, and CommandQueue. /// TODO: This merely chooses the first device on the first platform. 
cl_int status = CL_SUCCESS; @@ -44,7 +44,7 @@ OpenclDevice::OpenclDevice(int id, int num_executors) std::vector<cl::Platform> platforms; status = cl::Platform::get(&platforms); OCL_CHECK(status, "Failed to find any OpenCL platforms!"); - + std::vector<cl::Device> devices; status = platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices); OCL_CHECK(status, "Failed to get list of devices from platform!"); @@ -57,7 +57,7 @@ OpenclDevice::OpenclDevice(int id, int num_executors) OCL_CHECK(status, "Failed to create a command queue!"); BuildPrograms(); - + ctx_.kernels = kernels; ctx_.ocl_cmdq = cmdq; ctx_.ocl_ctx = ocl_ctx; @@ -65,7 +65,7 @@ OpenclDevice::OpenclDevice(int id, int num_executors) OpenclDevice::~OpenclDevice() { - + // Flush and finish the command queue. cmdq.flush(); cmdq.finish(); @@ -150,7 +150,7 @@ void OpenclDevice::BuildPrograms(const std::string &kdir) { std::vector<cl::Kernel> built_kernels; status = program.createKernels(&built_kernels); OCL_CHECK(status, "Failed to create kernels in built program."); - + for (auto k : built_kernels) { std::string name = k.getInfo<CL_KERNEL_FUNCTION_NAME>(&status); this->kernels->insert(std::make_pair(name, k)); @@ -221,7 +221,7 @@ void OpenclDevice::Free(void* p) { void OpenclDevice::WriteToDevice(cl::Buffer* dst, const void* src, const size_t size) { cl_int status = CL_SUCCESS; - + status = cmdq.enqueueWriteBuffer(*dst, CL_TRUE, 0, size, src); OCL_CHECK(status, "Unable to write data to OpenCL device."); } @@ -229,7 +229,7 @@ void OpenclDevice::WriteToDevice(cl::Buffer* dst, const void* src, const size_t void OpenclDevice::ReadFromDevice(void* dst, const cl::Buffer* src, const size_t size) { cl_int status = CL_SUCCESS; - + status = cmdq.enqueueReadBuffer(*src, CL_TRUE, 0, size, dst); OCL_CHECK(status, "Unable to read data from OpenCL device."); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/test/CMakeLists.txt ---------------------------------------------------------------------- diff --git 
a/test/CMakeLists.txt b/test/CMakeLists.txt index 632a2cd..044d65a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,12 +4,12 @@ ADD_LIBRARY(gtest STATIC EXCLUDE_FROM_ALL "gtest/gtest.h" "gtest/gtest-all.cc") AUX_SOURCE_DIRECTORY(singa singa_test_source) -IF(NOT BUILD_OPENCL_TESTS) +IF(NOT USE_OPENCL) MESSAGE(STATUS "Skipping OpenCL tests") LIST(REMOVE_ITEM singa_test_source "singa/test_opencl.cc") ENDIF() -ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source}) +ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source}) ADD_DEPENDENCIES(test_singa singa_core singa_utils) MESSAGE(STATUS "link libs" ${singa_linker_libs}) TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/464dcda6/test/singa/test_opencl.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_opencl.cc b/test/singa/test_opencl.cc index 0a335e5..3ce1889 100644 --- a/test/singa/test_opencl.cc +++ b/test/singa/test_opencl.cc @@ -20,16 +20,16 @@ *************************************************************/ #include "gtest/gtest.h" -#include "singa/core/opencl_device.h" +#include "singa/core/device.h" #include "singa/core/tensor.h" #include "singa/proto/core.pb.h" - -using singa::OpenclDevice; using singa::CppCPU; using singa::Block; using singa::Shape; using singa::Tensor; +#ifdef USE_OPENCL +using singa::OpenclDevice; class OpenCL_TensorMath : public ::testing::Test { protected: @@ -38,36 +38,36 @@ protected: float4[i] = (float)i; float4zero[i] = 0.0f; } - + for (int i = 0; i < 16; i++) { float16[i] = (float)i; float16zero[i] = 0.0f; } - + auto ocl_dev = std::make_shared<OpenclDevice>(); - + tf4in = Tensor(Shape{1, 4}, ocl_dev); tf4in.CopyDataFromHostPtr(float4, 4); - + tf4zin = Tensor(Shape{1, 4}, ocl_dev); tf4zin.CopyDataFromHostPtr(float4zero, 4); tf16in = Tensor(Shape{4, 4}, ocl_dev); tf16in.CopyDataFromHostPtr(float16, 16); - + 
tf16zin = Tensor(Shape{4, 4}, ocl_dev); tf16zin.CopyDataFromHostPtr(float16zero, 16); - + float empty[10000] = {}; empty10k = Tensor(Shape{10000}, ocl_dev); empty10k.CopyDataFromHostPtr(empty, 10000); } - + float float4[4]; float float4zero[4]; float float16[16]; float float16zero[16]; - + Tensor tf4in, tf16in; Tensor tf4zin, tf16zin; Tensor empty10k; @@ -101,19 +101,19 @@ TEST(OpenclDevice, MemoryAllocFree) { TEST(OpenclDevice, CopyDataToFrom) { OpenclDevice dev; CppCPU host; - + Block* a = host.NewBlock(4); Block* b = dev.NewBlock(4); Block* c = host.NewBlock(4); - + // Allocate the Block object on the host. char s[] = {'a', 'b', 'c', 'x'}; host.CopyDataFromHostPtr(a, s, 4); - + // Copy back and forth. dev.CopyDataToFrom(b, a, 4, singa::kHostToDevice); dev.CopyDataToFrom(c, b, 4, singa::kDeviceToHost); - + const char* astr = static_cast<const char*>(c->data()); EXPECT_EQ('a', astr[0]); EXPECT_EQ('b', astr[1]); @@ -124,21 +124,21 @@ TEST(OpenclDevice, CopyDataToFrom) { TEST(OpenclDevice, DuplicateDataOnDevice) { OpenclDevice dev; CppCPU host; - + Block* a = host.NewBlock(4); Block* b = dev.NewBlock(4); Block* c = dev.NewBlock(4); Block* d = host.NewBlock(4); - + // Allocate the Block object on the host. char s[] = {'a', 'b', 'c', 'x'}; host.CopyDataFromHostPtr(a, s, 4); - + // Copy to device and duplicate. 
dev.CopyDataToFrom(b, a, 4, singa::kHostToDevice); dev.CopyDataToFrom(c, b, 4, singa::kDeviceToDevice); dev.CopyDataToFrom(d, c, 4, singa::kDeviceToHost); - + const char* astr = static_cast<const char*>(d->data()); EXPECT_EQ('a', astr[0]); EXPECT_EQ('b', astr[1]); @@ -150,7 +150,7 @@ TEST(OpenclDevice, DuplicateDataOnDevice) { TEST_F(OpenCL_TensorMath, CopyDataToDevice) { tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_EQ(1.0f, out[1]); EXPECT_EQ(3.0f, out[3]); } @@ -158,10 +158,10 @@ TEST_F(OpenCL_TensorMath, CopyDataToDevice) { TEST_F(OpenCL_TensorMath, MemberAbs) { tf4in = Abs(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_EQ(0.0f, out[0]); EXPECT_EQ(1.0f, out[1]); EXPECT_EQ(2.0f, out[2]); @@ -171,10 +171,10 @@ TEST_F(OpenCL_TensorMath, MemberAbs) { TEST_F(OpenCL_TensorMath, MemberExp) { tf4in = Exp(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(exp(0.0f), out[0], 1e-5); EXPECT_NEAR(exp(1.0f), out[1], 1e-5); EXPECT_NEAR(exp(2.0f), out[2], 1e-5); @@ -184,10 +184,10 @@ TEST_F(OpenCL_TensorMath, MemberExp) { TEST_F(OpenCL_TensorMath, MemberLog) { tf4in = Log(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + // EXPECT_NEAR(log(0.0f), out[0], 1e-5); // Evaluates to neg infinity. 
EXPECT_NEAR(log(1.0f), out[1], 1e-5); EXPECT_NEAR(log(2.0f), out[2], 1e-5); @@ -198,10 +198,10 @@ TEST_F(OpenCL_TensorMath, MemberLog) { TEST_F(OpenCL_TensorMath, MemberReLU) { tf4in -= 2.0f; Tensor result = ReLU(tf4in); - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_NEAR(0.0f, out[0], 1e-5); EXPECT_NEAR(0.0f, out[1], 1e-5); EXPECT_NEAR(0.0f, out[2], 1e-5); @@ -211,10 +211,10 @@ TEST_F(OpenCL_TensorMath, MemberReLU) { TEST_F(OpenCL_TensorMath, MemberSigmoid) { tf4in = Sigmoid(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(1.0f / (1.0f + exp(-0.0f)), out[0], 1e-5); EXPECT_NEAR(1.0f / (1.0f + exp(-1.0f)), out[1], 1e-5); EXPECT_NEAR(1.0f / (1.0f + exp(-2.0f)), out[2], 1e-5); @@ -223,10 +223,10 @@ TEST_F(OpenCL_TensorMath, MemberSigmoid) { TEST_F(OpenCL_TensorMath, MemberSign) { tf4in -= 1.0f; - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(-1.0f, out[0], 1e-5); EXPECT_NEAR(0.0f, out[1], 1e-5); EXPECT_NEAR(1.0f, out[2], 1e-5); @@ -236,10 +236,10 @@ TEST_F(OpenCL_TensorMath, MemberSign) { TEST_F(OpenCL_TensorMath, MemberSqrt) { tf4in = Sqrt(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(0.0f, out[0], 1e-5); EXPECT_NEAR(1.0f, out[1], 1e-5); EXPECT_NEAR(sqrt(2.0f), out[2], 1e-5); @@ -249,10 +249,10 @@ TEST_F(OpenCL_TensorMath, MemberSqrt) { TEST_F(OpenCL_TensorMath, MemberSquare) { tf4in = Square(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(0.0f, out[0], 1e-5); EXPECT_NEAR(1.0f, out[1], 1e-5); EXPECT_NEAR(4.0f, out[2], 1e-5); @@ -262,10 +262,10 @@ TEST_F(OpenCL_TensorMath, MemberSquare) { TEST_F(OpenCL_TensorMath, MemberTanh) { tf4in = Tanh(tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_NEAR(0.0f, out[0], 1e-5); EXPECT_NEAR(tanh(1.0f), out[1], 1e-5); EXPECT_NEAR(tanh(2.0f), out[2], 1e-5); @@ -278,7 +278,7 @@ TEST_F(OpenCL_TensorMath, Sum) { result.ToHost(); const float* out = 
result.data<float>(); - + EXPECT_NEAR(0.0f, out[0], 1e-5); EXPECT_NEAR(1.0f, out[1], 1e-5); EXPECT_NEAR(2.0f, out[2], 1e-5); @@ -287,10 +287,10 @@ TEST_F(OpenCL_TensorMath, Sum) { TEST_F(OpenCL_TensorMath, MemberLT) { Tensor result = tf4in < 2.0f; - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(1.0f, out[0]); EXPECT_FLOAT_EQ(1.0f, out[1]); EXPECT_FLOAT_EQ(0.0f, out[2]); @@ -300,10 +300,10 @@ TEST_F(OpenCL_TensorMath, MemberLT) { TEST_F(OpenCL_TensorMath, MemberLE) { Tensor result = tf4in <= 2.0f; - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(1.0f, out[0]); EXPECT_FLOAT_EQ(1.0f, out[1]); EXPECT_FLOAT_EQ(1.0f, out[2]); @@ -313,10 +313,10 @@ TEST_F(OpenCL_TensorMath, MemberLE) { TEST_F(OpenCL_TensorMath, MemberGT) { Tensor result = tf4in > 2.0f; - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out[0]); EXPECT_FLOAT_EQ(0.0f, out[1]); EXPECT_FLOAT_EQ(0.0f, out[2]); @@ -326,10 +326,10 @@ TEST_F(OpenCL_TensorMath, MemberGT) { TEST_F(OpenCL_TensorMath, MemberGE) { Tensor result = tf4in >= 2.0f; - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out[0]); EXPECT_FLOAT_EQ(0.0f, out[1]); EXPECT_FLOAT_EQ(1.0f, out[2]); @@ -342,17 +342,17 @@ TEST_F(OpenCL_TensorMath, MemberPow) { result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out[0]); EXPECT_FLOAT_EQ(1.0f, out[1]); EXPECT_FLOAT_EQ(4.0f, out[2]); EXPECT_FLOAT_EQ(9.0f, out[3]); - + result = Pow(tf4in, tf4in); - + result.ToHost(); const float* out1 = result.data<float>(); - + EXPECT_FLOAT_EQ(1.0f, out1[0]); // 0 ^ 0 is 1, apparently. 
EXPECT_FLOAT_EQ(1.0f, out1[1]); EXPECT_FLOAT_EQ(4.0f, out1[2]); @@ -365,17 +365,17 @@ TEST_F(OpenCL_TensorMath, MemberSub) { result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out[0]); EXPECT_FLOAT_EQ(1.0f, out[1]); EXPECT_FLOAT_EQ(2.0f, out[2]); EXPECT_FLOAT_EQ(3.0f, out[3]); - + result = tf4in - 0.0f; result.ToHost(); const float* out1 = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out1[0]); EXPECT_FLOAT_EQ(1.0f, out1[1]); EXPECT_FLOAT_EQ(2.0f, out1[2]); @@ -388,17 +388,17 @@ TEST_F(OpenCL_TensorMath, MemberEltwiseMult) { result.ToHost(); const float* out = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out[0]); EXPECT_FLOAT_EQ(0.0f, out[1]); EXPECT_FLOAT_EQ(0.0f, out[2]); EXPECT_FLOAT_EQ(0.0f, out[3]); - + result = tf4in * 10.0f; result.ToHost(); const float* out1 = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out1[0]); EXPECT_FLOAT_EQ(10.0f, out1[1]); EXPECT_FLOAT_EQ(20.0f, out1[2]); @@ -411,27 +411,27 @@ TEST_F(OpenCL_TensorMath, MemberDiv) { result.ToHost(); const float* out = result.data<float>(); - + // EXPECT_FLOAT_EQ(0.0f, out[0]); // Divide by zero. EXPECT_FLOAT_EQ(1.0f, out[1]); EXPECT_FLOAT_EQ(1.0f, out[2]); EXPECT_FLOAT_EQ(1.0f, out[3]); - + result = tf4in / 10.0f; result.ToHost(); const float* out1 = result.data<float>(); - + EXPECT_FLOAT_EQ(0.0f, out1[0]); EXPECT_FLOAT_EQ(0.1f, out1[1]); EXPECT_FLOAT_EQ(0.2f, out1[2]); EXPECT_FLOAT_EQ(0.3f, out1[3]); - + result = Div(10.0f, tf4in); result.ToHost(); const float* out2 = result.data<float>(); - + // EXPECT_FLOAT_EQ(0.0f, out[0]); // Divide by 0. 
EXPECT_FLOAT_EQ(10.0f, out2[1]); EXPECT_FLOAT_EQ(5.0f, out2[2]); @@ -446,7 +446,7 @@ TEST_F(OpenCL_TensorMath, Bernoulli) { const float p = 0.3f; Bernoulli(p, &empty10k); - + empty10k.ToHost(); const float* out = empty10k.data<float>(); @@ -454,53 +454,53 @@ TEST_F(OpenCL_TensorMath, Bernoulli) { for (int i = 0; i < 10000; i++) sum += out[i]; float mean = sum / 10000; - + EXPECT_NEAR(mean, p, 1e-2); - + sum = 0.0f; for (int i = 0; i < 10000; i++) sum += (out[i] - mean) * (out[i] - mean); float variance = sum / 9999; - + EXPECT_NEAR(variance, p * (1 - p), 1e-2); } TEST_F(OpenCL_TensorMath, Gaussian) { Gaussian(0.0f, 1.0f, &empty10k); - + empty10k.ToHost(); const float* out = empty10k.data<float>(); - + float sum = 0.0f; for (int i = 0; i < 10000; i++) sum += out[i]; float mean = sum / 10000; - + EXPECT_NEAR(mean, 0.0f, 1e-2); - + sum = 0.0f; for (int i = 0; i < 10000; i++) sum += (out[i] - mean) * (out[i] - mean); float variance = sum / 9999; - + EXPECT_NEAR(variance, 1.0f, 1e-2); } TEST_F(OpenCL_TensorMath, Uniform) { Uniform(0.1f, 0.2f, &empty10k); - + empty10k.ToHost(); const float* out = empty10k.data<float>(); - + float sum = 0.0f; for (int i = 0; i < 10000; i++) sum += out[i]; float mean = sum / 10000; - + EXPECT_NEAR(mean, 0.15f, 1e-2); - + sum = 0.0f; for (int i = 0; i < 10000; i++) sum += (out[i] - mean) * (out[i] - mean); float variance = sum / 9999; - + EXPECT_NEAR(variance, 0.01f, 1e-2); } @@ -514,38 +514,38 @@ TEST_F(OpenCL_TensorMath, EltwiseAdd) { result.ToHost(); const float* out = result.data<float>(); - + EXPECT_EQ(0.0f, out[0]); EXPECT_EQ(2.0f, out[1]); EXPECT_EQ(4.0f, out[2]); EXPECT_EQ(6.0f, out[3]); - + result = tf4in + tf4zin; result.ToHost(); const float* out1 = result.data<float>(); - + EXPECT_EQ(0.0f, out1[0]); EXPECT_EQ(1.0f, out1[1]); EXPECT_EQ(2.0f, out1[2]); EXPECT_EQ(3.0f, out1[3]); - + result = Tensor(tf4in.shape(), tf4in.device(), tf4in.data_type()); Add(tf4in, tf4in, &result); result.ToHost(); const float* out2 = 
result.data<float>(); - + EXPECT_EQ(0.0f, out2[0]); EXPECT_EQ(2.0f, out2[1]); EXPECT_EQ(4.0f, out2[2]); EXPECT_EQ(6.0f, out2[3]); - + result = tf4in + 1.0f; - + result.ToHost(); const float* out3 = result.data<float>(); - + EXPECT_EQ(1.0f, out3[0]); EXPECT_EQ(2.0f, out3[1]); EXPECT_EQ(3.0f, out3[2]); @@ -556,10 +556,10 @@ TEST_F(OpenCL_TensorMath, EltwiseAdd) { TEST_F(OpenCL_TensorMath, SetValue) { const float one_third = 1.0f / 3.0f; empty10k.SetValue(one_third); - + empty10k.ToHost(); const float* out = empty10k.data<float>(); - + EXPECT_EQ(one_third, out[0]); EXPECT_EQ(one_third, out[1]); EXPECT_EQ(one_third, out[1024]); @@ -571,10 +571,10 @@ TEST_F(OpenCL_TensorMath, SetValue) { TEST_F(OpenCL_TensorMath, Axpy) { Axpy(10.0f, tf4in, &tf4in); - + tf4in.ToHost(); const float* out = tf4in.data<float>(); - + EXPECT_EQ(0.0f, out[0]); // 0 * 10 + 0 = 0 EXPECT_EQ(11.0f, out[1]); // 1 * 10 + 1 = 11 EXPECT_EQ(22.0f, out[2]); // 2 * 10 + 2 = 22 @@ -583,39 +583,39 @@ TEST_F(OpenCL_TensorMath, Axpy) { TEST_F(OpenCL_TensorMath, Mult) { Tensor result = Mult(tf4in, tf4zin.T()); // Multiply with zero. - + result.ToHost(); const float* out = result.data<float>(); - + EXPECT_EQ(0.0f, out[0]); // 1x4 * 4x1 = 1x1. - + result = Mult(tf4in, tf4in.T()); - + result.ToHost(); const float* out0 = result.data<float>(); - + EXPECT_EQ(14.0f, out0[0]); // 1x4 * 4x1 = 1x1. - + tf16zin.SetValue(10.0f); // Multiply with 10.0. result = Mult(tf16in, tf16zin); // 4x4 * 4x4 = 4x4. 
- + result.ToHost(); const float* out1 = result.data<float>(); EXPECT_EQ(240.0f, out1[0]); EXPECT_EQ(280.0f, out1[1]); EXPECT_EQ(320.0f, out1[2]); EXPECT_EQ(360.0f, out1[3]); - + EXPECT_EQ(240.0f, out1[4]); EXPECT_EQ(280.0f, out1[5]); EXPECT_EQ(320.0f, out1[6]); EXPECT_EQ(360.0f, out1[7]); - + EXPECT_EQ(240.0f, out1[8]); EXPECT_EQ(280.0f, out1[9]); EXPECT_EQ(320.0f, out1[10]); EXPECT_EQ(360.0f, out1[11]); - + EXPECT_EQ(240.0f, out1[12]); EXPECT_EQ(280.0f, out1[13]); EXPECT_EQ(320.0f, out1[14]); @@ -625,3 +625,5 @@ TEST_F(OpenCL_TensorMath, Mult) { // TODO: ComputeCrossEntropy, SoftmaxCrossEntropy +// +#endif // USE_OPENCL
