SINGA-104 Add Context Class check with cpplint
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b2cfa17b Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b2cfa17b Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b2cfa17b Branch: refs/heads/master Commit: b2cfa17b8564dff993d195b0dd89be0bad0813a6 Parents: e3bda08 Author: WANG Sheng <[email protected]> Authored: Thu Nov 26 19:30:42 2015 +0800 Committer: WANG Sheng <[email protected]> Committed: Thu Nov 26 19:30:42 2015 +0800 ---------------------------------------------------------------------- include/singa/utils/context.h | 74 ++-- src/test/test_context.cc | 35 +- src/test/test_math.cc | 747 ++++++++++++++++++------------------- src/test/test_msg.cc | 2 +- src/test/test_paramslicer.cc | 2 +- 5 files changed, 423 insertions(+), 437 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/include/singa/utils/context.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h index 5066633..905b810 100644 --- a/include/singa/utils/context.h +++ b/include/singa/utils/context.h @@ -22,22 +22,19 @@ #ifndef SINGA_UTILS_CONTEXT_H_ #define SINGA_UTILS_CONTEXT_H_ -#include <vector> -#include <random> +#include <glog/logging.h> #include <chrono> +#include <random> #include <thread> #include <unordered_map> -#include <glog/logging.h> - +#include <vector> #ifdef USE_GPU #include "singa/utils/cuda_utils.h" #endif - namespace singa { - /** * Context is used as a global singleton, which stores the mapping from CPU * thread id to GPU device id. If a thread has no GPU, then its associated @@ -52,30 +49,29 @@ class Context { /** * Destructor, release random generators and handlers. */ - ~Context() { + ~Context() { #ifdef USE_GPU - for (auto& entry : device_id_) { - if (entry.second != -1) { - cudaSetDevice(entry.second); - if (cublas_handle_[entry.second] != nullptr) { - cublasDestroy(cublas_handle_[entry.second]); - cublas_handle_[entry.second] = nullptr; - } - if(curand_generator_[entry.second] != nullptr) { - curandDestroyGenerator(curand_generator_[entry.second]); - curand_generator_[entry.second] = nullptr; - } - } - } + for (auto& entry : device_id_) { + if (entry.second != -1) { + cudaSetDevice(entry.second); + if (cublas_handle_[entry.second] != nullptr) { + cublasDestroy(cublas_handle_[entry.second]); + cublas_handle_[entry.second] = nullptr; + } + if (curand_generator_[entry.second] != nullptr) { + curandDestroyGenerator(curand_generator_[entry.second]); + curand_generator_[entry.second] = nullptr; + } + } + } #endif - for (auto& entry : rand_generator_) { - if (entry.second != nullptr) { - delete entry.second; - entry.second = nullptr; - } - } - - } + for (auto& entry : rand_generator_) { + if (entry.second != nullptr) { + delete entry.second; + entry.second = nullptr; + } + } + } /** * Constructor, init handlers and GPU rand generators to nullptr. */ @@ -90,12 +86,12 @@ class Context { * @return the ID of the device attached to a given CPU thread, or -1 if this * thread has not been attached GPU device. */ - int device_id(const std::thread::id& tid) { + int device_id(const std::thread::id& tid) { if (device_id_.find(tid) != device_id_.end()) return device_id_[tid]; else return -1; - } + } /** * Setup the CPU thread, which may be assigned a GPU device. 
* If there is no GPU device, then set did to -1. @@ -168,7 +164,7 @@ class Context { /** * Get the rand generator of the GPU device assigned to the given thread. */ - curandGenerator_t curand_generator(const std::thread::id thread_id) { + curandGenerator_t curand_generator(const std::thread::id thread_id) { return curand_generator(device_id(thread_id)); } /** @@ -177,10 +173,10 @@ class Context { * @return random generator. If it does not exist, then create one. * The random seed will be set to CURAND_RNG_PSEUDO_DEFAULT if it is not set. */ - curandGenerator_t curand_generator(const int device_id) { + curandGenerator_t curand_generator(const int device_id) { CHECK_GE(device_id, 0); if (curand_generator_.at(device_id) == nullptr) { - // TODO handle user set seed + // TODO(wangwei) handle user set seed /* CHECK(seed_.find(tid) != seed_.end()); auto seed = seed_[tid]; @@ -189,8 +185,8 @@ class Context { curandCreateGenerator(&curand_generator_[device_id], CURAND_RNG_PSEUDO_DEFAULT); } - return curand_generator_[device_id]; - } + return curand_generator_[device_id]; + } #endif @@ -198,19 +194,19 @@ class Context { //!< max num of GPUs per process const int kMaxNumGPU = 64; //!< map from thread id to device id - std::unordered_map<std::thread::id, int> device_id_; + std::unordered_map<std::thread::id, int> device_id_; //!< map from thread id to cpu rand generator std::unordered_map<std::thread::id, std::mt19937 *> rand_generator_; //!< map from thread id to cpu rand generator seed std::unordered_map<std::thread::id, int> seed_; #ifdef USE_GPU //!< cublas handler indexed by GPU device ID - std::vector<cublasHandle_t> cublas_handle_; + std::vector<cublasHandle_t> cublas_handle_; //!< cublas rand generator indexed by GPU device ID - std::vector<curandGenerator_t> curand_generator_; + std::vector<curandGenerator_t> curand_generator_; #endif }; } // namespace singa -#endif // SINGA_UTILS_MATH_ADDR_H_ +#endif // SINGA_UTILS_CONTEXT_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_context.cc ---------------------------------------------------------------------- diff --git a/src/test/test_context.cc b/src/test/test_context.cc index 5e501b9..70f6d07 100644 --- a/src/test/test_context.cc +++ b/src/test/test_context.cc @@ -1,3 +1,24 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + #include <thread> #include "gtest/gtest.h" #include "singa/utils/singleton.h" @@ -25,17 +46,17 @@ TEST(ContextTest, TestHandle) { float A[12]; float B[12]; - for(int i = 0; i < 12; i++) { - A[i]=i-1; - B[i]=i+1; + for (int i = 0; i < 12; i++) { + A[i] = i - 1; + B[i] = i + 1; } float* A_gpu = NULL; float* B_gpu = NULL; context->SetupDevice(std::this_thread::get_id(), 0); - cudaMalloc((void**)&A_gpu, 12 * sizeof(float)); - cudaMalloc((void**)&B_gpu, 12 * sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12 * sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12 * sizeof(float)); cudaMemcpy(A_gpu, A, 12 * sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(B_gpu, B, 12 * sizeof(float), cudaMemcpyHostToDevice); @@ -44,11 +65,11 @@ TEST(ContextTest, TestHandle) { cublasSdot(handle, 12, A_gpu, 1, B_gpu, 1, &gpu_ret); - for(int i = 0; i < 12;++i) { + for (int i = 0; i < 12; ++i) { cpu_ret += A[i] * B[i]; } - ASSERT_EQ(gpu_ret,cpu_ret); + ASSERT_EQ(gpu_ret, cpu_ret); cudaFree(A_gpu); cudaFree(B_gpu); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_math.cc ---------------------------------------------------------------------- diff --git a/src/test/test_math.cc b/src/test/test_math.cc index 8f8c633..39ec2a0 100644 --- a/src/test/test_math.cc +++ b/src/test/test_math.cc @@ -1,3 +1,24 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + #include "gtest/gtest.h" #include "singa/utils/math_addr.h" #include "singa/utils/math_kernel.h" @@ -12,459 +33,407 @@ using namespace singa; using namespace std; TEST(MathTest, TestGemmCPU) { - float A[3][2] = {}; - float B[3][2] = {}; - float C[2][2] = {}; - for(int i = 0; i < 3; i++) - for(int j = 0; j < 2; j++) - { - A[i][j] = i+j; - B[i][j] = i+j - i*j; - } - cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]); - float D[2][2] = {}; - for(int i = 0; i < 2; i++) - for(int j = 0; j < 2; j++) - { - D[i][j] = 0; - for(int k = 0; k < 3; k++) - D[i][j] += A[k][i]*B[k][j]; - } - for(int i = 0; i < 2; i++) - for(int j = 0; j < 2; j++) - { - ASSERT_EQ(C[i][j], D[i][j]); - } + float A[3][2] = {}; + float B[3][2] = {}; + float C[2][2] = {}; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 2; j++) { + A[i][j] = i+j; + B[i][j] = i+j - i*j; + } + cpu_gemm(A[0], B[0], 2, 2, 3 , 1.0f, 0.0f, true, false, C[0]); + float D[2][2] = {}; + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) { + D[i][j] = 0; + for (int k = 0; k < 3; k++) + D[i][j] += A[k][i]*B[k][j]; + } + for (int i = 0; i < 2; i++) + for (int j = 0; j < 2; j++) { + ASSERT_EQ(C[i][j], D[i][j]); + } } TEST(MathTest, TestGemvCPU) { - float A[4][3] = {}; - float B[4]= {}; - float C[3] = {}; - float D[3] = {}; - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - A[j][i] = i-j + i*j; - } - } - - for(int i = 0; i < 4; i++)B[i] = i; - for(int i = 0; i < 3; i++)C[i] = 10; - cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C); - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - D[i] += A[j][i]*B[j]; - } - } - for(int i = 0; i < 3; i++) - { - ASSERT_EQ(C[i], D[i]+10); - } + float A[4][3] = {}; + float B[4]= {}; + float C[3] = {}; + float D[3] = {}; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + A[j][i] = i-j + i*j; + } + } + + for (int i = 0; i < 4; i++)B[i] = i; + for (int i = 0; i < 3; i++)C[i] = 10; + cpu_gemv(A[0], B, 4, 3, 1.0f, 1.0f, true, C); + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + D[i] += A[j][i]*B[j]; + } + } + for (int i = 0; i < 3; i++) { + ASSERT_EQ(C[i], D[i]+10); + } } TEST(MathTest, TestAxpyCPU) { - float A[4][3] = {}; - float C[4][3] = {}; - float B[3][4] = {}; - float D[3][4] = {}; - - for(int i = 0; i < 4; i++) - { - for(int j = 0; j < 3; j++) - { - A[i][j] = i-j + i*j; - B[j][i] = i-j + i*j; - C[i][j] = A[i][j]; - D[j][i] = B[j][i]; - } - } - - cpu_axpy(A[0], 12, 2.0f, B[0]); - for(int i = 0; i < 12; i++) - { - D[i / 4][i % 4] += 2*C[i / 3][i % 3]; - } - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - ASSERT_EQ(B[i][j],D[i][j]); - } - } + float A[4][3] = {}; + float C[4][3] = {}; + float B[3][4] = {}; + float D[3][4] = {}; + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 3; j++) { + A[i][j] = i-j + i*j; + B[j][i] = i-j + i*j; + C[i][j] = A[i][j]; + D[j][i] = B[j][i]; + } + } + + cpu_axpy(A[0], 12, 2.0f, B[0]); + for (int i = 0; i < 12; i++) { + D[i / 4][i % 4] += 2*C[i / 3][i % 3]; + } + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + ASSERT_EQ(B[i][j], D[i][j]); + } + } } /* TEST(MathTest, TestEopCPU) { - float A[10] = {}; - float B[10] = {}; - float C[10] = {}; - float O[10] = {}; - - for(int i = 0; i < 10; i++) - { - A[i] = i; - B[i] = -i; - C[i] = i; - - } - cpu_e_f<singa::op::Set>(5, 15.0f, O, O); - for(int i = 0; i < 5; i++) - { - ASSERT_EQ(O[i]-15,0); - } - for(int i = 5; i < 10; i++) - { - ASSERT_EQ(O[i],0); - } + 
float A[10] = {}; + float B[10] = {}; + float C[10] = {}; + float O[10] = {}; + + for (int i = 0; i < 10; i++) { + A[i] = i; + B[i] = -i; + C[i] = i; + } + cpu_e_f<singa::op::Set>(5, 15.0f, O, O); + for (int i = 0; i < 5; i++) { + ASSERT_EQ(O[i]-15,0); + } + for (int i = 5; i < 10; i++) { + ASSERT_EQ(O[i],0); + } } */ #ifdef USE_GPU TEST(MathTest, TestGemmGPU) { - float A[3][2] = {}; - float B[3][2] = {}; - float C[2][2] = {}; - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 2; j++) - { - A[i][j] = i+j; - B[i][j] = i+j - i*j; - } - } - - float* A_gpu=NULL; - float* B_gpu=NULL; - float* C_gpu=NULL; - - cudaMalloc((void**)&A_gpu, 3*2*sizeof(float)); - cudaMalloc((void**)&B_gpu, 3*2*sizeof(float)); - cudaMalloc((void**)&C_gpu, 2*2*sizeof(float)); - - cudaMemcpy(A_gpu,A,3*2*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,3*2*sizeof(float),cudaMemcpyHostToDevice); - - gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu); - - cudaMemcpy(C,C_gpu,2*2*sizeof(float),cudaMemcpyDeviceToHost); - - float D[2][2] = {}; - for(int i = 0; i < 2; i++) - { - for(int j = 0; j < 2; j++) - { - D[i][j] = 0; - for(int k = 0; k < 3; k++) - { - D[i][j] += A[k][i]*B[k][j]; - } - } - } - - for(int i = 0; i < 2; i++) - { - for(int j = 0; j < 2; j++) - { - ASSERT_EQ(C[i][j],D[i][j]); - } - } - - cudaFree(A_gpu); - cudaFree(B_gpu); - cudaFree(C_gpu); + float A[3][2] = {}; + float B[3][2] = {}; + float C[2][2] = {}; + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 2; j++) { + A[i][j] = i+j; + B[i][j] = i+j - i*j; + } + } + + float* A_gpu = NULL; + float* B_gpu = NULL; + float* C_gpu = NULL; + + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*2*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 3*2*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&C_gpu), 2*2*sizeof(float)); + + cudaMemcpy(A_gpu, A, 3*2*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, 3*2*sizeof(float), cudaMemcpyHostToDevice); + + gpu_gemm<float>(A_gpu, B_gpu, 2, 2, 3 , 1, 0, true, false, C_gpu); + + cudaMemcpy(C, C_gpu, 2*2*sizeof(float), cudaMemcpyDeviceToHost); + + float D[2][2] = {}; + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + D[i][j] = 0; + for (int k = 0; k < 3; k++) { + D[i][j] += A[k][i]*B[k][j]; + } + } + } + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + ASSERT_EQ(C[i][j], D[i][j]); + } + } + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); } TEST(MathTest, TestGemvGPU) { - float A[4][3] = {}; - float B[4]= {}; - float C[3] = {}; - float D[3] = {}; - - for(int i = 0; i < 4; i++) - { - for(int j = 0; j < 3; j++) - { - A[i][j] = i-j + i*j; - } - } - - for(int i = 0; i < 4; i++)B[i] = i; - for(int i = 0; i < 3; i++)C[i] = 10; - - float* A_gpu=NULL; - float* B_gpu=NULL; - float* C_gpu=NULL; - - cudaMalloc((void**)&A_gpu, 4*3*sizeof(float)); - cudaMalloc((void**)&B_gpu, 4*sizeof(float)); - cudaMalloc((void**)&C_gpu, 3*sizeof(float)); - - cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(C_gpu,C,3*sizeof(float),cudaMemcpyHostToDevice); - - gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu); - - cudaMemcpy(C,C_gpu,3*sizeof(float),cudaMemcpyDeviceToHost); - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - D[i] += A[j][i]*B[j]; - } - } - - for(int i = 0; i < 3; i++) - { - ASSERT_EQ(C[i],D[i]+10); - } - - cudaFree(A_gpu); - cudaFree(B_gpu); - cudaFree(C_gpu); + float A[4][3] = {}; + float B[4]= {}; + float C[3] = {}; + float D[3] = {}; 
+ + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 3; j++) { + A[i][j] = i-j + i*j; + } + } + + for (int i = 0; i < 4; i++) B[i] = i; + for (int i = 0; i < 3; i++) C[i] = 10; + + float* A_gpu = NULL; + float* B_gpu = NULL; + float* C_gpu = NULL; + + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 4*3*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*sizeof(float)); + + cudaMemcpy(A_gpu, A, 4*3*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, 4*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(C_gpu, C, 3*sizeof(float), cudaMemcpyHostToDevice); + + gpu_gemv<float>(A_gpu, B_gpu, 4, 3, 1, 1, true, C_gpu); + + cudaMemcpy(C, C_gpu, 3*sizeof(float), cudaMemcpyDeviceToHost); + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + D[i] += A[j][i]*B[j]; + } + } + + for (int i = 0; i < 3; i++) { + ASSERT_EQ(C[i], D[i]+10); + } + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); } /* TEST(MathTest, TestAxpyGPU) { - float A[4][3] = {}; - float C[4][3] = {}; - float B[3][4] = {}; - float D[3][4] = {}; - - for(int i = 0; i < 4; i++) - { - for(int j = 0; j < 3; j++) - { - A[i][j] = i-j + i*j; - B[j][i] = i-j + i*j; - C[i][j] = A[i][j]; - D[j][i] = B[j][i]; - } - } - - float* A_gpu=NULL; - float* B_gpu=NULL; - - cudaMalloc((void**)&A_gpu, 4*3*sizeof(float)); - cudaMalloc((void**)&B_gpu, 3*4*sizeof(float)); - - cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice); - - gpu_axpy<float>(A_gpu, 12, 2, B_gpu); - - cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost); - cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost); - - //for(int i = 0; i < 12; i++)D[0][i] += 2*C[0][i]; - - for(int i = 0; i < 4; i++) - { - for(int j = 0; j < 3; j++) - { - D[i][j] += C[i][j]; - ASSERT_EQ(B[i][j],D[i][j]); - } - } - - cudaFree(A_gpu); - cudaFree(B_gpu); + float A[4][3] = {}; + float C[4][3] = {}; + float B[3][4] = {}; + float D[3][4] = {}; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 3; j++) + { + A[i][j] = i-j + i*j; + B[j][i] = i-j + i*j; + C[i][j] = A[i][j]; + D[j][i] = B[j][i]; + } + } + + float* A_gpu=NULL; + float* B_gpu=NULL; + + cudaMalloc((void**)&A_gpu, 4*3*sizeof(float)); + cudaMalloc((void**)&B_gpu, 3*4*sizeof(float)); + + cudaMemcpy(A_gpu,A,4*3*sizeof(float),cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu,B,3*4*sizeof(float),cudaMemcpyHostToDevice); + + gpu_axpy<float>(A_gpu, 12, 2, B_gpu); + + cudaMemcpy(A,A_gpu,4*3*sizeof(float),cudaMemcpyDeviceToHost); + cudaMemcpy(B,B_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost); + + //for (int i = 0; i < 12; i++)D[0][i] += 2*C[0][i]; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 3; j++) + { + D[i][j] += C[i][j]; + ASSERT_EQ(B[i][j],D[i][j]); + } + } + + cudaFree(A_gpu); + cudaFree(B_gpu); } */ TEST(MathTest, TestDotGPU) { - float A[12]; - float B[12]; - - for(int i = 0; i < 12; i++) - { - A[i]=i-1; - B[i]=i+1; - } + float A[12]; + float B[12]; - float* A_gpu=NULL; - float* B_gpu=NULL; + for (int i = 0; i < 12; i++) { + A[i] = i - 1; + B[i] = i + 1; + } - cudaMalloc((void**)&A_gpu, 12*sizeof(float)); - cudaMalloc((void**)&B_gpu, 12*sizeof(float)); + float* A_gpu = NULL; + float* B_gpu = NULL; - cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,12*sizeof(float),cudaMemcpyHostToDevice); - float gpu_ret=gpu_dot<float>(A_gpu,B_gpu,12); + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float)); + 
cudaMalloc(reinterpret_cast<void**>(&B_gpu), 12*sizeof(float)); - float cpu_ret=0.0f; - for(int i = 0; i < 12; i++) - { - cpu_ret+=A[i]*B[i]; - } + cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice); + float gpu_ret = gpu_dot<float>(A_gpu, B_gpu, 12); - ASSERT_EQ(gpu_ret,cpu_ret); + float cpu_ret = 0.0f; + for (int i = 0; i < 12; i++) { + cpu_ret += A[i] * B[i]; + } - cudaFree(A_gpu); - cudaFree(B_gpu); + ASSERT_EQ(gpu_ret, cpu_ret); + cudaFree(A_gpu); + cudaFree(B_gpu); } TEST(MathTest, TestSingaSumColGPU) { + float A[3][4]; + float B[4]; + float C[4]; - float A[3][4]; - float B[4]; - float C[4]; - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - A[i][j]=i+j; - } - } - - for(int i = 0; i < 4; i++) - { - B[i]=0.0f; - C[i]=0.0f; - } - - float* A_gpu=NULL; - float* B_gpu=NULL; - - cudaMalloc((void**)&A_gpu, 12*sizeof(float)); - cudaMalloc((void**)&B_gpu, 4*sizeof(float)); - cudaMemcpy(A_gpu,A,12*sizeof(float),cudaMemcpyHostToDevice); - - singa_gpu_sum_col(A_gpu,B_gpu,3,4,4); - - cudaMemcpy(B,B_gpu,4*sizeof(float),cudaMemcpyDeviceToHost); - - for(int i = 0; i < 4; i++) - { - for(int j = 0; j < 3; j++) - { - C[i]+=A[j][i]; - } - } - - for(int i = 0; i <4; i++) - { - ASSERT_EQ(B[i],C[i]); - } - - cudaFree(A_gpu); - cudaFree(B_gpu); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + A[i][j] = i + j; + } + } + + for (int i = 0; i < 4; i++) { + B[i] = 0.0f; + C[i] = 0.0f; + } + + float* A_gpu = NULL; + float* B_gpu = NULL; + + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float)); + cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice); + + singa_gpu_sum_col(A_gpu, B_gpu, 3, 4, 4); + + cudaMemcpy(B, B_gpu, 4*sizeof(float), cudaMemcpyDeviceToHost); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 3; j++) { + C[i] += A[j][i]; + } + } + + for (int i = 0; i < 4; i++) { + ASSERT_EQ(B[i], C[i]); + } + + cudaFree(A_gpu); + cudaFree(B_gpu); } TEST(MathTest, TestSingaAddVecRowGPU) { - - float A[3][4]; - float B[4]; - float C[3][4]; - float D[3][4]; - - for(int i = 0; i < 4; i++) - { - B[i]=i; - } - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - A[i][j]=i+j; - D[i][j]=A[i][j]+B[j]; - } - } - - - float* A_gpu=NULL; - float* B_gpu=NULL; - float* C_gpu=NULL; - - cudaMalloc((void**)&A_gpu, 3*4*sizeof(float)); - cudaMalloc((void**)&B_gpu, 4*sizeof(float)); - cudaMalloc((void**)&C_gpu, 3*4*sizeof(float)); - cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,4*sizeof(float),cudaMemcpyHostToDevice); - - singa_gpu_add_vec_row(B_gpu,A_gpu,C_gpu,3,4,4); - - cudaMemcpy(C,C_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost); - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - ASSERT_EQ(C[i][j],D[i][j]); - } - } - - cudaFree(A_gpu); - cudaFree(B_gpu); - cudaFree(C_gpu); + float A[3][4]; + float B[4]; + float C[3][4]; + float D[3][4]; + + for (int i = 0; i < 4; i++) { + B[i] = i; + } + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + A[i][j] = i + j; + D[i][j] = A[i][j] + B[j]; + } + } + + float* A_gpu = NULL; + float* B_gpu = NULL; + float* C_gpu = NULL; + + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&C_gpu), 3*4*sizeof(float)); + cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, 
4*sizeof(float), cudaMemcpyHostToDevice); + + singa_gpu_add_vec_row(B_gpu, A_gpu, C_gpu, 3, 4, 4); + + cudaMemcpy(C, C_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost); + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + ASSERT_EQ(C[i][j], D[i][j]); + } + } + + cudaFree(A_gpu); + cudaFree(B_gpu); + cudaFree(C_gpu); } TEST(MathTest, TestSingaSetValueGPU) { + float A[3][4]; + float* A_gpu = NULL; - float A[3][4]; + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 3*4*sizeof(float)); - float* A_gpu=NULL; + cudaMemcpy(A_gpu, A, 3*4*sizeof(float), cudaMemcpyHostToDevice); - cudaMalloc((void**)&A_gpu, 3*4*sizeof(float)); + singa_gpu_set_value(A_gpu, 4.0, 3*4); - cudaMemcpy(A_gpu,A,3*4*sizeof(float),cudaMemcpyHostToDevice); + cudaMemcpy(A, A_gpu, 3*4*sizeof(float), cudaMemcpyDeviceToHost); - singa_gpu_set_value(A_gpu,4.0,3*4); + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 4; j++) { + ASSERT_EQ(A[i][j], 4.0f); + } + } - cudaMemcpy(A,A_gpu,3*4*sizeof(float),cudaMemcpyDeviceToHost); - - for(int i = 0; i < 3; i++) - { - for(int j = 0; j < 4; j++) - { - ASSERT_EQ(A[i][j],4.0f); - } - } - - cudaFree(A_gpu); + cudaFree(A_gpu); } TEST(MathTest, TestEopGPU) { + float A[10] = {}; + float B[10] = {}; - float A[10] = {}; - float B[10] = {}; - - for(int i = 0; i < 10; i++) - { - A[i] = i; - B[i] = -i; - } + for (int i = 0; i < 10; i++) { + A[i] = i; + B[i] = -i; + } - float* A_gpu=NULL; - float* B_gpu=NULL; + float* A_gpu = NULL; + float* B_gpu = NULL; - cudaMalloc((void**)&A_gpu, 10*sizeof(float)); - cudaMalloc((void**)&B_gpu, 10*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&A_gpu), 10*sizeof(float)); + cudaMalloc(reinterpret_cast<void**>(&B_gpu), 10*sizeof(float)); - cudaMemcpy(A_gpu,A,10*sizeof(float),cudaMemcpyHostToDevice); - cudaMemcpy(B_gpu,B,10*sizeof(float),cudaMemcpyHostToDevice); + cudaMemcpy(A_gpu, A, 10*sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(B_gpu, B, 10*sizeof(float), cudaMemcpyHostToDevice); - gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu); + gpu_e_f<singa::op::Sigmoid<float>, float>(10, A_gpu, B_gpu); - cudaFree(A_gpu); - cudaFree(B_gpu); + cudaFree(A_gpu); + cudaFree(B_gpu); } #endif // USE_GPU http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_msg.cc ---------------------------------------------------------------------- diff --git a/src/test/test_msg.cc b/src/test/test_msg.cc index d5d9f20..db83b1c 100644 --- a/src/test/test_msg.cc +++ b/src/test/test_msg.cc @@ -42,7 +42,7 @@ TEST(MsgTest, AddrTest) { } TEST(MsgTest, AddFrameTest) { - int buf[5]={1,2,3,4,5}; + int buf[5] = {1, 2, 3, 4, 5}; Msg msg; msg.AddFrame("abcdefg", 7); msg.AddFrame(buf, sizeof(int) * 5); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b2cfa17b/src/test/test_paramslicer.cc ---------------------------------------------------------------------- diff --git a/src/test/test_paramslicer.cc b/src/test/test_paramslicer.cc index c693da1..bc7dedd 100644 --- a/src/test/test_paramslicer.cc +++ b/src/test/test_paramslicer.cc @@ -25,7 +25,7 @@ using namespace singa; -const int param_size[]={2400,32,25600,32, 51200,64,57600,10}; +const int param_size[] = {2400, 32, 25600, 32, 51200, 64, 57600, 10}; /* class ParamSlicerTest : public ::testing::Test {
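For reference, the sketch below shows how a worker thread might use the Context singleton that this patch cleans up, pieced together from context.h and test_context.cc above. It is only an illustration: Singleton<Context>::Instance(), cublas_handle(), and rand_generator() are assumed accessors inferred from the header's members and the test's includes, while SetupDevice(), device_id(), and curand_generator() appear verbatim in the diff.

// Minimal usage sketch of the Context singleton (assumptions marked below).
#include <random>
#include <thread>

#include "singa/utils/context.h"
#include "singa/utils/singleton.h"

void WorkerThread() {
  // Assumed accessor: Singleton<Context>::Instance() (singleton.h is included
  // by test_context.cc, but the exact call is not visible in this diff).
  singa::Context* context = singa::Singleton<singa::Context>::Instance();
  std::thread::id tid = std::this_thread::get_id();

  // Attach this CPU thread to GPU 0; the header says did is -1 when no GPU.
  context->SetupDevice(tid, 0);

  if (context->device_id(tid) != -1) {
#ifdef USE_GPU
    // Per-device cuBLAS handle and cuRAND generator, created lazily.
    cublasHandle_t handle = context->cublas_handle(tid);   // assumed accessor
    curandGenerator_t gen = context->curand_generator(tid);  // shown in diff
    (void)handle;
    (void)gen;
#endif
  } else {
    // CPU path: per-thread std::mt19937 generator stored in rand_generator_.
    std::mt19937* gen = context->rand_generator(tid);       // assumed accessor
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
    float sample = dist(*gen);
    (void)sample;
  }
}

The cpplint check referred to in the subject can be reproduced by running the standalone cpplint tool over the touched files (for example, cpplint include/singa/utils/context.h); the project's exact lint target is not part of this commit, so that invocation is only illustrative.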
