SINGA-100 Implement layers using CUDNN for GPU training

Fix a bug: a test failed when running "make test", caused by src/test/test_math.cc:349.
Modified file: math_kernel.cu. singa_gpu_sum_row() and singa_gpu_sum_col() set
threads_per_block and num_blocks to the wrong values.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/05680dd1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/05680dd1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/05680dd1

Branch: refs/heads/master
Commit: 05680dd174297cd7be8447aacf334299724c6575
Parents: 15b23a6
Author: seaok <[email protected]>
Authored: Fri Dec 11 17:21:07 2015 +0800
Committer: seaok <[email protected]>
Committed: Fri Dec 11 17:21:07 2015 +0800

----------------------------------------------------------------------
 src/test/test_math.cc    | 7 ++++---
 src/utils/math_kernel.cu | 8 ++++----
 2 files changed, 8 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05680dd1/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 0b9f0ff..6583b3d 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -313,14 +313,15 @@ TEST(MathTest, TestDotGPU) {
   cudaFree(B_gpu);
 }
 
-TEST(MathTest, TestSingaSumColGPU) {
+TEST(MathTest, TestSingaSumRowGPU) {
   float A[3][4];
   float B[4];
   float C[4];
 
   for (int i = 0; i < 3; i++) {
     for (int j = 0; j < 4; j++) {
-      A[i][j] = i + j;
+      //A[i][j] = i + j;
+      A[i][j] = 1.0f;
     }
   }
 
@@ -335,7 +336,7 @@ TEST(MathTest, TestSingaSumColGPU) {
   cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
   cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
   cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
-  //singa_gpu_sum_row(A_gpu, B_gpu, 3, 4, 4);
+  singa_gpu_sum_row(A_gpu, B_gpu, 3, 4, 4);
 
   cudaMemcpy(B, B_gpu, 4*sizeof(float), cudaMemcpyDeviceToHost);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/05680dd1/src/utils/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/utils/math_kernel.cu b/src/utils/math_kernel.cu
index 3650c09..fe543d7 100644
--- a/src/utils/math_kernel.cu
+++ b/src/utils/math_kernel.cu
@@ -339,21 +339,21 @@ void singa_gpu_sum_vec(float *data, float *sum , int n) {
   kernel_sum_vec<<<num_blocks, threads_per_block>>>(data, sum, n);
 }
 
-void singa_gpu_sum_col(const float *src_mat_data, float *dst_vec_data,
+void singa_gpu_sum_row(const float *src_mat_data, float *dst_vec_data,
     int rows, int cols, int stride) {
   int threads_per_block = rows > CU1DBLOCK ? CU1DBLOCK : rows;
   int num_blocks = cols;
 
-  kernel_sum_col<<<num_blocks, threads_per_block>>>(src_mat_data,
+  kernel_sum_row<<<num_blocks, threads_per_block>>>(src_mat_data,
       dst_vec_data, rows, cols, stride);
 }
 
-void singa_gpu_sum_row(const float *src_mat_data, float *dst_vec_data,
+void singa_gpu_sum_col(const float *src_mat_data, float *dst_vec_data,
     int rows, int cols, int stride) {
   int threads_per_block = cols > CU1DBLOCK ? CU1DBLOCK : cols;
   int num_blocks = rows;
 
-  kernel_sum_row<<<num_blocks, threads_per_block>>>(src_mat_data,
+  kernel_sum_col<<<num_blocks, threads_per_block>>>(src_mat_data,
       dst_vec_data, rows, cols, stride);
 }
 
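----------------------------------------------------------------------
Illustrative note: the corrected singa_gpu_sum_row() maps one block to each
output column and lets the block's threads reduce over that column's rows,
which is why threads_per_block is derived from rows and num_blocks from cols.
The standalone sketch below demonstrates that launch convention only; the
kernel body, the name sum_over_rows, and the 256-thread cap standing in for
CU1DBLOCK are assumptions of this example, not the actual kernel_sum_row in
math_kernel.cu.

#include <cstdio>
#include <cuda_runtime.h>

#define BLOCK 256  // stands in for CU1DBLOCK in this sketch

// Sum each column of a row-major rows x cols matrix into dst[cols]:
// one block per column, the block's threads stride over the rows.
__global__ void sum_over_rows(const float *src, float *dst,
                              int rows, int cols, int stride) {
  __shared__ float partial[BLOCK];
  int col = blockIdx.x;                  // this block's output column
  float acc = 0.0f;
  for (int r = threadIdx.x; r < rows; r += blockDim.x)
    acc += src[r * stride + col];        // accumulate this thread's rows
  partial[threadIdx.x] = acc;
  __syncthreads();
  if (threadIdx.x == 0) {                // thread 0 folds the partial sums
    float total = 0.0f;
    for (int t = 0; t < blockDim.x; ++t)
      total += partial[t];
    dst[col] = total;
  }
}

int main() {
  const int rows = 3, cols = 4, stride = 4;   // same shape as the test
  float A[rows * cols], B[cols];
  for (int i = 0; i < rows * cols; ++i)
    A[i] = 1.0f;                              // same data as the fixed test

  float *A_gpu = nullptr, *B_gpu = nullptr;
  cudaMalloc(reinterpret_cast<void**>(&A_gpu), sizeof(A));
  cudaMalloc(reinterpret_cast<void**>(&B_gpu), sizeof(B));
  cudaMemcpy(A_gpu, A, sizeof(A), cudaMemcpyHostToDevice);

  // The corrected configuration: threads cover the reduced dimension (rows),
  // blocks cover the output dimension (cols).
  int threads_per_block = rows > BLOCK ? BLOCK : rows;
  int num_blocks = cols;
  sum_over_rows<<<num_blocks, threads_per_block>>>(A_gpu, B_gpu,
      rows, cols, stride);

  cudaMemcpy(B, B_gpu, sizeof(B), cudaMemcpyDeviceToHost);
  for (int j = 0; j < cols; ++j)
    printf("B[%d] = %f\n", j, B[j]);          // each column sums to 3.0
  cudaFree(A_gpu);
  cudaFree(B_gpu);
  return 0;
}
----------------------------------------------------------------------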
