[13/19] incubator-singa git commit: SINGA-100 Implement layers using CUDNN for GPU training

wangsh Wed, 16 Dec 2015 04:12:13 -0800

SINGA-100 Implement layers using CUDNN for GPU training

Support compiling the cuDNN-related code.
To generate the makefile, run:
    ./configure --enable-cuda --with-cuda=/CUDA/PATH --enable-cudnn --with-cudnn=/CUDNN/PATH


Fix a bug where configure checked for the existence of nvcc even when building for CPU only.

One test still fails when running "make test"; the failure is caused by
src/test/test_math.cc:349.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/15b23a62
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/15b23a62
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/15b23a62

Branch: refs/heads/master
Commit: 15b23a62f9bb4fbcf53e0b5db446886d997e6e30
Parents: f8be9af
Author: xiezl <[email protected]>
Authored: Fri Dec 11 15:49:18 2015 +0800
Committer: xiezl <[email protected]>
Committed: Fri Dec 11 15:49:18 2015 +0800

----------------------------------------------------------------------
 Makefile.am           | 66 +++++++++++++++++++++++++++++++++++++---------
 configure.ac          | 51 +++++++++++++++++++++++++++++------
 src/test/test_math.cc |  2 +-
 3 files changed, 98 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/15b23a62/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index 470ea8a..1959fb6 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -27,7 +27,7 @@ AUTOMAKE_OPTIONS = foreign subdir-objects
 
 MSHADOW_FLAGS = -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0
 DEFAULT_FLAGS = -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
-              $(MSHADOW_FLAGS) -DCPU_ONLY=1 -funroll-loops -DTHREADED
+              $(MSHADOW_FLAGS) -funroll-loops -DTHREADED
 
 CFLAGS = $(DEBUG)
 CXXFLAGS = $(DEBUG)
@@ -49,8 +49,17 @@ PROTO_PYS := tool/python/pb2/singa_pb2.py \
              tool/python/pb2/common_pb2.py
 
 CUDA_SRCS := src/utils/math_kernel.cu
+CUDA_HDRS := include/singa/utils/math_kernel.h 
 
-PY_SRCS := tool/python/singa/driver_wrap.cxx
+CUDNN_SRCS := src/neuralnet/loss_layer/cudnn_softmaxloss.cc \
+                         src/neuralnet/neuron_layer/cudnn_softmax.cc \
+                         src/neuralnet/neuron_layer/cudnn_pooling.cc \
+                         src/neuralnet/neuron_layer/cudnn_activation.cc \
+                         src/neuralnet/neuron_layer/cudnn_lrn.cc \
+                         src/neuralnet/neuron_layer/cudnn_convolution.cc
+
+
+PY_SRCS := tool/python/singa/driver_wrap.cxx \
                   src/driver.cc
 
 SINGA_SRCS := src/driver.cc \
@@ -103,7 +112,9 @@ SINGA_SRCS := src/driver.cc \
               src/utils/image_transform.cc
 
 SINGA_HDRS := include/singa.h \
-              include/utils/cluster.h \
+                         include/singa/utils/math_blob.h \
+                         include/singa/utils/math_addr.h \
+                         include/singa/utils/cluster.h \
               include/utils/cluster_rt.h \
               include/utils/param.h \
               include/utils/common.h \
@@ -170,14 +181,20 @@ py_LTLIBRARIES = $(PY_PROGS)
 #lib_LTLIBRARIES = libsinga.la
 libsinga_la_SOURCES = $(PROTO_SRCS) $(SINGA_SRCS)
 libsinga_la_CXXFLAGS = $(DEFAULT_FLAGS) -msse3 -fpermissive 
-I$(top_srcdir)/include
+libsinga_la_LDFLAGS =
 if LMDB
 libsinga_la_CXXFLAGS += -DUSE_LMDB
 endif
 #libsinga_la_LDFLAGS = -I$(top_srcdir)/include
 if DCUDA
-libsinga_la_SOURCES += $(CUDA_SRCS)
+libsinga_la_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 libsinga_la_CXXFLAGS += $(CUDA_CFLAGS)
-libsinga_la_LDFLAGS = $(CUDA_LDFLAGS) $(CUDA_LIBS)
+libsinga_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
+endif
+if DCUDNN
+libsinga_la_SOURCES += $(CUDNN_SRCS)
+libsinga_la_CXXFLAGS += $(CUDNN_CFLAGS)
+libsinga_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
 endif
 
 
@@ -197,19 +214,34 @@ singa_LDFLAGS += -llmdb
 endif
 
 if DCUDA
-singa_SOURCES += $(CUDA_SRCS)
+singa_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 singa_CXXFLAGS += $(CUDA_CFLAGS)
 singa_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
 endif
+if DCUDNN
+singa_SOURCES += $(CUDNN_SRCS)
+singa_CXXFLAGS += $(CUDNN_CFLAGS)
+singa_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
+endif
 
 #bin_PROGRAMS += singatool
-singatool_SOURCES = src/utils/tool.cc
+singatool_SOURCES = src/utils/tool.cc #$(CUDA_SRCS) $(CUDA_HDRS) $(CUDNN_SRCS)
 singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas 
\
-                     -funroll-loops -DTHREADED -I$(top_srcdir)/include
+                                        -funroll-loops -DTHREADED 
-I$(top_srcdir)/include $(DEFAULT_FLAGS)
 singatool_LDFLAGS = -lsinga \
                     -lglog  \
                     -lprotobuf \
-                    -lzookeeper_mt
+                    -lzookeeper_mt 
+if DCUDA
+singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
+singatool_CXXFLAGS += $(CUDA_CFLAGS)
+singatool_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
+endif
+if DCUDNN
+singatool_SOURCES += $(CUDNN_SRCS)
+singatool_CXXFLAGS += $(CUDNN_CFLAGS)
+singatool_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
+endif
 
 #lib_LTLIBRARIES += libgtest.la
 libgtest_la_SOURCES = $(GTEST_HDRS) $(GTEST_SRCS)
@@ -236,21 +268,31 @@ singatest_LDFLAGS = -lsinga \
 if LMDB
 singatest_LDFLAGS += -llmdb
 endif
-if DCUDA
-singatest_SOURCES += $(CUDA_SRCS)
+if DCUDA 
+singatest_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)
 singatest_CXXFLAGS += $(CUDA_CFLAGS)
 singatest_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
 endif
+if DCUDNN
+singatest_SOURCES += $(CUDNN_SRCS)
+singatest_CXXFLAGS += $(CUDNN_CFLAGS)
+singatest_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
+endif
 
 _driver_la_SOURCES = $(PY_SRCS)
 _driver_la_CXXFLAGS = $(DEFAULT_FLAGS) $(MSHADOW_FLAGS) 
-I$(top_srcdir)/include $(PYFLAGS)
 _driver_la_LDFLAGS = -lsinga -module -shared $(PYLIBS) -avoid-version -rpath 
$(pydir)
 
 if DCUDA
-_driver_la_CXXFLAGS += $(CUDA_CFLAGS)
+_driver_la_CXXFLAGS += $(CUDA_CFLAGS) 
 _driver_la_LDFLAGS += $(CUDA_LDFLAGS) $(CUDA_LIBS)
 endif
 
+if DCUDNN
+_driver_la_CXXFLAGS += $(CUDNN_CFLAGS)
+_driver_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
+endif
+
 clean-local:
        rm -rf $(PROTO_SRCS) $(PROTO_HDRS)
        rm -rf $(PROTO_PYS)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/15b23a62/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index 902beab..85cc5fb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,16 +55,16 @@ if test "$cuda_prefix" == "yes"; then
     fi
 fi
 
-AC_MSG_CHECKING([nvcc in $cuda_prefix/bin])
-if test -x "$cuda_prefix/bin/nvcc"; then
-    AC_MSG_RESULT([found])
-    AC_DEFINE_UNQUOTED([NVCC_PATH], ["$cuda_prefix/bin/nvcc"], [Path to nvcc 
binary])
-else
-    AC_MSG_RESULT([not found!])
-    AC_MSG_FAILURE([nvcc was not found in $cuda_prefix/bin])
-fi
 
 if test x"$cudaval" = x"yes"; then
+    AC_MSG_CHECKING([nvcc in $cuda_prefix/bin])
+    if test -x "$cuda_prefix/bin/nvcc"; then
+        AC_MSG_RESULT([found])
+        AC_DEFINE_UNQUOTED([NVCC_PATH], ["$cuda_prefix/bin/nvcc"], [Path to 
nvcc binary])
+    else
+        AC_MSG_RESULT([not found!])
+        AC_MSG_FAILURE([nvcc was not found in $cuda_prefix/bin])
+    fi
     CUDA_CFLAGS="-I$cuda_prefix/include"
     CUDA_LDFLAGS="-L$cuda_prefix/lib64 -L$cuda_prefix/lib"
     CUDA_LIBS="-lcublas -lcudart -lcurand"
@@ -85,12 +85,47 @@ else
     CUDA_LDFLAGS=""
     CUDA_LIBS=""
     NVCC=""
+    DEBUG="-DCPU_ONLY"
 fi
 AC_SUBST(NVCC)
 AC_SUBST(CUDA_LDFLAGS)
 AC_SUBST(CUDA_LIBS)
 AC_SUBST(CUDA_CFLAGS)
 
+# Setup custom CUDA paths
+AC_ARG_ENABLE([cudnn],
+    [AS_HELP_STRING(--enable-cudnn,enable CUDNN support)],
+    [enable_cudnn=yes], [enable_cudnn=no])
+AM_CONDITIONAL(DCUDNN, [test "$enable_cudnn" = "yes"])
+
+AC_ARG_WITH([cudnn],
+    [AS_HELP_STRING([--with-cudnn=PATH], [prefix where CUDNN is installed])],
+    [cudnn_prefix=$cudnnwithval], [cudnn_prefix="/usr/local/cuda"])
+if test "$cudnn_prefix" == "yes"; then
+    if test "$cudnnwithval" == "yes"; then
+        cudnn_prefix="/usr/local/cuda"
+    fi
+fi
+
+if test x"$enable_cudnn" == x"yes"; then
+    CUDNN_CFLAGS="-I$cudnn_prefix/include"
+    CUDNN_LDFLAGS="-L$cudnn_prefix/lib64 -L$cudnn_prefix/lib"
+    CUDNN_LIBS="-lcudnn"
+    DEBUG+=" -DUSE_CUDNN "
+    AC_DEFINE(DCUDNN,[1],[Defined if CUDNN should be used])
+    AC_CHECK_LIB([cudnn], [main], [], [
+        AC_MSG_ERROR([unable to find cudnn library])
+        ])
+else
+    CUDNN_CFLAGS=""
+    CUDNN_LDFLAGS=""
+    CUDNN_LIBS=""
+fi
+
+AC_SUBST(CUDNN_CFLAGS)
+AC_SUBST(CUDNN_LDFLAGS)
+AC_SUBST(CUDNN_LIBS)
+
 # Checks for libraries.
 AC_SEARCH_LIBS([cblas_sgemm], [openblas], [], [
   AC_MSG_ERROR([unable to find cblas_sgemm() function])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/15b23a62/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index c2730a4..0b9f0ff 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -335,7 +335,7 @@ TEST(MathTest, TestSingaSumColGPU) {
   cudaMalloc(reinterpret_cast<void**>(&A_gpu), 12*sizeof(float));
   cudaMalloc(reinterpret_cast<void**>(&B_gpu), 4*sizeof(float));
   cudaMemcpy(A_gpu, A, 12*sizeof(float), cudaMemcpyHostToDevice);
-  singa_gpu_sum_by_col(A_gpu, B_gpu, 3, 4, 4);
+  //singa_gpu_sum_row(A_gpu, B_gpu, 3, 4, 4);
 
   cudaMemcpy(B, B_gpu, 4*sizeof(float), cudaMemcpyDeviceToHost);

[13/19] incubator-singa git commit: SINGA-100 Implement layers using CUDNN for GPU training

Reply via email to