SINGA-41: Support single-node, single-GPU training

Fix a bug caused by the CUDA compilation options by turning off PTXAS
optimization (adding -G, plus $(CUDA_ARCH), to CUCXXFLAGS).
However, performance degrades a lot with this change.
We need to optimize this or replace mshadow with another library.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1770377b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1770377b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1770377b

Branch: refs/heads/gpu
Commit: 1770377b93d0c2133271f5824b19d6ef0195efd6
Parents: cb30ead
Author: seaok <[email protected]>
Authored: Wed Sep 23 15:33:20 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Tue Sep 29 10:22:25 2015 +0800

----------------------------------------------------------------------
 Makefile.gpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1770377b/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 6bcd361..49853db 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -28,7 +28,7 @@ ZK_FLAGS :=-DTHREADED -fpermissive
 CXXFLAGS := -O2 -msse3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
        $(MSHADOW_FLAGS) $(ZK_FLAGS)\
        -funroll-loops $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
-CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 \
+CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
        $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
 
 # Add device compile option

Reply via email to