SINGA-41: Support single node single GPU training. Fix a bug caused by the CUDA compilation options by turning off the PTXAS option. However, performance degrades a lot with this change; we need to optimize this or replace Mshadow with other libs.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/1770377b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/1770377b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/1770377b

Branch: refs/heads/gpu
Commit: 1770377b93d0c2133271f5824b19d6ef0195efd6
Parents: cb30ead
Author: seaok <[email protected]>
Authored: Wed Sep 23 15:33:20 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Tue Sep 29 10:22:25 2015 +0800

----------------------------------------------------------------------
 Makefile.gpu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/1770377b/Makefile.gpu
----------------------------------------------------------------------
diff --git a/Makefile.gpu b/Makefile.gpu
index 6bcd361..49853db 100644
--- a/Makefile.gpu
+++ b/Makefile.gpu
@@ -28,7 +28,7 @@ ZK_FLAGS :=-DTHREADED -fpermissive
 CXXFLAGS := -O2 -msse3 -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
 	$(MSHADOW_FLAGS) $(ZK_FLAGS)\
 	-funroll-loops $(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
-CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 \
+CUCXXFLAGS := $(MSHADOW_FLAGS) -std=c++11 -G $(CUDA_ARCH) \
 	$(foreach includedir, $(INCLUDE_DIRS), -I$(includedir))
 
 # Add device compile option
