commit: a788dbf0de374240b116598b7a93881509258e24
Author: Benda Xu <heroxbd <AT> gentoo <DOT> org>
AuthorDate: Fri Aug 7 12:55:50 2020 +0000
Commit: Benda XU <heroxbd <AT> gentoo <DOT> org>
CommitDate: Fri Aug 7 12:55:58 2020 +0000
URL: https://gitweb.gentoo.org/proj/sci.git/commit/?id=a788dbf0
sci-libs/pytorch: refresh cuda nccl patch for 1.6.0.
Package-Manager: Portage-2.3.88, Repoman-2.3.18
Signed-off-by: Benda Xu <heroxbd <AT> gentoo.org>
.../files/pytorch-1.6.0-nccl-nvccflags.patch | 27 ++++++++++++++++++++++
sci-libs/pytorch/pytorch-1.6.0.ebuild | 2 +-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflags.patch b/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflags.patch
new file mode 100644
index 000000000..052474ee4
--- /dev/null
+++ b/sci-libs/pytorch/files/pytorch-1.6.0-nccl-nvccflags.patch
@@ -0,0 +1,27 @@
+diff -uprN nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk nccl-patched/makefiles/common.mk
+--- nccl-5949d96f36d050e59d05872f8bbffd2549318e95/makefiles/common.mk 2020-06-09 00:31:44.000000000 +0800
++++ nccl-patched/makefiles/common.mk 2020-08-06 21:25:57.784279738 +0800
+@@ -54,7 +54,7 @@ CXXFLAGS := -DCUDA_MAJOR=$(CUDA_MAJOR)
+ # Maxrregcount needs to be set accordingly to NCCL_MAX_NTHREADS (otherwise it will cause kernel launch errors)
+ # 512 : 120, 640 : 96, 768 : 80, 1024 : 60
+ # We would not have to set this if we used __launch_bounds__, but this only works on kernels, not on functions.
+-NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
++NVCUFLAGS := $(NVCCFLAGS) $(NVCC_GENCODE) -std=c++11 -Xptxas -maxrregcount=96 -Xfatbin -compress-all
+ # Use addprefix so that we can specify more than one path
+ NVLDFLAGS := -L${CUDA_LIB} -lcudart -lrt
+
+@@ -68,14 +68,6 @@ NVLDFLAGS += ${GCOV_FLAGS:%=-Xcompiler
+ # $(warning GCOV_FLAGS=${GCOV_FLAGS})
+ ########## GCOV ##########
+
+-ifeq ($(DEBUG), 0)
+-NVCUFLAGS += -O3
+-CXXFLAGS += -O3 -g
+-else
+-NVCUFLAGS += -O0 -G -g
+-CXXFLAGS += -O0 -g -ggdb3
+-endif
+-
+ ifneq ($(VERBOSE), 0)
+ NVCUFLAGS += -Xptxas -v -Xcompiler -Wall,-Wextra,-Wno-unused-parameter
+ CXXFLAGS += -Wall -Wextra
diff --git a/sci-libs/pytorch/pytorch-1.6.0.ebuild b/sci-libs/pytorch/pytorch-1.6.0.ebuild
index c59b248e2..1394bdbe3 100644
--- a/sci-libs/pytorch/pytorch-1.6.0.ebuild
+++ b/sci-libs/pytorch/pytorch-1.6.0.ebuild
@@ -147,7 +147,7 @@ src_prepare() {
if use cuda; then
cd ../nccl || die
- eapply "${FILESDIR}"/${PN}-1.4.0-nccl-nvccflags.patch
+ eapply "${FILESDIR}"/${PN}-1.6.0-nccl-nvccflags.patch
ln -s . nccl || die
cuda_src_prepare