commit:     a5bd494c9be931e7bdcf88f75f37d9f4d8594864
Author:     Sv. Lockal <lockalsash <AT> gmail <DOT> com>
AuthorDate: Mon Mar 11 16:37:02 2024 +0000
Commit:     Alfredo Tupone <tupone <AT> gentoo <DOT> org>
CommitDate: Mon Mar 11 19:27:48 2024 +0000
URL:        https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=a5bd494c

sci-libs/caffe2: add USE=rocm flag for AMDGPU support for 2.1.2 and 2.2.1

Closes: https://bugs.gentoo.org/905286
Signed-off-by: Sv. Lockal <lockalsash <AT> gmail.com>
Closes: https://github.com/gentoo/gentoo/pull/35713
Signed-off-by: Alfredo Tupone <tupone <AT> gentoo.org>

 ...ffe2-2.1.2-r6.ebuild => caffe2-2.1.2-r7.ebuild} | 61 +++++++++++++++----
 ...{caffe2-2.2.1.ebuild => caffe2-2.2.1-r1.ebuild} | 54 ++++++++++++++---
 .../files/caffe2-2.1.2-rocm-fix-std-cpp17.patch    | 68 ++++++++++++++++++++++
 sci-libs/caffe2/metadata.xml                       |  1 +
 4 files changed, 166 insertions(+), 18 deletions(-)

diff --git a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
similarity index 79%
rename from sci-libs/caffe2/caffe2-2.1.2-r6.ebuild
rename to sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
index 969c36754c5c..f57406145c6a 100644
--- a/sci-libs/caffe2/caffe2-2.1.2-r6.ebuild
+++ b/sci-libs/caffe2/caffe2-2.1.2-r7.ebuild
@@ -4,7 +4,8 @@
 EAPI=8
 
 PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
 
 MYPN=pytorch
 MYP=${MYPN}-${PV}
@@ -17,7 +18,7 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
 LICENSE="BSD"
 SLOT="0"
 KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack tensorpipe xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm tensorpipe xnnpack"
 RESTRICT="test"
 REQUIRED_USE="
        ${PYTHON_REQUIRED_USE}
@@ -26,7 +27,9 @@ REQUIRED_USE="
        tensorpipe? ( distributed )
        distributed? ( tensorpipe )
        gloo? ( distributed )
-" # ?? ( cuda rocm )
+       ?? ( cuda rocm )
+       rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
 
 # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
 RDEPEND="
@@ -59,6 +62,20 @@ RDEPEND="
        opencl? ( virtual/opencl )
        opencv? ( media-libs/opencv:= )
        qnnpack? ( sci-libs/QNNPACK )
+       rocm? (
+               >=dev-util/hip-5.7
+               >=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+               >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+               >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+               >=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+               >=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+       )
        tensorpipe? ( sci-libs/tensorpipe[cuda?] )
        xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
        mkl? ( sci-libs/mkl )
@@ -92,6 +109,7 @@ PATCHES=(
        "${FILESDIR}"/${PN}-2.1.1-cudaExtra.patch
        "${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
        "${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+       "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
 )
 
 src_prepare() {
@@ -118,6 +136,18 @@ src_prepare() {
                cmake/Dependencies.cmake \
                torch/CMakeLists.txt \
                CMakeLists.txt
+
+       if use rocm; then
+               sed -e "s:ROCM_PATH /opt/rocm:ROCM_PATH /usr:" \
+                       -e "s:HIP_PATH \${ROCM_PATH}/hip:HIP_PATH /usr:" \
+                       -e "s:\${HIP_PATH}/cmake:/usr/$(get_libdir)/cmake/hip:g" \
+                       -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+                       -i cmake/public/LoadHIP.cmake || die
+
+               ebegin "HIPifying cuda sources"
+               ${EPYTHON} tools/amd_build/build_amd.py || die
+               eend $?
+       fi
 }
 
 src_configure() {
@@ -140,9 +170,6 @@ src_configure() {
 
                -DUSE_CCACHE=OFF
                -DUSE_CUDA=$(usex cuda)
-               -DUSE_CUDNN=$(usex cuda)
-               -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
-               -DBUILD_NVFUSER=$(usex cuda)
                -DUSE_DISTRIBUTED=$(usex distributed)
                -DUSE_MPI=$(usex mpi)
                -DUSE_FAKELOWP=OFF
@@ -155,7 +182,6 @@ src_configure() {
                -DUSE_LEVELDB=OFF
                -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
                -DUSE_MKLDNN=$(usex onednn)
-               -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
                -DUSE_NNPACK=$(usex nnpack)
                -DUSE_QNNPACK=$(usex qnnpack)
                -DUSE_XNNPACK=$(usex xnnpack)
@@ -166,7 +192,7 @@ src_configure() {
                -DUSE_OPENCL=$(usex opencl)
                -DUSE_OPENCV=$(usex opencv)
                -DUSE_OPENMP=$(usex openmp)
-               -DUSE_ROCM=OFF # TODO
+               -DUSE_ROCM=$(usex rocm)
                -DUSE_SYSTEM_CPUINFO=ON
                -DUSE_SYSTEM_PYBIND11=ON
                -DUSE_UCC=OFF
@@ -200,8 +226,20 @@ src_configure() {
                addpredict "/dev/char"
 
                mycmakeargs+=(
+                       -DUSE_CUDNN=ON
+                       -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+                       -DBUILD_NVFUSER=ON
+                       -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
                        -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
                )
+       elif use rocm; then
+               export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+               mycmakeargs+=(
+                       -DBUILD_NVFUSER=ON
+                       -DUSE_NCCL=ON
+                       -DUSE_SYSTEM_NCCL=ON
+               )
        fi
 
        if use onednn; then
@@ -214,6 +252,9 @@ src_configure() {
        fi
 
        cmake_src_configure
+
+       # do not rerun cmake and the build process in src_install
+       sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
 }
 
 src_install() {
@@ -225,7 +266,7 @@ src_install() {
        rm -rf python
        mkdir -p python/torch/include || die
        mv "${ED}"/usr/lib/python*/site-packages/caffe2 python/ || die
-       if use cuda; then
+       if use cuda || use rocm; then
                mv "${ED}${S}"/nvfuser python/nvfuser || die
                mv "${ED}"/usr/$(get_libdir)/nvfuser.so python/nvfuser/_C.so || die
        fi
@@ -234,7 +275,7 @@ src_install() {
        python_domodule python/torch
        ln -s ../../../../../include/torch \
                "${D}$(python_get_sitedir)"/torch/include/torch || die # bug 923269
-       if use cuda; then
+       if use cuda || use rocm; then
                python_domodule python/nvfuser
        fi
        rm -rf "${ED}${WORKDIR}"

diff --git a/sci-libs/caffe2/caffe2-2.2.1.ebuild b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
similarity index 80%
rename from sci-libs/caffe2/caffe2-2.2.1.ebuild
rename to sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
index 6f96107154b7..80dc2b500a0f 100644
--- a/sci-libs/caffe2/caffe2-2.2.1.ebuild
+++ b/sci-libs/caffe2/caffe2-2.2.1-r1.ebuild
@@ -4,7 +4,8 @@
 EAPI=8
 
 PYTHON_COMPAT=( python3_{9..12} )
-inherit python-single-r1 cmake cuda flag-o-matic prefix
+ROCM_VERSION=5.7
+inherit python-single-r1 cmake cuda flag-o-matic prefix rocm
 
 MYPN=pytorch
 MYP=${MYPN}-${PV}
@@ -17,14 +18,16 @@ SRC_URI="https://github.com/pytorch/${MYPN}/archive/refs/tags/v${PV}.tar.gz
 LICENSE="BSD"
 SLOT="0"
 KEYWORDS="~amd64"
-IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack xnnpack"
+IUSE="cuda distributed fbgemm ffmpeg gloo mkl mpi nnpack +numpy onednn openblas opencl opencv openmp qnnpack rocm xnnpack"
 RESTRICT="test"
 REQUIRED_USE="
        ${PYTHON_REQUIRED_USE}
        ffmpeg? ( opencv )
        mpi? ( distributed )
        gloo? ( distributed )
-" # ?? ( cuda rocm )
+       ?? ( cuda rocm )
+       rocm? ( || ( ${ROCM_REQUIRED_USE} ) )
+"
 
 # CUDA 12 not supported yet: https://github.com/pytorch/pytorch/issues/91122
 RDEPEND="
@@ -57,6 +60,20 @@ RDEPEND="
        opencl? ( virtual/opencl )
        opencv? ( media-libs/opencv:= )
        qnnpack? ( sci-libs/QNNPACK )
+       rocm? (
+               >=dev-util/hip-5.7
+               >=dev-libs/rccl-5.7[${ROCM_USEDEP}]
+               >=sci-libs/rocThrust-5.7[${ROCM_USEDEP}]
+               >=sci-libs/rocPRIM-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipBLAS-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipFFT-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipSPARSE-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipRAND-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipCUB-5.7[${ROCM_USEDEP}]
+               >=sci-libs/hipSOLVER-5.7[${ROCM_USEDEP}]
+               >=sci-libs/miopen-5.7[${ROCM_USEDEP}]
+               >=dev-util/roctracer-5.7[${ROCM_USEDEP}]
+       )
        distributed? ( sci-libs/tensorpipe[cuda?] )
        xnnpack? ( >=sci-libs/XNNPACK-2022.12.22 )
        mkl? ( sci-libs/mkl )
@@ -89,6 +106,7 @@ PATCHES=(
        "${FILESDIR}"/${PN}-2.0.0-cudnn_include_fix.patch
        "${FILESDIR}"/${PN}-2.1.2-fix-rpath.patch
        "${FILESDIR}"/${PN}-2.1.2-fix-openmp-link.patch
+       "${FILESDIR}"/${PN}-2.1.2-rocm-fix-std-cpp17.patch
 )
 
 src_prepare() {
@@ -115,6 +133,17 @@ src_prepare() {
                cmake/Dependencies.cmake \
                torch/CMakeLists.txt \
                CMakeLists.txt
+
+       if use rocm; then
+               sed -e "s:/opt/rocm:/usr:" \
+                       -e "s:lib/cmake:$(get_libdir)/cmake:g" \
+                       -e "s/HIP 1.0/HIP 1.0 REQUIRED/" \
+                       -i cmake/public/LoadHIP.cmake || die
+
+               ebegin "HIPifying cuda sources"
+               ${EPYTHON} tools/amd_build/build_amd.py || die
+               eend $?
+       fi
 }
 
 src_configure() {
@@ -137,9 +166,6 @@ src_configure() {
 
                -DUSE_CCACHE=OFF
                -DUSE_CUDA=$(usex cuda)
-               -DUSE_CUDNN=$(usex cuda)
-               -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
-               -DBUILD_NVFUSER=$(usex cuda)
                -DUSE_DISTRIBUTED=$(usex distributed)
                -DUSE_MPI=$(usex mpi)
                -DUSE_FAKELOWP=OFF
@@ -152,7 +178,6 @@ src_configure() {
                -DUSE_LEVELDB=OFF
                -DUSE_MAGMA=OFF # TODO: In GURU as sci-libs/magma
                -DUSE_MKLDNN=$(usex onednn)
-               -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
                -DUSE_NNPACK=$(usex nnpack)
                -DUSE_QNNPACK=$(usex qnnpack)
                -DUSE_XNNPACK=$(usex xnnpack)
@@ -163,7 +188,7 @@ src_configure() {
                -DUSE_OPENCL=$(usex opencl)
                -DUSE_OPENCV=$(usex opencv)
                -DUSE_OPENMP=$(usex openmp)
-               -DUSE_ROCM=OFF # TODO
+               -DUSE_ROCM=$(usex rocm)
                -DUSE_SYSTEM_CPUINFO=ON
                -DUSE_SYSTEM_PYBIND11=ON
                -DUSE_UCC=OFF
@@ -197,8 +222,18 @@ src_configure() {
                addpredict "/dev/char"
 
                mycmakeargs+=(
+                       -DUSE_CUDNN=ON
+                       -DTORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5 7.0}"
+                       -DUSE_NCCL=OFF # TODO: NVIDIA Collective Communication Library
                        -DCMAKE_CUDA_FLAGS="$(cuda_gccdir -f | tr -d \")"
                )
+       elif use rocm; then
+               export PYTORCH_ROCM_ARCH="$(get_amdgpu_flags)"
+
+               mycmakeargs+=(
+                       -DUSE_NCCL=ON
+                       -DUSE_SYSTEM_NCCL=ON
+               )
        fi
 
        if use onednn; then
@@ -211,6 +246,9 @@ src_configure() {
        fi
 
        cmake_src_configure
+
+       # do not rerun cmake and the build process in src_install
+       sed '/RERUN/,+1d' -i "${BUILD_DIR}"/build.ninja || die
 }
 
 src_install() {

diff --git a/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
new file mode 100644
index 000000000000..cb0fa0c48e80
--- /dev/null
+++ b/sci-libs/caffe2/files/caffe2-2.1.2-rocm-fix-std-cpp17.patch
@@ -0,0 +1,68 @@
+Fix for error: invalid argument '-std=c++17' not allowed with 'C'
+https://github.com/pytorch/pytorch/issues/103222
+--- a/c10/hip/CMakeLists.txt
++++ b/c10/hip/CMakeLists.txt
+@@ -30,6 +30,7 @@ hip_add_library(c10_hip ${C10_HIP_SRCS} ${C10_HIP_HEADERS})
+ 
+ # Propagate HIP_CXX_FLAGS that were set from Dependencies.cmake
+ target_compile_options(c10_hip PRIVATE ${HIP_CXX_FLAGS})
++set_target_properties(c10_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+ 
+ # caffe2_hip adds a bunch of dependencies like rocsparse, but c10/hip is supposed to be
+ # minimal.  I'm not sure if we need hip_hcc or not; for now leave it out
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -1598,6 +1598,7 @@ if(USE_ROCM)
+ 
+   # Since PyTorch files contain HIP headers, these flags are required for the necessary definitions to be added.
+   target_compile_options(torch_hip PUBLIC ${HIP_CXX_FLAGS})  # experiment
++  set_target_properties(torch_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+   target_link_libraries(torch_hip PUBLIC c10_hip)
+ 
+   if(NOT INTERN_BUILD_MOBILE)
+@@ -1774,6 +1775,7 @@ if(BUILD_TEST)
+       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
++      set_target_properties(${test_name} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+       if(INSTALL_TEST)
+         install(TARGETS ${test_name} DESTINATION test)
+@@ -1955,6 +1957,7 @@ if(BUILD_PYTHON)
+     endif()
+     if(NOT MSVC)
+       target_compile_options(caffe2_pybind11_state_hip PRIVATE ${HIP_CXX_FLAGS} -fvisibility=hidden)
++      set_target_properties(caffe2_pybind11_state_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+     endif()
+     set_target_properties(caffe2_pybind11_state_hip PROPERTIES PREFIX "")
+     set_target_properties(caffe2_pybind11_state_hip PROPERTIES SUFFIX ${PY_EXT_SUFFIX})
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -1287,7 +1287,6 @@ if(USE_ROCM)
+     list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
+     list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
+     list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
+-    list(APPEND HIP_CXX_FLAGS -std=c++17)
+     add_definitions(-DROCM_VERSION=${ROCM_VERSION_DEV_INT})
+     add_definitions(-DTORCH_HIP_VERSION=${TORCH_HIP_VERSION})
+     message("TORCH_HIP_VERSION=${TORCH_HIP_VERSION} is added as a compiler defines")
+--- a/cmake/public/utils.cmake
++++ b/cmake/public/utils.cmake
+@@ -335,6 +335,7 @@ function(caffe2_hip_binary_target target_name_or_src)
+   caffe2_binary_target(${target_name_or_src})
+ 
+   target_compile_options(${__target} PRIVATE ${HIP_CXX_FLAGS})
++  set_target_properties(${__target} PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+   target_include_directories(${__target} PRIVATE ${Caffe2_HIP_INCLUDE})
+ endfunction()
+ 
+--- a/modules/detectron/CMakeLists.txt
++++ b/modules/detectron/CMakeLists.txt
+@@ -31,6 +31,7 @@ if(BUILD_CAFFE2_OPS)
+         ${Detectron_CPU_SRCS}
+         ${Detectron_HIP_SRCS})
+     target_compile_options(caffe2_detectron_ops_hip PRIVATE ${HIP_CXX_FLAGS})
++    set_target_properties(caffe2_detectron_ops_hip PROPERTIES CXX_STANDARD 17 CXX_EXTENSIONS OFF)
+     if(USE_MKLDNN)
+       target_link_libraries(caffe2_detectron_ops_hip PRIVATE caffe2::mkldnn)
+     endif()

diff --git a/sci-libs/caffe2/metadata.xml b/sci-libs/caffe2/metadata.xml
index 3fe84b0977fc..ed1f9fa58993 100644
--- a/sci-libs/caffe2/metadata.xml
+++ b/sci-libs/caffe2/metadata.xml
@@ -18,6 +18,7 @@
                <flag name="opencv">Add support for image processing operators</flag>
                <flag name="openmp">Use OpenMP for parallel code</flag>
                <flag name="qnnpack">Use QNNPACK</flag>
+               <flag name="rocm">Enable ROCm gpu computing support</flag>
                <flag name="tensorpipe">Use tensorpipe</flag>
                <flag name="xnnpack">Use XNNPACK</flag>
        </use>

Reply via email to