Date: Thursday, July 23, 2020 @ 20:24:24 Author: kgizdov Revision: 665091
archrelease: copy trunk to community-testing-x86_64 Added: root/repos/community-testing-x86_64/ root/repos/community-testing-x86_64/PKGBUILD (from rev 665090, root/trunk/PKGBUILD) root/repos/community-testing-x86_64/ROOFIT_LICENSE (from rev 665090, root/trunk/ROOFIT_LICENSE) root/repos/community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch (from rev 665090, root/trunk/adapt_tmva_to_support_cudnn8.patch) root/repos/community-testing-x86_64/jupyter_notebook_config.py (from rev 665090, root/trunk/jupyter_notebook_config.py) root/repos/community-testing-x86_64/nbman-for-arch.patch (from rev 665090, root/trunk/nbman-for-arch.patch) root/repos/community-testing-x86_64/root.pc.tpl (from rev 665090, root/trunk/root.pc.tpl) root/repos/community-testing-x86_64/root.xml (from rev 665090, root/trunk/root.xml) root/repos/community-testing-x86_64/settings-cuda.cmake (from rev 665090, root/trunk/settings-cuda.cmake) root/repos/community-testing-x86_64/settings.cmake (from rev 665090, root/trunk/settings.cmake) root/repos/community-testing-x86_64/thisroot.fail (from rev 665090, root/trunk/thisroot.fail) ------------------------------------+ PKGBUILD | 281 ++++++++ ROOFIT_LICENSE | 22 adapt_tmva_to_support_cudnn8.patch | 1130 +++++++++++++++++++++++++++++++++++ jupyter_notebook_config.py | 1 nbman-for-arch.patch | 177 +++++ root.pc.tpl | 12 root.xml | 14 settings-cuda.cmake | 110 +++ settings.cmake | 110 +++ thisroot.fail | 12 10 files changed, 1869 insertions(+) Copied: root/repos/community-testing-x86_64/PKGBUILD (from rev 665090, root/trunk/PKGBUILD) =================================================================== --- community-testing-x86_64/PKGBUILD (rev 0) +++ community-testing-x86_64/PKGBUILD 2020-07-23 20:24:24 UTC (rev 665091) @@ -0,0 +1,281 @@ +# Maintainer: Konstantin Gizdov < arch at kge dot pw > +# Contributor: Frank Siegert < frank.siegert at googlemail dot com > +# Contributor: Scott Lawrence < bytbox at gmail dot com > +# Contributor: Thomas Dziedzic < 
gostrc at gmail dot com > +# Contributor: Sebastian Voecking < voeck at web dot de > + +pkgbase=root +pkgname=('root' 'root-cuda') +pkgver=6.22.00 +pkgrel=1 +pkgdesc='C++ data analysis framework and interpreter from CERN' +arch=('x86_64') +url='https://root.cern' +license=('LGPL2.1' 'GPL' 'custom:University of California and Stanford University License') +makedepends=( + 'ccache' + 'cern-vdt' + 'chromium' + 'cfitsio' + 'cmake' + 'cuda' + 'cudnn' + 'gcc-fortran' + 'gcc9-fortran' + 'git' + 'go' + 'libxml2' + 'libmariadbclient' + 'ocaml' + 'ocaml-ctypes' + 'openmp' + 'openmpi' + 'openssl' + 'postgresql-libs' + 'pythia8>=8.2.40-1' + 'qt5-webengine' + 'sqlite' + 'unuran' + 'vc' + 'xrootd>=4.6.0-2' + 'z3' +) +depends=( + 'blas' + 'desktop-file-utils' + 'fcgi' + 'fftw' + 'ftgl' + 'giflib' + 'gl2ps' + 'glew' + 'graphviz' + 'gsl' + 'hicolor-icon-theme' + 'intel-tbb' + 'libafterimage' + 'librsvg' + 'libxpm' + 'python' + 'python-numpy' + 'tex-gyre-fonts' + 'unixodbc' + 'xxhash>=0.6.5-1' + 'zstd' +) +optdepends=( + 'cern-vdt: Add a set of fast and vectorisable mathematical functions' + 'chromium: Support for WebGUI' + 'cfitsio: Read images and data from FITS files' + 'libmariadbclient: MySQL support' + 'libxml2: XML parser interface' + 'openmp: Support OpenMP extensions in Minuit2' + 'openmpi: Support OpenMPI extensions in Minuit2' + 'openssl: OpenSSL support' + 'postgresql-libs: PostgreSQL support' + 'pythia8>=8.2.40-1: Pythia8 EG support' + 'qt5-webengine: Support for WebGUI' + 'sqlite: SQLite support' + 'tcsh: Legacy CSH support' + 'unuran: Support non-uniform random numbers' + 'vc: Add types for portable and intuitive SIMD programming' + 'xrootd: Support remote file server and client' + 'z3: Support the Z3 theorem prover' +) +source=( + "https://root.cern.ch/download/root_v${pkgver}.source.tar.gz" + 'ROOFIT_LICENSE' + 'root.xml' + 'root.pc.tpl' + 'settings.cmake' + 'settings-cuda.cmake' + 'jupyter_notebook_config.py' + 'nbman-for-arch.patch' + 'thisroot.fail' + 
'adapt_tmva_to_support_cudnn8.patch' +) +sha512sums=('9e3c54bbc146b0abb0a2d960af380255ec59d0b3a11a4a97a2a25cb7ac567b07280c4eb48dddf99c1fa2e692881f6396a842ce125d3a253037e52f719739f01e' + 'af8f178fc9df66997d5495b271e38adcd1636aab4c8fc994c6600c2496127829d831250d73d3fc229b02dfe49b9867d0be979beacb959f2f3a05351b8118a4a6' + '1fe6f4aa09d583d33f27cc766f4935510bb7ab6bbb8d4700baa1aaab92ea6c876500b67da1e4f6e0b510aa5616e4e193b860264b86925de85f2d9f558d75d5dc' + '3c81d255a17b902ffac0187af1752847036137e16641a88b17eef0d9c944e6f0d3c954bc93307d6270603f43f6c23f2e04f98dc7a68f9d076dbaa8006a2527d6' + '9ee5b6606dbd352608a2a4998344ca4026d677c86823e62fff615f6e84efcecdffc07a1e9182a356aa35035e7f35df5a107127722a6bad4b97d1f49cffebf5b9' + '7665bc8cbe79162e0b969b08802e1b7b2ed22ed8b1402d50cf194172a644f647dcaf0f5abb76f8b6007dfab8dbc811604479be826b345d8fd77edfb51032110b' + '1c905ee7a3f8f5f3f567d957f9be6b503a8631565d4d9b9bfea5e496ef86865c5a8be1a1f8c7842754029879cf0afd2465249f532a116cc43660aa2e460ae682' + '12814f50b7016bd86d3f91e0e31c052783a0c0fa72b7d6a072d3ae6f86c2437323d585e531235377ebbfdd9cb76abd7da84d9631de821151547f1d4b13417e69' + 'ff555ac4db568affe139701907f86d919a2206f3e304f69dd317b756ea0904b5934d9364a524060778aa507809ce78448621619bb34039ba34c5a71af71a4a8c' + '2ae126795df4127c27a6287a1499bdb8b2bacb74cfbec17dabe378a5fb9fc7c755644e4090a4da1d0045bf5d4f542f06da827a0f48a5927ee8509874045f18b6') + +get_pyver () { + python -c 'import sys; print(str(sys.version_info[0]) + "." + str(sys.version_info[1]))' +} + +prepare() { + local src + for src in "${source[@]}"; do + src="${src%%::*}" + src="${src##*/}" + [[ $src = *.patch ]] || continue + echo " -> Applying patch $src..." 
+ patch -Np1 -i "../$src" -d "${srcdir}/${pkgbase}-${pkgver}" + done + + # specify some custom flags + # needed by vc to link properly + CUSTOM_CMAKE_FLAGS="-DTARGET_ARCHITECTURE:STRING=generic" + # make sure it finds python + CUSTOM_CMAKE_FLAGS+=" -DPYTHON_EXECUTABLE:PATH=/usr/bin/python" + # need to set install prefix like so + CUSTOM_CMAKE_FLAGS+=" -DINSTALL_PREFIX=/usr" + export CUSTOM_CMAKE_FLAGS + + # update system flags + # don't let ROOT play around with lib paths + # the following is no longer necessary + # sed -i -e 's@SetLibraryPath();@@g' \ + # "${srcdir}/${pkgbase}-${pkgver}/rootx/src/rootx.cxx" + # now only depends on IS_RPATH_BUILD being set + # so pass it to GCC + export CPPFLAGS="${CPPFLAGS} -DIS_RPATH_BUILD=1" + # make sure pthread gets detected + CUSTOM_COMPILER_FLAGS="${CPPFLAGS} -pthread" + export CFLAGS="${CFLAGS} ${CUSTOM_COMPILER_FLAGS}" + export CXXFLAGS="${CXXFLAGS} ${CUSTOM_COMPILER_FLAGS}" + # do not link undefined + CUSTOM_COMPILER_FLAGS+=" -Wl,--no-undefined" + export LDFLAGS="${LDFLAGS} ${CUSTOM_COMPILER_FLAGS}" + + # go flags for built-in clang + export CGO_LDFLAGS="${LDFLAGS}" + export GOFLAGS="-buildmode=pie -trimpath -modcacherw" + + cp -r "${pkgbase}-${pkgver}" "${pkgbase}-${pkgver}-cuda" +} + +build() { + ## ROOT + mkdir -p "${srcdir}/build" + cd "${srcdir}/build" + + cmake -C "${srcdir}/settings.cmake" \ + ${CUSTOM_CMAKE_FLAGS} \ + "${srcdir}/${pkgbase}-${pkgver}" + make + + ## ROOT with CUDA + mkdir -p "${srcdir}/build-cuda" + cd "${srcdir}/build-cuda" + + CC=/usr/bin/gcc-9 \ + CXX=/usr/bin/g++-9 \ + cmake -C "${srcdir}/settings-cuda.cmake" \ + ${CUSTOM_CMAKE_FLAGS} \ + "${srcdir}/${pkgbase}-${pkgver}-cuda" + make +} + +_package() { + local bld_dir="${srcdir}/${1}" + cd "${bld_dir}" + + make DESTDIR="${pkgdir}" install + + # fix missing hardlinks for genreflex and rootcint + cd "${pkgdir}"/usr/bin + ln -f rootcling rootcint + ln -f rootcling genreflex + cd "${bld_dir}" # go back + + # fix python env call + sed -e 
's/@python@/python/' -i "${pkgdir}/usr/lib/root/cmdLineUtils.py" + + # try to deal with weird PyROOT, PyMVA and JupyROOT stuff + rm -rf "${pkgdir}/usr/lib/root/__pycache__" + local _pyver=$(get_pyver) + local _pydir="${pkgdir}/usr/lib/python${_pyver}/site-packages" + install -d "${_pydir}" + find "${pkgdir}/usr/lib/root" -maxdepth 1 -mindepth 1 \( -iname "*py*" -or -name "*Js*" \) \ + ! \( -name "*EGPythia8*" -or -iname "*.rootmap" -or -iname "*.pcm" \) -print0 | while read -rd $'\0' _lib; do + _base=$(basename "${_lib}") + ln -sf "/usr/lib/root/${_base}" "${pkgdir}/usr/lib/python${_pyver}/site-packages/${_base}" + done + + # recompile pycache to strip $pkgdir from embedded paths + python -m compileall -d "/usr/lib/python${_pyver}" \ + "${pkgdir}/usr/lib/python${_pyver}" + python -O -m compileall -d "/usr/lib/python${_pyver}" \ + "${pkgdir}/usr/lib/python${_pyver}" + + # icon, shortcut and mime + install -Dm644 "${srcdir}/${pkgbase}-${pkgver}/icons/Root6Icon.png" \ + "${pkgdir}/usr/share/icons/hicolor/48x48/apps/root.png" + install -Dm644 "${srcdir}/${pkgbase}-${pkgver}/etc/root.desktop" \ + "${pkgdir}/usr/share/applications/root.desktop" + echo 'Icon=root.png' >> "${pkgdir}/usr/share/applications/root.desktop" + install -Dm644 "${srcdir}/root.xml" \ + "${pkgdir}/usr/share/mime/packages/root.xml" + + # use a file that pacman can track instead of adding directly to ld.so.conf + install -d "${pkgdir}/etc/ld.so.conf.d" + echo '/usr/lib/root' > "${pkgdir}/etc/ld.so.conf.d/root.conf" + + # create pkg-config file + local _prefix _exec_prefix _bindir _libdir _incdir _pkg_ver _libs _cflags _requires + _prefix="$("${pkgdir}"/usr/bin/root-config --prefix)" + _exec_prefix="$("${pkgdir}"/usr/bin/root-config --exec-prefix)" + _bindir="$("${pkgdir}"/usr/bin/root-config --bindir)" + _libdir="$("${pkgdir}"/usr/bin/root-config --libdir)" + _incdir="$("${pkgdir}"/usr/bin/root-config --incdir)" + _pkg_ver="$(sed -n 's,.*ROOT_RELEASE *\"\(.*\)\".*,\1,p' < 
"${pkgdir}"/usr/include/RVersion.h)" + _libs="$("${pkgdir}"/usr/bin/root-config --libs)" + _cflags="$("${pkgdir}"/usr/bin/root-config --cflags)" + printf -v _requires '%s,' "${depends[@]}" + cp "${srcdir}/root.pc.tpl" "${bld_dir}"/ + sed -e "s@_PREFIX@${_prefix}@" -e "s@_EXECPREFIX@${_exec_prefix}@" \ + -e "s@_LIBDIR@${_libdir}@" -e "s@_INCDIR@${_incdir}@" \ + -e "s@_PKGVERSION@${_pkg_ver}@" -e "s@_LIBRARIES@${_libs}@" \ + -e "s@_CFLAGS@${_cflags}@" -e "s@_UPSTREAM_URL@${url}@" \ + -e "s@_REQUIRES@${_requires}@" \ + -i "${bld_dir}/root.pc.tpl" + install -Dm644 "${bld_dir}/root.pc.tpl" "${pkgdir}/usr/lib/pkgconfig/root.pc" + + # install all licenses & docs + install -d "${pkgdir}/usr/share/licenses/roofit" + install "${srcdir}/ROOFIT_LICENSE" "${pkgdir}/usr/share/licenses/roofit/LICENSE" + install -d "${pkgdir}/usr/share/licenses/${pkgname}" + ln -s '/usr/share/doc/root/LICENSE' "${pkgdir}/usr/share/licenses/${pkgname}/LICENSE" + for fold in fonts js; do + install -d "${pkgdir}/usr/share/licenses/${pkgname}/${fold}" + ln -s "/usr/share/root/${fold}/LICENSE" "${pkgdir}/usr/share/licenses/${pkgname}/${fold}"/ + done + ln -s '/usr/share/licenses/roofit' "${pkgdir}/usr/share/licenses/${pkgname}/roofit" + if [ "${pkgname}" != "root" ]; then + ln -s "/usr/share/licenses/${pkgname}" "${pkgdir}/usr/share/licenses/root" + ln -s "/usr/share/doc/root" "${pkgdir}/usr/share/doc/${pkgname}" + fi + + # install jupyter kernels and `root --notebook` config + install -d "${pkgdir}/usr/share/jupyter/kernels" + ln -s '/etc/root/notebook/kernels/root' "${pkgdir}/usr/share/jupyter/kernels/root" + install "${srcdir}/jupyter_notebook_config.py" "${pkgdir}/etc/root/notebook"/ + + # drop thisroot.* shell files + rm -rf "${pkgdir}"/usr/bin/thisroot.* + install -Dm755 "${srcdir}/thisroot.fail" "${pkgdir}/usr/bin/thisroot.sh" + for suffix in csh fish; do + ln -s '/usr/bin/thisroot.sh' "${pkgdir}/usr/bin/thisroot.${suffix}" + done +} + +package_root() { + optdepends+=('gcc-fortran: Enable the 
Fortran components of ROOT') + _package build +} + +package_root-cuda() { + pkgdesc='C++ data analysis framework and interpreter from CERN with GPU (CUDA) features enabled' + provides=('root') + conflicts=('root') + depends+=('cuda' 'cudnn') + optdepends+=('gcc9-fortran: Enable the Fortran components of ROOT') + _package build-cuda +} Copied: root/repos/community-testing-x86_64/ROOFIT_LICENSE (from rev 665090, root/trunk/ROOFIT_LICENSE) =================================================================== --- community-testing-x86_64/ROOFIT_LICENSE (rev 0) +++ community-testing-x86_64/ROOFIT_LICENSE 2020-07-23 20:24:24 UTC (rev 665091) @@ -0,0 +1,22 @@ +RooFit --- Copyright (c) 2000-2005, Regents of the University of California and Stanford University +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Copied: root/repos/community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch (from rev 665090, root/trunk/adapt_tmva_to_support_cudnn8.patch) =================================================================== --- community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch (rev 0) +++ community-testing-x86_64/adapt_tmva_to_support_cudnn8.patch 2020-07-23 20:24:24 UTC (rev 665091) @@ -0,0 +1,1130 @@ +From 05739e6b01fb34b5ef40e1a584107876e68e4b77 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Tue, 21 Jul 2020 15:13:57 +0300 +Subject: [PATCH 01/10] update deprecated function call name to backward + compatible one + +--- + tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu +index 058cee28424..60289ec2fdd 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/RecurrentPropagation.cu +@@ -132,7 +132,11 @@ void TCudnn<AFloat>::InitializeRecurrentDescriptors(TDescriptors *&descriptors, + cudnnDataType_t mathPrec = CUDNN_DATA_FLOAT; + if (std::is_same<AFloat, double>::value) { mathPrec = CUDNN_DATA_DOUBLE;} + ++#if (CUDNN_VERSION >= 8000) ++ CUDNNCHECK(cudnnSetRNNDescriptor_v6(handle, rnnDescriptors->LayerDescriptor, hiddenSize, numLayers, rnnDescriptors->HelperDescriptor, 
++#else + CUDNNCHECK(cudnnSetRNNDescriptor(handle, rnnDescriptors->LayerDescriptor, hiddenSize, numLayers, rnnDescriptors->HelperDescriptor, ++#endif + inputMode, direction, mode, algo, mathPrec) ); + + + +From 90baa4f6ad10076fa148f5aa06ef432bd0f34208 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Tue, 21 Jul 2020 19:06:09 +0300 +Subject: [PATCH 02/10] adapt convolution forward to cuDNN 8 + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 77 ++++++++++++++++++- + 1 file changed, 76 insertions(+), 1 deletion(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 7a57b6bf104..cc953ee45f9 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -27,6 +27,9 @@ + // #include "Kernels.cuh"*/ + // #include <math.h> + ++// for std::numeric_limits<T>::max() ++#include <limits> ++ + namespace TMVA { + namespace DNN { + +@@ -378,7 +381,78 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + cudnnHandle_t cudnnHandle = outputTensor.GetCudnnHandle(); + + // cuDNN decides which algorithm to use +- // More detailed alternative: cudnnFindConvolutionForwardAlgorithm ++#if (CUDNN_VERSION >= 8000) ++ /** ++ * I'm sure there may be a faster way, but this works ++ */ ++ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available ++ cudnnConvolutionDescriptor_t tempConvDescriptor; ++ CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor)); ++ cudnnTensorDescriptor_t outputTensorDescriptor; ++ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor)); ++ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor, ++ CUDNN_TENSOR_NCHW, // Layout of the tensor in memory ++ Tensor_t::GetDataType(), ++ (int)L->GetBatchSize(), ++ (int)L->GetDepth(), ++ (int)L->GetHeight(), ++ (int)L->GetWidth())); 
++ int algoCount; ++ cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ CUDNNCHECK(cudnnFindConvolutionForwardAlgorithm( ++ cudnnHandle, ++ inputTensorDescriptor, ++ convDescriptors->WeightsDescriptor, ++ tempConvDescriptor, ++ outputTensorDescriptor, ++ convRequestedAlgoCount, ++ &algoCount, ++ &convPerfResults)); ++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), ++ // but we arrive at an chicken or egg problem: ++ // workspace size is calculated from chosen forward algorithm, ++ // but finding a forward algorithm depends on workspace size... ++ // i.e. ++ // Tensor_t & inputTensor = L->GetInput(); ++ // inputTensor = Tensor_t(inputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0); ++ // CUDNNCHECK(cudnnFindConvolutionForwardAlgorithmEx( ++ // cudnnHandle, ++ // inputTensorDescriptor, ++ // &inputTensor, ++ // convDescriptors->WeightsDescriptor, ++ // &filters, ++ // tempConvDescriptor, ++ // outputTensorDescriptor, ++ // &outputTensor, ++ // convRequestedAlgoCount, ++ // &algoCount, ++ // &convPerfResults, ++ // &convWorkspace, ++ // convWorkspace->ForwardWorkspaceSize)); ++ // instead choose either fastest or lowest memory algo as per preference ++ int algoIdx{0}; ++ if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (convPerfResults[i].status != 0) continue; ++ if (convPerfResults[i].time < temp_runtime) { ++ temp_runtime = convPerfResults[i].time; ++ algoIdx = i; ++ } ++ } ++ } else { // prefer smallest workspace size ++ size_t temp_memsize{std::numeric_limits<size_t>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (convPerfResults[i].status != 0) continue; ++ if (convPerfResults[i].memory < temp_memsize) { ++ temp_memsize = 
convPerfResults[i].memory; ++ algoIdx = i; ++ } ++ } ++ } ++ convWorkspace->AlgorithmForward = convPerfResults[algoIdx].algo; ++#else ++ // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions) + cudnnConvolutionFwdPreference_t preferenceFwd = (CNNOptions::ConvMaxWorkspaceSize !=0) ? CUDNN_CONVOLUTION_FWD_PREFER_FASTEST : + CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; + +@@ -389,6 +463,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + outputTensor.GetTensorDescriptor(), preferenceFwd, + memLimit, // Memory limit in bytes for mode CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT + &convWorkspace->AlgorithmForward)); ++#endif + + // Allocate memory for the convolution + //size_t workSpaceSizeInBytes = 0; + +From d9b5e2f82917e7183b9f45a49135641981741477 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Tue, 21 Jul 2020 19:34:00 +0300 +Subject: [PATCH 03/10] adapt convolution backward to cuDNN 8 + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 72 +++++++++++++++++++ + 1 file changed, 72 insertions(+) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index cc953ee45f9..85a5c3aa175 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -515,6 +515,77 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // dx : Activation gradient to be computed -> activationGradients [in place op] + // dy : Gradient of activation from the following layer (backpropagation)-> activationGradients + ++#if (CUDNN_VERSION >= 8000) ++ /** ++ * I'm sure there may be a faster way, but this works ++ */ ++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms ++ cudnnConvolutionDescriptor_t tempConvBwdDescriptor; ++ CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor)); ++ 
cudnnTensorDescriptor_t outputBwdTensorDescriptor; ++ CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor)); ++ CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor, ++ CUDNN_TENSOR_NCHW, // Layout of the tensor in memory ++ Tensor_t::GetDataType(), ++ (int)L->GetBatchSize(), ++ (int)L->GetInputDepth(), ++ (int)L->GetInputHeight(), ++ (int)L->GetInputWidth())); ++ int algoCount; ++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm( ++ cudnnHandle, ++ convDescriptors->WeightsDescriptor, ++ activationGradientsBackwardDescriptor, ++ tempConvBwdDescriptor, ++ outputBwdTensorDescriptor, ++ convRequestedAlgoCount, ++ &algoCount, ++ &convPerfBwdResults)); ++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), ++ // but we arrive at an chicken or egg problem: ++ // workspace size is calculated from chosen forward algorithm, ++ // but finding a forward algorithm depends on workspace size... ++ // i.e. 
++ // Tensor_t & outputBwdTensor = L->GetInput(); ++ // outputBwdTensor = Tensor_t(outputBwdTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0); ++ // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx( ++ // cudnnHandle, ++ // convDescriptors->WeightsDescriptor, ++ // &filters, ++ // activationGradientsBackwardDescriptor, ++ // &activationGradientsBackwardTensor, ++ // tempConvBwdDescriptor, ++ // outputBwdTensorDescriptor, ++ // &outputBwdTensor, ++ // convRequestedAlgoCount, ++ // &algoCount, ++ // &convPerfBwdResults, ++ // &convWorkspace, ++ // convWorkspace->ForwardWorkspaceSize)); ++ // instead choose either fastest or lowest memory algo as per preference ++ int algoIdx{0}; ++ if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (convPerfBwdResults[i].status != 0) continue; ++ if (convPerfBwdResults[i].time < temp_runtime) { ++ temp_runtime = convPerfBwdResults[i].time; ++ algoIdx = i; ++ } ++ } ++ } else { // prefer smallest workspace size ++ size_t temp_memsize{std::numeric_limits<size_t>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (convPerfBwdResults[i].status != 0) continue; ++ if (convPerfBwdResults[i].memory < temp_memsize) { ++ temp_memsize = convPerfBwdResults[i].memory; ++ algoIdx = i; ++ } ++ } ++ } ++ convWorkspace->AlgorithmBackward = convPerfBwdResults[algoIdx].algo; ++#else + cudnnConvolutionBwdDataPreference_t preferenceBwdData = + (CNNOptions::ConvMaxWorkspaceSize != 0) ? 
CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST : CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; + +@@ -525,6 +596,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + activationGradientsBackwardDescriptor, + preferenceBwdData, memLimit, + &convWorkspace->AlgorithmBackward)); ++#endif + + std::cout << "CONV BWD Data Algo used is " << convWorkspace->AlgorithmBackward << std::endl; + //CUDNNCHECK(cudnnSetConvolutionMathType(convDescriptors->LayerDescriptor, CUDNN_TENSOR_OP_MATH)); + +From 526b7177c0201be1d0c6b36de0772b7d2ecb90d5 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Wed, 22 Jul 2020 11:50:29 +0300 +Subject: [PATCH 04/10] fix typo and re-declarations + +--- + tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 85a5c3aa175..1b7e3e845d8 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -387,7 +387,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + */ + int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available + cudnnConvolutionDescriptor_t tempConvDescriptor; +- CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor)); ++ CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor)); + cudnnTensorDescriptor_t outputTensorDescriptor; + CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor)); + CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor, +@@ -407,7 +407,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + outputTensorDescriptor, + convRequestedAlgoCount, + &algoCount, +- &convPerfResults)); ++ convPerfResults)); + // we could also do it with the expert mode 
(cudnnFindConvolutionForwardAlgorithmEx), + // but we arrive at an chicken or egg problem: + // workspace size is calculated from chosen forward algorithm, +@@ -521,7 +521,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + */ + convRequestedAlgoCount = 6; // reset to max number of available backward algorithms + cudnnConvolutionDescriptor_t tempConvBwdDescriptor; +- CUDDNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor)); ++ CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor)); + cudnnTensorDescriptor_t outputBwdTensorDescriptor; + CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor)); + CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor, +@@ -531,7 +531,6 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + (int)L->GetInputDepth(), + (int)L->GetInputHeight(), + (int)L->GetInputWidth())); +- int algoCount; + cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm( + cudnnHandle, +@@ -541,7 +540,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + outputBwdTensorDescriptor, + convRequestedAlgoCount, + &algoCount, +- &convPerfBwdResults)); ++ convPerfBwdResults)); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), + // but we arrive at an chicken or egg problem: + // workspace size is calculated from chosen forward algorithm, +@@ -564,7 +563,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // convWorkspace->ForwardWorkspaceSize)); + // instead choose either fastest or lowest memory algo as per preference +- int algoIdx{0}; ++ algoIdx = 0; + if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest + float temp_runtime{std::numeric_limits<float>::max()}; + for (int i = 0; i < algoCount; ++i) { + +From 
6d84e765322a72c48de00b4a9b7471da8a15fece Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Wed, 22 Jul 2020 17:00:01 +0300 +Subject: [PATCH 05/10] implement workspace limits, fix an algoruthm preference + bug and rewrite relevant sections + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 273 ++++++++++-------- + 1 file changed, 151 insertions(+), 122 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 1b7e3e845d8..2049e2b9195 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -333,35 +333,108 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + TDescriptors * & descriptors, + const DNN::CNN::TConvParams & /*params*/, + ConvLayer_t *L) { +- auto convWorkspace = new ConvWorkspace_t (); ++ auto convWorkspace = new ConvWorkspace_t(); ++ size_t memLimit = (CNNOptions::ConvMaxWorkspaceSize > 0) ? static_cast<size_t>(CNNOptions::ConvMaxWorkspaceSize) : 0; + auto convDescriptors = static_cast<ConvDescriptors_t *>(descriptors); ++ // can we do the following and substitute below??? 
++ // auto weightsDescriptor{convDescriptors->WeightsDescriptor}; ++ // auto convDescriptor{convDescriptors->LayerDescriptor}; + ++#if (CUDNN_VERSION >= 8000) ++ enum algoPreference { no_workspace, fastest, workspace_limit }; ++ algoPreference algoChoice; ++ auto choose_algo = [](algoPreference const& algoPref, auto&& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int { ++ int algoIdx{0}; ++ if (algoPref == algoPreference::fastest) { // prefer fastest ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime) { ++ temp_runtime = PerfResults[i].time; ++ algoIdx = i; ++ } ++ } ++ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime && PerfResults[i].memory <= memLim) { ++ temp_runtime = PerfResults[i].time; ++ algoIdx = i; ++ } ++ } ++ } else { // prefer smallest workspace size ++ size_t temp_memsize{std::numeric_limits<size_t>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].memory < temp_memsize) { ++ temp_memsize = PerfResults[i].memory; ++ algoIdx = i; ++ } ++ } ++ } ++ return algoIdx; ++ }; ++#else ++ // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions) ++ cudnnConvolutionFwdPreference_t preferenceFwd; ++ cudnnConvolutionBwdDataPreference_t preferenceBwdData; ++ cudnnConvolutionBwdFilterPreference_t preferenceBwdFilter; ++#endif ++ // decide on algorithm preference early ++ if (CNNOptions::ConvMaxWorkspaceSize < 0) { ++ // no workspace case ++#if (CUDNN_VERSION >= 8000) ++ algoChoice = no_workspace; ++#else ++ preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; ++ 
preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; ++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; ++#endif ++ ++ } else if (CNNOptions::ConvMaxWorkspaceSize == 0) { ++ // fastest overall ++#if (CUDNN_VERSION >= 8000) ++ algoChoice = fastest; ++#else ++ preferenceFwd = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; ++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; ++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; ++#endif ++ ++ } else { ++ // fastest in memory limit ++#if (CUDNN_VERSION >= 8000) ++ algoChoice = workspace_limit; ++#else ++ preferenceFwd = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT; ++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT; ++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT; ++#endif ++ } + // fix the weight tensor shapes + // by default the weights are columnmajor, set them to be row major . At this points + // they are not yet initialized + Tensor_t & filters = L->GetWeightsAt(0); +- filters = Tensor_t (filters.GetDeviceBuffer(), {L->GetDepth(),L->GetInputDepth(), L->GetFilterHeight(),L->GetFilterWidth()}, MemoryLayout::RowMajor, 0, 0 ); +- //PrintTensor(L->GetWeightsAt(0)); ++ filters = Tensor_t(filters.GetDeviceBuffer(), {L->GetDepth(), L->GetInputDepth(), L->GetFilterHeight(), L->GetFilterWidth()}, MemoryLayout::RowMajor, 0, 0); ++ // PrintTensor(L->GetWeightsAt(0)); + Tensor_t & biases = L->GetBiasesAt(0); +- biases = Tensor_t (biases.GetDeviceBuffer(), {1, L->GetDepth(),1,1}, GetTensorLayout(), 0, 0 ); ++ biases = Tensor_t(biases.GetDeviceBuffer(), {1, L->GetDepth(), 1, 1}, GetTensorLayout(), 0, 0); + + Tensor_t & outputTensor = L->GetOutput(); +- outputTensor = Tensor_t(outputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth() },GetTensorLayout(),0,0 ); ++ outputTensor = Tensor_t(outputTensor.GetDeviceBuffer(), {L->GetBatchSize(), L->GetDepth(), L->GetHeight(), L->GetWidth()}, 
GetTensorLayout(), 0, 0); + Tensor_t & inputActivation = L->GetInputActivation(); +- inputActivation = Tensor_t(inputActivation.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(),0,0 ); ++ inputActivation = Tensor_t(inputActivation.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(), 0, 0); + + Tensor_t & activationGradients = L->GetActivationGradients(); +- activationGradients = Tensor_t(activationGradients.GetDeviceBuffer(),outputTensor.GetShape() ,GetTensorLayout(),0,0 ); ++ activationGradients = Tensor_t(activationGradients.GetDeviceBuffer(),outputTensor.GetShape(), GetTensorLayout(), 0, 0); + + Tensor_t & weightGradients = L->GetWeightGradientsAt(0); +- weightGradients = Tensor_t( weightGradients.GetDeviceBuffer(), filters.GetShape(), GetTensorLayout(), 0, 0 ); ++ weightGradients = Tensor_t(weightGradients.GetDeviceBuffer(), filters.GetShape(), GetTensorLayout(), 0, 0); + + Tensor_t & biasGradients = L->GetBiasGradientsAt(0); +- biasGradients = Tensor_t( biasGradients.GetDeviceBuffer(), biases.GetShape(), GetTensorLayout(), 0, 0 ); ++ biasGradients = Tensor_t(biasGradients.GetDeviceBuffer(), biases.GetShape(), GetTensorLayout(), 0, 0); + + + // FIXME: Use descriptors instead (Tensor device memory is otherwise allocated during initialization) +- //Tensor_t inputTensor ({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0); ++ // Tensor_t inputTensor ({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0); + cudnnTensorDescriptor_t inputTensorDescriptor; + CUDNNCHECK(cudnnCreateTensorDescriptor(&inputTensorDescriptor) ); + CUDNNCHECK(cudnnSetTensor4dDescriptor(inputTensorDescriptor, +@@ -385,79 +458,44 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + /** + * I'm sure there may be a faster way, but this works + */ +- int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to 
try, can be tuned, fixed for now as all available +- cudnnConvolutionDescriptor_t tempConvDescriptor; +- CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvDescriptor)); +- cudnnTensorDescriptor_t outputTensorDescriptor; +- CUDNNCHECK(cudnnCreateTensorDescriptor(&outputTensorDescriptor)); +- CUDNNCHECK(cudnnSetTensor4dDescriptor(outputTensorDescriptor, +- CUDNN_TENSOR_NCHW, // Layout of the tensor in memory +- Tensor_t::GetDataType(), +- (int)L->GetBatchSize(), +- (int)L->GetDepth(), +- (int)L->GetHeight(), +- (int)L->GetWidth())); ++ int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available ++ + int algoCount; + cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm +- CUDNNCHECK(cudnnFindConvolutionForwardAlgorithm( +- cudnnHandle, +- inputTensorDescriptor, +- convDescriptors->WeightsDescriptor, +- tempConvDescriptor, +- outputTensorDescriptor, +- convRequestedAlgoCount, +- &algoCount, +- convPerfResults)); ++ CUDNNCHECK( ++ cudnnFindConvolutionForwardAlgorithm( ++ cudnnHandle, ++ inputTensorDescriptor, ++ convDescriptors->WeightsDescriptor, ++ convDescriptors->LayerDescriptor, ++ outputTensor.GetTensorDescriptor(), ++ convRequestedAlgoCount, ++ &algoCount, ++ convPerfResults ++ ) ++ ); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), +- // but we arrive at an chicken or egg problem: +- // workspace size is calculated from chosen forward algorithm, +- // but finding a forward algorithm depends on workspace size... + // i.e. 
+- // Tensor_t & inputTensor = L->GetInput(); +- // inputTensor = Tensor_t(inputTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0); ++ // create an input tensor before the inputTensorDescriptor ++ // and get the descriptor from there ++ // Tensor_t inputTensor({L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth()}, MemoryLayout::RowMajor, 0, 0); + // CUDNNCHECK(cudnnFindConvolutionForwardAlgorithmEx( + // cudnnHandle, +- // inputTensorDescriptor, ++ // inputTensor.GetTensorDescriptor(), + // &inputTensor, + // convDescriptors->WeightsDescriptor, + // &filters, +- // tempConvDescriptor, +- // outputTensorDescriptor, ++ // convDescriptors->LayerDescriptor, ++ // outputTensor.GetTensorDescriptor(), + // &outputTensor, + // convRequestedAlgoCount, + // &algoCount, + // &convPerfResults, + // &convWorkspace, +- // convWorkspace->ForwardWorkspaceSize)); ++ // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- int algoIdx{0}; +- if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest +- float temp_runtime{std::numeric_limits<float>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (convPerfResults[i].status != 0) continue; +- if (convPerfResults[i].time < temp_runtime) { +- temp_runtime = convPerfResults[i].time; +- algoIdx = i; +- } +- } +- } else { // prefer smallest workspace size +- size_t temp_memsize{std::numeric_limits<size_t>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (convPerfResults[i].status != 0) continue; +- if (convPerfResults[i].memory < temp_memsize) { +- temp_memsize = convPerfResults[i].memory; +- algoIdx = i; +- } +- } +- } +- convWorkspace->AlgorithmForward = convPerfResults[algoIdx].algo; ++ convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, convPerfResults, memLimit)].algo; + #else +- // More detailed alternative: 
cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions) +- cudnnConvolutionFwdPreference_t preferenceFwd = (CNNOptions::ConvMaxWorkspaceSize !=0) ? CUDNN_CONVOLUTION_FWD_PREFER_FASTEST : +- CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; +- +- size_t memLimit = (CNNOptions::ConvMaxWorkspaceSize > 0) ? (size_t) CNNOptions::ConvMaxWorkspaceSize : 0; +- + CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm( + cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor, + outputTensor.GetTensorDescriptor(), preferenceFwd, +@@ -519,75 +557,36 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + /** + * I'm sure there may be a faster way, but this works + */ +- convRequestedAlgoCount = 6; // reset to max number of available backward algorithms +- cudnnConvolutionDescriptor_t tempConvBwdDescriptor; +- CUDNNCHECK(cudnnCreateConvolutionDescriptor(&tempConvBwdDescriptor)); +- cudnnTensorDescriptor_t outputBwdTensorDescriptor; +- CUDNNCHECK(cudnnCreateTensorDescriptor(&outputBwdTensorDescriptor)); +- CUDNNCHECK(cudnnSetTensor4dDescriptor(outputBwdTensorDescriptor, +- CUDNN_TENSOR_NCHW, // Layout of the tensor in memory +- Tensor_t::GetDataType(), +- (int)L->GetBatchSize(), +- (int)L->GetInputDepth(), +- (int)L->GetInputHeight(), +- (int)L->GetInputWidth())); +- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms ++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdDataResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm( + cudnnHandle, + convDescriptors->WeightsDescriptor, ++ activationGradients.GetTensorDescriptor(), ++ convDescriptors->LayerDescriptor, + activationGradientsBackwardDescriptor, +- tempConvBwdDescriptor, +- outputBwdTensorDescriptor, + 
convRequestedAlgoCount, + &algoCount, +- convPerfBwdResults)); ++ convPerfBwdDataResults)); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), +- // but we arrive at an chicken or egg problem: +- // workspace size is calculated from chosen forward algorithm, +- // but finding a forward algorithm depends on workspace size... + // i.e. +- // Tensor_t & outputBwdTensor = L->GetInput(); +- // outputBwdTensor = Tensor_t(outputBwdTensor.GetDeviceBuffer(),{ L->GetBatchSize(), L->GetInputDepth(), L->GetInputHeight(), L->GetInputWidth() },GetTensorLayout(),0,0); + // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx( + // cudnnHandle, + // convDescriptors->WeightsDescriptor, + // &filters, ++ // activationGradients.GetTensorDescriptor(), ++ // &activationGradients, ++ // convDescriptors->LayerDescriptor, + // activationGradientsBackwardDescriptor, +- // &activationGradientsBackwardTensor, +- // tempConvBwdDescriptor, +- // outputBwdTensorDescriptor, +- // &outputBwdTensor, ++ // &inputTensor, + // convRequestedAlgoCount, + // &algoCount, + // &convPerfBwdResults, + // &convWorkspace, +- // convWorkspace->ForwardWorkspaceSize)); ++ // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- algoIdx = 0; +- if (CNNOptions::ConvMaxWorkspaceSize != 0) { // prefer fastest +- float temp_runtime{std::numeric_limits<float>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (convPerfBwdResults[i].status != 0) continue; +- if (convPerfBwdResults[i].time < temp_runtime) { +- temp_runtime = convPerfBwdResults[i].time; +- algoIdx = i; +- } +- } +- } else { // prefer smallest workspace size +- size_t temp_memsize{std::numeric_limits<size_t>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (convPerfBwdResults[i].status != 0) continue; +- if (convPerfBwdResults[i].memory < temp_memsize) { +- temp_memsize = convPerfBwdResults[i].memory; +- algoIdx = i; +- } +- } +- } +- 
convWorkspace->AlgorithmBackward = convPerfBwdResults[algoIdx].algo; ++ convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, convPerfBwdDataResults, memLimit)].algo; + #else +- cudnnConvolutionBwdDataPreference_t preferenceBwdData = +- (CNNOptions::ConvMaxWorkspaceSize != 0) ? CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST : CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; +- + CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle, + convDescriptors->WeightsDescriptor, + activationGradients.GetTensorDescriptor(), +@@ -628,11 +627,40 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // here should be able to use inputTensorDescriptor + cudnnTensorDescriptor_t activationBackwardDescriptor = inputTensorDescriptor; + +- // cudnnConvolutionBwdFilterPreference_t preference = +- cudnnConvolutionBwdFilterPreference_t preferenceBwdFilter = (CNNOptions::ConvMaxWorkspaceSize != 0) +- ? CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE +- : CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; +- ++#if (CUDNN_VERSION >= 8000) ++ /** ++ * I'm sure there may be a faster way, but this works ++ */ ++ convRequestedAlgoCount = 6; // reset to max number of available backward algorithms ++ cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm( ++ cudnnHandle, ++ activationBackwardDescriptor, ++ activationGradients.GetTensorDescriptor(), ++ convDescriptors->LayerDescriptor, ++ convDescriptors->WeightsDescriptor, ++ convRequestedAlgoCount, ++ &algoCount, ++ convPerfBwdFilterResults)); ++ // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), ++ // i.e. 
++ // CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithmEx( ++ // cudnnHandle, ++ // activationBackwardDescriptor, ++ // &inputTensor, ++ // activationGradients.GetTensorDescriptor(), ++ // &activationGradients, ++ // convDescriptors->LayerDescriptor, ++ // convDescriptors->WeightsDescriptor, ++ // &filters, ++ // convRequestedAlgoCount, ++ // &algoCount, ++ // &convPerfBwdFilterResults, ++ // &convWorkspace, ++ // memLimit)); // use memLimit for workspace size ++ // instead choose either fastest or lowest memory algo as per preference ++ convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, convPerfBwdFilterResults, memLimit)].algo; ++#else + CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle, + activationBackwardDescriptor, + activationGradients.GetTensorDescriptor(), +@@ -641,6 +669,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + preferenceBwdFilter, + memLimit, + &convWorkspace->HelperAlgorithm)); ++#endif + + std::cout << "CONV BWD Filter Algo used is " << convWorkspace->HelperAlgorithm << std::endl; + + +From a9d39cc9ccf9ae474d90b6671d3e0d69d4cf6872 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Wed, 22 Jul 2020 17:11:30 +0300 +Subject: [PATCH 06/10] implement correct logic behind cudnn logarithm + preference + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 20 +++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 2049e2b9195..b74c99d1a99 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -380,18 +380,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + #endif + // decide on algorithm preference early + if (CNNOptions::ConvMaxWorkspaceSize < 0) { +- // no workspace case + #if (CUDNN_VERSION >= 8000) +- 
algoChoice = no_workspace; +-#else +- preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; +- preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; +- preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; +-#endif +- +- } else if (CNNOptions::ConvMaxWorkspaceSize == 0) { + // fastest overall +-#if (CUDNN_VERSION >= 8000) + algoChoice = fastest; + #else + preferenceFwd = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; +@@ -399,6 +389,16 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; + #endif + ++ } else if (CNNOptions::ConvMaxWorkspaceSize == 0) { ++ // no workspace case ++#if (CUDNN_VERSION >= 8000) ++ algoChoice = no_workspace; ++#else ++ preferenceFwd = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; ++ preferenceBwdData = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; ++ preferenceBwdFilter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; ++#endif ++ + } else { + // fastest in memory limit + #if (CUDNN_VERSION >= 8000) + +From 6282dfa816c7f51af5c0ecaa0065514e3f627631 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Wed, 22 Jul 2020 18:51:56 +0300 +Subject: [PATCH 07/10] use decltype instead of auto, fix typos + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 22 +++++++++---------- + 1 file changed, 11 insertions(+), 11 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index b74c99d1a99..6cefd72c099 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -343,29 +343,29 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + #if (CUDNN_VERSION >= 8000) + enum algoPreference { no_workspace, fastest, workspace_limit }; + algoPreference algoChoice; +- auto choose_algo = [](algoPreference const& algoPref, auto&& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int 
{ ++ auto choose_algo = [](algoPreference const& algoPref, int const algoCount, decltype(perfResults) const& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int { + int algoIdx{0}; + if (algoPref == algoPreference::fastest) { // prefer fastest + float temp_runtime{std::numeric_limits<float>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime) { +- temp_runtime = PerfResults[i].time; ++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime) { ++ temp_runtime = perfResults[i].time; + algoIdx = i; + } + } + } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size + float temp_runtime{std::numeric_limits<float>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].time < temp_runtime && PerfResults[i].memory <= memLim) { +- temp_runtime = PerfResults[i].time; ++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime && perfResults[i].memory <= memLim) { ++ temp_runtime = perfResults[i].time; + algoIdx = i; + } + } + } else { // prefer smallest workspace size + size_t temp_memsize{std::numeric_limits<size_t>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (PerfResults[i].status == CUDNN_STATUS_SUCCESS && PerfResults[i].memory < temp_memsize) { +- temp_memsize = PerfResults[i].memory; ++ if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].memory < temp_memsize) { ++ temp_memsize = perfResults[i].memory; + algoIdx = i; + } + } +@@ -494,7 +494,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, convPerfResults, memLimit)].algo; ++ 
convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, algoCount, convPerfResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm( + cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor, +@@ -585,7 +585,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, convPerfBwdDataResults, memLimit)].algo; ++ convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, algoCount, convPerfBwdDataResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle, + convDescriptors->WeightsDescriptor, +@@ -632,7 +632,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + * I'm sure there may be a faster way, but this works + */ + convRequestedAlgoCount = 6; // reset to max number of available backward algorithms +- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ cudnnConvolutionBwdFilterAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm( + cudnnHandle, + activationBackwardDescriptor, +@@ -659,7 +659,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, convPerfBwdFilterResults, memLimit)].algo; ++ convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, 
algoCount, convPerfBwdFilterResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle, + activationBackwardDescriptor, + +From 259c1c9c4d86391d1987f6635a2aece8cae587ac Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Wed, 22 Jul 2020 19:39:40 +0300 +Subject: [PATCH 08/10] assign backward filter algo to correct place + +--- + tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 6cefd72c099..5a80dfbc03d 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -659,7 +659,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmBackward = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo; ++ convWorkspace->HelperAlgorithm = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle, + activationBackwardDescriptor, + +From 2c109efea0e970b380a62f6102a286542676912a Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Thu, 23 Jul 2020 17:58:58 +0300 +Subject: [PATCH 09/10] make it compile and support C++11 + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 49 ++++++++++++------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 5a80dfbc03d..66ce64a5efc 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ 
b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -343,29 +343,37 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + #if (CUDNN_VERSION >= 8000) + enum algoPreference { no_workspace, fastest, workspace_limit }; + algoPreference algoChoice; +- auto choose_algo = [](algoPreference const& algoPref, int const algoCount, decltype(perfResults) const& perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int { ++ // C++11 lambdas cannot be templated, so we have to do this HORRIBLE stuff... ++ union LocalPerf_t { ++ // these three type are absolutely equivalent ++ // and one can access them as they wish to get info ++ cudnnConvolutionFwdAlgoPerf_t * fwd; ++ cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter; ++ cudnnConvolutionBwdDataAlgoPerf_t * bwdData; ++ }; ++ auto choose_algo = [](algoPreference const & algoPref, int const algoCount, LocalPerf_t const & perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int { + int algoIdx{0}; + if (algoPref == algoPreference::fastest) { // prefer fastest + float temp_runtime{std::numeric_limits<float>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime) { +- temp_runtime = perfResults[i].time; ++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime) { ++ temp_runtime = perfResults.fwd[i].time; + algoIdx = i; + } + } + } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size + float temp_runtime{std::numeric_limits<float>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].time < temp_runtime && perfResults[i].memory <= memLim) { +- temp_runtime = perfResults[i].time; ++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime && perfResults.fwd[i].memory <= memLim) { ++ temp_runtime = perfResults.fwd[i].time; 
+ algoIdx = i; + } + } + } else { // prefer smallest workspace size + size_t temp_memsize{std::numeric_limits<size_t>::max()}; + for (int i = 0; i < algoCount; ++i) { +- if (perfResults[i].status == CUDNN_STATUS_SUCCESS && perfResults[i].memory < temp_memsize) { +- temp_memsize = perfResults[i].memory; ++ if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].memory < temp_memsize) { ++ temp_memsize = perfResults.fwd[i].memory; + algoIdx = i; + } + } +@@ -461,7 +469,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + int convRequestedAlgoCount{8}; // requestedAlgoCount is setting how many algorithms to try, can be tuned, fixed for now as all available + + int algoCount; +- cudnnConvolutionFwdAlgoPerf_t convPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ cudnnConvolutionFwdAlgoPerf_t convFwdPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK( + cudnnFindConvolutionForwardAlgorithm( + cudnnHandle, +@@ -471,7 +479,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + outputTensor.GetTensorDescriptor(), + convRequestedAlgoCount, + &algoCount, +- convPerfResults ++ convFwdPerfResults + ) + ); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), +@@ -490,11 +498,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &outputTensor, + // convRequestedAlgoCount, + // &algoCount, +- // &convPerfResults, ++ // &convFwdPerfResults, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmForward = convPerfResults[choose_algo(algoChoice, algoCount, convPerfResults, memLimit)].algo; ++ LocalPerf_t fwdPerfResults{convFwdPerfResults}; ++ convWorkspace->AlgorithmForward = convFwdPerfResults[choose_algo(algoChoice, algoCount, 
fwdPerfResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm( + cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor, +@@ -558,7 +567,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + * I'm sure there may be a faster way, but this works + */ + convRequestedAlgoCount = 6; // reset to max number of available backward algorithms +- cudnnConvolutionBwdDataAlgoPerf_t convPerfBwdDataResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ cudnnConvolutionBwdDataAlgoPerf_t convBwdDataPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithm( + cudnnHandle, + convDescriptors->WeightsDescriptor, +@@ -567,7 +576,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + activationGradientsBackwardDescriptor, + convRequestedAlgoCount, + &algoCount, +- convPerfBwdDataResults)); ++ convBwdDataPerfResults)); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), + // i.e. 
+ // CUDNNCHECK(cudnnFindConvolutionBackwardDataAlgorithmEx( +@@ -581,11 +590,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &inputTensor, + // convRequestedAlgoCount, + // &algoCount, +- // &convPerfBwdResults, ++ // &convBwdDataPerfResults, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->AlgorithmBackward = convPerfBwdDataResults[choose_algo(algoChoice, algoCount, convPerfBwdDataResults, memLimit)].algo; ++ LocalPerf_t bwdDataPerfResults{convBwdDataPerfResults}; ++ convWorkspace->AlgorithmBackward = convBwdDataPerfResults[choose_algo(algoChoice, algoCount, bwdDataPerfResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle, + convDescriptors->WeightsDescriptor, +@@ -632,7 +642,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + * I'm sure there may be a faster way, but this works + */ + convRequestedAlgoCount = 6; // reset to max number of available backward algorithms +- cudnnConvolutionBwdFilterAlgoPerf_t convPerfBwdFilterResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm ++ cudnnConvolutionBwdFilterAlgoPerf_t convBwdFilterPerfResults[convRequestedAlgoCount]; // this will store metrics to choose convolution algorithm + CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithm( + cudnnHandle, + activationBackwardDescriptor, +@@ -641,7 +651,7 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + convDescriptors->WeightsDescriptor, + convRequestedAlgoCount, + &algoCount, +- convPerfBwdFilterResults)); ++ convBwdFilterPerfResults)); + // we could also do it with the expert mode (cudnnFindConvolutionForwardAlgorithmEx), + // i.e. 
+ // CUDNNCHECK(cudnnFindConvolutionBackwardFilterAlgorithmEx( +@@ -655,11 +665,12 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &filters, + // convRequestedAlgoCount, + // &algoCount, +- // &convPerfBwdFilterResults, ++ // &convBwdFilterPerfResults, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- convWorkspace->HelperAlgorithm = convPerfBwdFilterResults[choose_algo(algoChoice, algoCount, convPerfBwdFilterResults, memLimit)].algo; ++ LocalPerf_t bwdFilterPerfResults{convBwdFilterPerfResults}; ++ convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[choose_algo(algoChoice, algoCount, bwdFilterPerfResults, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle, + activationBackwardDescriptor, + +From 1f1dfbbac06c29df98bdebdd9367bf566f2e7ce8 Mon Sep 17 00:00:00 2001 +From: Konstantin Gizdov <kgiz...@gmail.com> +Date: Thu, 23 Jul 2020 21:37:33 +0300 +Subject: [PATCH 10/10] compiles completely + +--- + .../src/DNN/Architectures/Cudnn/Propagate.cu | 83 ++++++++++--------- + 1 file changed, 46 insertions(+), 37 deletions(-) + +diff --git a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +index 66ce64a5efc..0694369860a 100644 +--- a/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu ++++ b/tmva/tmva/src/DNN/Architectures/Cudnn/Propagate.cu +@@ -344,41 +344,50 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + enum algoPreference { no_workspace, fastest, workspace_limit }; + algoPreference algoChoice; + // C++11 lambdas cannot be templated, so we have to do this HORRIBLE stuff... 
+- union LocalPerf_t { +- // these three type are absolutely equivalent +- // and one can access them as they wish to get info +- cudnnConvolutionFwdAlgoPerf_t * fwd; +- cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter; +- cudnnConvolutionBwdDataAlgoPerf_t * bwdData; +- }; +- auto choose_algo = [](algoPreference const & algoPref, int const algoCount, LocalPerf_t const & perfResults, size_t memLim = std::numeric_limits<size_t>::max()) -> int { +- int algoIdx{0}; +- if (algoPref == algoPreference::fastest) { // prefer fastest +- float temp_runtime{std::numeric_limits<float>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime) { +- temp_runtime = perfResults.fwd[i].time; +- algoIdx = i; ++ class LocalPerf { ++ public: ++ LocalPerf(cudnnConvolutionFwdAlgoPerf_t * fwd) {m_fwd = fwd;} ++ LocalPerf(cudnnConvolutionBwdFilterAlgoPerf_t * bwdFilter) {m_bwdFilter = bwdFilter;} ++ LocalPerf(cudnnConvolutionBwdDataAlgoPerf_t * bwdData) {m_bwdData = bwdData;} ++ size_t getMemory(int i) {return m_fwd != nullptr ? m_fwd[i].memory : m_bwdFilter != nullptr ? m_bwdFilter[i].memory : m_bwdData != nullptr ? m_bwdData[i].memory : 0;} ++ float getTime(int i) {return m_fwd != nullptr ? m_fwd[i].time : m_bwdFilter != nullptr ? m_bwdFilter[i].time : m_bwdData != nullptr ? m_bwdData[i].time : 0;} ++ cudnnStatus_t getStatus(int i) {return m_fwd != nullptr ? m_fwd[i].status : m_bwdFilter != nullptr ? m_bwdFilter[i].status : m_bwdData != nullptr ? 
m_bwdData[i].status : CUDNN_STATUS_BAD_PARAM;} ++ int getIdx(algoPreference const & algoPref, int const algoCount, size_t memLim = std::numeric_limits<size_t>::max()) { ++ int algoIdx{0}; ++ if (algoPref == algoPreference::fastest) { // prefer fastest ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getTime(i) < temp_runtime) { ++ temp_runtime = getTime(i); ++ algoIdx = i; ++ } + } +- } +- } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size +- float temp_runtime{std::numeric_limits<float>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].time < temp_runtime && perfResults.fwd[i].memory <= memLim) { +- temp_runtime = perfResults.fwd[i].time; +- algoIdx = i; ++ } else if (algoPref == algoPreference::workspace_limit) { // constrain to workspace size ++ float temp_runtime{std::numeric_limits<float>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getTime(i) < temp_runtime && getMemory(i) <= memLim) { ++ temp_runtime = getTime(i); ++ algoIdx = i; ++ } + } +- } +- } else { // prefer smallest workspace size +- size_t temp_memsize{std::numeric_limits<size_t>::max()}; +- for (int i = 0; i < algoCount; ++i) { +- if (perfResults.fwd[i].status == CUDNN_STATUS_SUCCESS && perfResults.fwd[i].memory < temp_memsize) { +- temp_memsize = perfResults.fwd[i].memory; +- algoIdx = i; ++ } else { // prefer smallest workspace size ++ size_t temp_memsize{std::numeric_limits<size_t>::max()}; ++ for (int i = 0; i < algoCount; ++i) { ++ if (getStatus(i) == CUDNN_STATUS_SUCCESS && getMemory(i) < temp_memsize) { ++ temp_memsize = getMemory(i); ++ algoIdx = i; ++ } + } + } +- } +- return algoIdx; ++ return algoIdx; ++ }; ++ private: ++ LocalPerf(); ++ // these three type are absolutely equivalent ++ // and one can access them as they wish 
to get info ++ cudnnConvolutionFwdAlgoPerf_t * m_fwd; ++ cudnnConvolutionBwdFilterAlgoPerf_t * m_bwdFilter; ++ cudnnConvolutionBwdDataAlgoPerf_t * m_bwdData; + }; + #else + // More detailed alternative: cudnnFindConvolutionForwardAlgorithm (only option in newer cuDNN versions) +@@ -502,8 +511,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- LocalPerf_t fwdPerfResults{convFwdPerfResults}; +- convWorkspace->AlgorithmForward = convFwdPerfResults[choose_algo(algoChoice, algoCount, fwdPerfResults, memLimit)].algo; ++ LocalPerf fwdPerfResults{convFwdPerfResults}; ++ convWorkspace->AlgorithmForward = convFwdPerfResults[fwdPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionForwardAlgorithm( + cudnnHandle, inputTensorDescriptor, convDescriptors->WeightsDescriptor, convDescriptors->LayerDescriptor, +@@ -594,8 +603,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- LocalPerf_t bwdDataPerfResults{convBwdDataPerfResults}; +- convWorkspace->AlgorithmBackward = convBwdDataPerfResults[choose_algo(algoChoice, algoCount, bwdDataPerfResults, memLimit)].algo; ++ LocalPerf bwdDataPerfResults{convBwdDataPerfResults}; ++ convWorkspace->AlgorithmBackward = convBwdDataPerfResults[bwdDataPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo; + #else + CUDNNCHECK(cudnnGetConvolutionBackwardDataAlgorithm(cudnnHandle, + convDescriptors->WeightsDescriptor, +@@ -669,8 +678,8 @@ void TCudnn<AFloat>::InitializeConvWorkspace(TWorkspace * & workspace, + // &convWorkspace, + // memLimit)); // use memLimit for workspace size + // instead choose either fastest or lowest memory algo as per preference +- 
LocalPerf_t bwdFilterPerfResults{convBwdFilterPerfResults};
+-   convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[choose_algo(algoChoice, algoCount, bwdFilterPerfResults, memLimit)].algo;
++   LocalPerf bwdFilterPerfResults{convBwdFilterPerfResults};
++   convWorkspace->HelperAlgorithm = convBwdFilterPerfResults[bwdFilterPerfResults.getIdx(algoChoice, algoCount, memLimit)].algo;
+ #else
+    CUDNNCHECK(cudnnGetConvolutionBackwardFilterAlgorithm(cudnnHandle,
+       activationBackwardDescriptor,

Copied: root/repos/community-testing-x86_64/jupyter_notebook_config.py (from rev 665090, root/trunk/jupyter_notebook_config.py)
===================================================================
--- community-testing-x86_64/jupyter_notebook_config.py (rev 0)
+++ community-testing-x86_64/jupyter_notebook_config.py 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1 @@
+c.NotebookApp.ip = '*'  # NOTE(review): '*' presumably makes the notebook server listen on every network interface - confirm this exposure is intended

Copied: root/repos/community-testing-x86_64/nbman-for-arch.patch (from rev 665090, root/trunk/nbman-for-arch.patch)
===================================================================
--- community-testing-x86_64/nbman-for-arch.patch (rev 0)
+++ community-testing-x86_64/nbman-for-arch.patch 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,177 @@
+diff --color -aur root-6.22.00-old/main/src/nbmain.cxx root-6.22.00-new/main/src/nbmain.cxx
+--- root-6.22.00-old/main/src/nbmain.cxx 2020-07-20 15:26:53.983725609 +0300
++++ root-6.22.00-new/main/src/nbmain.cxx 2020-07-20 15:29:53.940386060 +0300
+@@ -33,10 +33,6 @@
+ #define NB_OPT "notebook"
+ #define JUPYTER_CONF_DIR_V "JUPYTER_CONFIG_DIR"
+ #define JUPYTER_PATH_V "JUPYTER_PATH"
+-#define NB_CONF_DIR "notebook"
+-#define ROOTNB_DIR ".rootnb"
+-#define COMMIT_FILE ".rootcommit"
+-#define JUPYTER_CONFIG "jupyter_notebook_config.py"
+
+ using namespace std;
+
+@@ -46,161 +46,12 @@
+ #endif
+
+ ////////////////////////////////////////////////////////////////////////////////
+-/// Checks whether ROOT notebook files are installed and they are
+-/// the
current version. +- +-static int CheckNbInstallation(string dir) +-{ +- string commit(gROOT->GetGitCommit()); +- string inputfname(dir + pathsep + ROOTNB_DIR + pathsep + COMMIT_FILE); +- ifstream in(inputfname); +- if (in.is_open()) { +- string line; +- in >> line; +- in.close(); +- if (line.compare(commit) == 0) return 0; // already installed +- else return -1; // install, it's outdated +- } +- else if (gSystem->AccessPathName(inputfname.c_str())) { +- // There is no installation +- return -1; +- } +- else { +- fprintf(stderr, +- "Error checking notebook installation -- cannot open %s\n", +- inputfname.c_str()); +- return -2; +- } +-} +- +-//////////////////////////////////////////////////////////////////////////////// +-/// Installs ROOT notebook files in the user's home directory. +- +-static bool InstallNbFiles(string source, string dest) +-{ +- // Create installation directory +- if (gSystem->AccessPathName(dest.c_str())) { +- if (gSystem->mkdir(dest.c_str())) { +- fprintf(stderr, +- "Error installing notebook configuration files -- cannot create directory %s\n", +- dest.c_str()); +- return false; +- } +- } +- +- // Copy files in source to dest +- TSystemDirectory dir(source.c_str(), source.c_str()); +- std::unique_ptr<TList> files; +- files.reset(dir.GetListOfFiles()); +- if (files) { +- TSystemFile *file; +- TListIter it(files.get()); +- while ((file = (TSystemFile*)it())) { +- TString s = file->GetName(); +- string fname(s.Data()); +- string sourcefile = source + pathsep + fname; +- string destfile = dest + pathsep + fname; +- if (!file->IsDirectory()) { +- if (gSystem->CopyFile(sourcefile.c_str(), destfile.c_str(), true)) { +- fprintf(stderr, +- "Error installing notebook configuration files -- cannot copy file %s to %s\n", +- sourcefile.c_str(), destfile.c_str()); +- return false; +- } +- } +- else if (fname.compare(".") && fname.compare("..") && fname.compare("html")) { +- if (!InstallNbFiles(sourcefile, destfile)) +- return false; +- } +- } +- } +- +- 
return true; +-} +- +-//////////////////////////////////////////////////////////////////////////////// +-/// Creates the Jupyter notebook configuration file that sets the +-/// necessary environment. +- +-static bool CreateJupyterConfig(string dest, string rootbin, string rootlib, string rootdata) +-{ +- string jupyconfig = dest + pathsep + JUPYTER_CONFIG; +- ofstream out(jupyconfig, ios::trunc); +- if (out.is_open()) { +- out << "import os" << endl; +- out << "rootbin = '" << rootbin << "'" << endl; +- out << "rootlib = '" << rootlib << "'" << endl; +-#ifdef WIN32 +- string jsrootsys = rootdata + "\\js\\"; +- out << "os.environ['PYTHONPATH'] = '%s' % rootlib + ':' + os.getenv('PYTHONPATH', '')" << endl; +- out << "os.environ['PATH'] = '%s:%s\\bin' % (rootbin,rootbin) + ':' + '%s' % rootlib + ':' + os.getenv('PATH', '')" << endl; +-#else +- string jsrootsys = rootdata + "/js/"; +- out << "os.environ['PYTHONPATH'] = '%s' % rootlib + ':' + os.getenv('PYTHONPATH', '')" << endl; +- out << "os.environ['PATH'] = '%s:%s/bin' % (rootbin,rootbin) + ':' + os.getenv('PATH', '')" << endl; +- out << "os.environ['LD_LIBRARY_PATH'] = '%s' % rootlib + ':' + os.getenv('LD_LIBRARY_PATH', '')" << endl; +-#endif +- out << "c.NotebookApp.extra_static_paths = ['" << jsrootsys << "']" << endl; +- out.close(); +- return true; +- } +- else { +- fprintf(stderr, +- "Error installing notebook configuration files -- cannot create IPython config file at %s\n", +- jupyconfig.c_str()); +- return false; +- } +-} +- +-//////////////////////////////////////////////////////////////////////////////// +-/// Creates a file that stores the current commit id in it. 
+- +-static bool CreateStamp(string dest) +-{ +- ofstream out(dest + pathsep + COMMIT_FILE, ios::trunc); +- if (out.is_open()) { +- out << gROOT->GetGitCommit(); +- out.close(); +- return true; +- } +- else { +- fprintf(stderr, +- "Error installing notebook configuration files -- cannot create %s\n", +- COMMIT_FILE); +- return false; +- } +-} +- +-//////////////////////////////////////////////////////////////////////////////// + /// Spawn a Jupyter notebook customised by ROOT. + + int main(int argc, char **argv) + { +- string rootbin(TROOT::GetBinDir().Data()); +- string rootlib(TROOT::GetLibDir().Data()); +- string rootetc(TROOT::GetEtcDir().Data()); +- string rootdata(TROOT::GetDataDir().Data()); +- +- // If needed, install ROOT notebook files in the user's home directory +-#ifdef WIN32 +- string homedir(getenv("USERPROFILE")); +-#else +- string homedir(getenv("HOME")); +-#endif +- int inst = CheckNbInstallation(homedir); +- if (inst == -1) { +- // The etc directory contains the ROOT notebook files to install +- string source(rootetc + pathsep + NB_CONF_DIR); +- string dest(homedir + pathsep + ROOTNB_DIR); +- bool res = InstallNbFiles(source, dest) && +- CreateJupyterConfig(dest, rootbin, rootlib, rootdata) && +- CreateStamp(dest); +- if (!res) return 1; +- } +- else if (inst == -2) return 1; +- + // Set IPython directory for the ROOT notebook flavour +- string rootnbpath = homedir + pathsep + ROOTNB_DIR; ++ string rootnbpath = pathsep + string("etc") + pathsep + string("root") + pathsep + string("notebook"); + string jupyconfdir(JUPYTER_CONF_DIR_V + ("=" + rootnbpath)); + string jupypathdir(JUPYTER_PATH_V + ("=" + rootnbpath)); + putenv((char *)jupyconfdir.c_str()); Copied: root/repos/community-testing-x86_64/root.pc.tpl (from rev 665090, root/trunk/root.pc.tpl) =================================================================== --- community-testing-x86_64/root.pc.tpl (rev 0) +++ community-testing-x86_64/root.pc.tpl 2020-07-23 20:24:24 UTC (rev 665091) @@ -0,0 
+1,12 @@
+prefix=_PREFIX
+exec_prefix=_EXECPREFIX
+libdir=_LIBDIR
+includedir=_INCDIR
+
+Name: ROOT
+Description: C++ data analysis framework and interpreter from CERN
+Version: _PKGVERSION
+URL: _UPSTREAM_URL
+Requires: _REQUIRES
+Libs: _LIBRARIES
+Cflags: _CFLAGS

Copied: root/repos/community-testing-x86_64/root.xml (from rev 665090, root/trunk/root.xml)
===================================================================
--- community-testing-x86_64/root.xml (rev 0)
+++ community-testing-x86_64/root.xml 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<mime-info xmlns="http://www.freedesktop.org/standards/shared-mime-info">
+  <mime-type type="application/x-root">
+    <comment>ROOT file</comment>
+    <comment xml:lang="de">ROOT-Datei</comment>
+    <comment xml:lang="en">ROOT-File</comment>
+    <comment xml:lang="fr">ROOT-Fichier</comment>
+    <comment xml:lang="it">ROOT-File</comment>
+    <glob pattern="*.root"/>
+    <magic priority="80">
+      <match value="root" type="string" offset="0:64"/>
+    </magic>
+  </mime-type>
+</mime-info>

Copied: root/repos/community-testing-x86_64/settings-cuda.cmake (from rev 665090, root/trunk/settings-cuda.cmake)
===================================================================
--- community-testing-x86_64/settings-cuda.cmake (rev 0)
+++ community-testing-x86_64/settings-cuda.cmake 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,110 @@
+set (CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE) # every entry in this file is written to the cache with FORCE; presumably loaded as an initial-cache script - confirm in PKGBUILD
+set (BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+set (CMAKE_INSTALL_PREFIX /usr CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_CMAKEDIR /usr/lib/cmake/ROOT CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_BINDIR /usr/bin CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_LIBDIR /usr/lib/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_INCLUDEDIR /usr/include CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_SYSCONFDIR /etc/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_DATAROOTDIR /usr/share CACHE PATH "" FORCE)
+set (CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
+set (CMAKE_CUDA_STANDARD 14 CACHE STRING "" FORCE)
+set (PYTHIA8_DATA /usr/share/pythia8/xmldoc CACHE PATH "" FORCE) # sync with pythia8 package
+set (GLEW_DIR /usr/include/GL CACHE PATH "" FORCE) # need to set manually
+set (alien OFF CACHE BOOL "" FORCE)
+set (all OFF CACHE BOOL "" FORCE)
+set (asimage ON CACHE BOOL "" FORCE)
+set (builtin_afterimage OFF CACHE BOOL "" FORCE)
+set (builtin_clang ON CACHE BOOL "" FORCE)
+set (CLANG_ENABLE_STATIC_ANALYZER ON CACHE BOOL "" FORCE)
+set (CLANG_ANALYZER_BUILD_Z3 ON CACHE BOOL "" FORCE)
+set (builtin_cfitsio OFF CACHE BOOL "" FORCE)
+set (builtin_davix OFF CACHE BOOL "" FORCE)
+set (builtin_fftw3 OFF CACHE BOOL "" FORCE)
+set (builtin_ftgl OFF CACHE BOOL "" FORCE)
+set (builtin_freetype OFF CACHE BOOL "" FORCE)
+set (builtin_gl2ps OFF CACHE BOOL "" FORCE)
+set (builtin_glew OFF CACHE BOOL "" FORCE)
+set (builtin_gsl OFF CACHE BOOL "" FORCE)
+set (builtin_lzma OFF CACHE BOOL "" FORCE)
+set (builtin_llvm ON CACHE BOOL "" FORCE)
+set (builtin_openssl OFF CACHE BOOL "" FORCE)
+set (builtin_pcre OFF CACHE BOOL "" FORCE)
+set (builtin_tbb OFF CACHE BOOL "" FORCE)
+set (builtin_unuran OFF CACHE BOOL "" FORCE)
+set (builtin_vc OFF CACHE BOOL "" FORCE)
+set (builtin_xxhash OFF CACHE BOOL "" FORCE)
+set (builtin_xrootd OFF CACHE BOOL "" FORCE)
+set (builtin_zlib OFF CACHE BOOL "" FORCE)
+set (ccache ON CACHE BOOL "" FORCE)
+set (clad ON CACHE BOOL "" FORCE)
+set (cocoa OFF CACHE BOOL "" FORCE) # MacOS only
+set (cuda ON CACHE BOOL "" FORCE) # ON here, OFF in settings.cmake
+set (cudnn ON CACHE BOOL "" FORCE) # ON here, OFF in settings.cmake
+set (dataframe ON CACHE BOOL "" FORCE)
+set (davix OFF CACHE BOOL "" FORCE)
+set (dcache OFF CACHE BOOL "" FORCE)
+set (exceptions ON CACHE BOOL "" FORCE)
+set (fail-on-missing ON CACHE BOOL "" FORCE)
+set (fcgi ON CACHE BOOL "" FORCE)
+set (fftw3 ON CACHE BOOL "" FORCE)
+set (fitsio ON CACHE BOOL "" FORCE)
+set (fortran ON CACHE BOOL "" FORCE)
+set (gdml ON CACHE BOOL "" FORCE)
+set (genvector ON CACHE BOOL "" FORCE)
+set (gfal OFF CACHE BOOL "" FORCE)
+set (gl2ps ON CACHE BOOL "" FORCE)
+set (gminimal OFF CACHE BOOL "" FORCE)
+set (gnuinstall ON CACHE BOOL "" FORCE)
+set (gsl_shared ON CACHE BOOL "" FORCE)
+set (gviz ON CACHE BOOL "" FORCE)
+set (http ON CACHE BOOL "" FORCE)
+set (imt ON CACHE BOOL "" FORCE)
+set (jemalloc OFF CACHE BOOL "" FORCE)
+set (mathmore ON CACHE BOOL "" FORCE)
+set (minimal OFF CACHE BOOL "" FORCE)
+set (minuit2 ON CACHE BOOL "" FORCE)
+set (minuit2_mpi ON CACHE BOOL "" FORCE)
+set (minuit2_omp ON CACHE BOOL "" FORCE)
+set (mlp ON CACHE BOOL "" FORCE)
+set (monalisa OFF CACHE BOOL "" FORCE)
+set (mpi ON CACHE BOOL "" FORCE)
+set (mt ON CACHE BOOL "" FORCE)
+set (mysql ON CACHE BOOL "" FORCE)
+set (odbc ON CACHE BOOL "" FORCE)
+set (opengl ON CACHE BOOL "" FORCE)
+set (OpenGL_GL_PREFERENCE GLVND CACHE STRING "" FORCE) # use new policy since 3.11
+set (oracle OFF CACHE BOOL "" FORCE)
+set (pgsql ON CACHE BOOL "" FORCE)
+set (pythia6 OFF CACHE BOOL "" FORCE)
+set (pythia6_nolink OFF CACHE BOOL "" FORCE)
+set (pythia8 ON CACHE BOOL "" FORCE)
+set (pyroot ON CACHE BOOL "" FORCE)
+set (qt5web ON CACHE BOOL "" FORCE)
+set (roofit ON CACHE BOOL "" FORCE)
+set (root7 ON CACHE BOOL "" FORCE)
+set (roottest OFF CACHE BOOL "" FORCE)
+set (rpath OFF CACHE BOOL "" FORCE)
+set (runtime_cxxmodules OFF CACHE BOOL "" FORCE) # breaks python
+set (r OFF CACHE BOOL "" FORCE) # requires r-rcpp
+set (shadowpw ON CACHE BOOL "" FORCE)
+set (shared ON CACHE BOOL "" FORCE)
+set (soversion OFF CACHE BOOL "" FORCE)
+set (spectrum ON CACHE BOOL "" FORCE)
+set (sqlite ON CACHE BOOL "" FORCE)
+set (ssl ON CACHE BOOL "" FORCE)
+set (tbb ON CACHE BOOL "" FORCE)
+set (tcmalloc OFF CACHE BOOL "" FORCE)
+set (testing OFF CACHE BOOL "" FORCE)
+set (tmva ON CACHE BOOL "" FORCE)
+set (tmva-cpu OFF CACHE BOOL "" FORCE) # OFF here, ON in settings.cmake
+set (tmva-gpu ON CACHE BOOL "" FORCE) # ON here, OFF in settings.cmake
+set (tmva-pymva ON CACHE BOOL "" FORCE)
+set (unuran ON CACHE BOOL "" FORCE)
+set (vc ON CACHE BOOL "" FORCE)
+set (vdt ON CACHE BOOL "" FORCE)
+set (winrtdebug OFF CACHE BOOL "" FORCE) # windows only
+set (webgui ON CACHE BOOL "" FORCE)
+set (x11 ON CACHE BOOL "" FORCE)
+set (xml ON CACHE BOOL "" FORCE)
+set (xrootd ON CACHE BOOL "" FORCE)

Copied: root/repos/community-testing-x86_64/settings.cmake (from rev 665090, root/trunk/settings.cmake)
===================================================================
--- community-testing-x86_64/settings.cmake (rev 0)
+++ community-testing-x86_64/settings.cmake 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,110 @@
+set (CMAKE_BUILD_TYPE Release CACHE STRING "" FORCE) # every entry in this file is written to the cache with FORCE; presumably loaded as an initial-cache script - confirm in PKGBUILD
+set (BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
+set (CMAKE_INSTALL_PREFIX /usr CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_CMAKEDIR /usr/lib/cmake/ROOT CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_BINDIR /usr/bin CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_LIBDIR /usr/lib/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_INCLUDEDIR /usr/include CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_SYSCONFDIR /etc/root CACHE PATH "" FORCE)
+set (CMAKE_INSTALL_DATAROOTDIR /usr/share CACHE PATH "" FORCE)
+set (CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE)
+set (CMAKE_CUDA_STANDARD 14 CACHE STRING "" FORCE) # NOTE(review): still set although cuda is OFF in this file
+set (PYTHIA8_DATA /usr/share/pythia8/xmldoc CACHE PATH "" FORCE) # sync with pythia8 package
+set (GLEW_DIR /usr/include/GL CACHE PATH "" FORCE) # need to set manually
+set (alien OFF CACHE BOOL "" FORCE)
+set (all OFF CACHE BOOL "" FORCE)
+set (asimage ON CACHE BOOL "" FORCE)
+set (builtin_afterimage OFF CACHE BOOL "" FORCE)
+set (builtin_clang ON CACHE BOOL "" FORCE)
+set (CLANG_ENABLE_STATIC_ANALYZER ON CACHE BOOL "" FORCE)
+set (CLANG_ANALYZER_BUILD_Z3 ON CACHE BOOL "" FORCE)
+set (builtin_cfitsio OFF CACHE BOOL "" FORCE)
+set (builtin_davix OFF CACHE BOOL "" FORCE)
+set (builtin_fftw3 OFF CACHE BOOL "" FORCE)
+set (builtin_ftgl OFF CACHE BOOL "" FORCE)
+set (builtin_freetype OFF CACHE BOOL "" FORCE)
+set (builtin_gl2ps OFF CACHE BOOL "" FORCE)
+set (builtin_glew OFF CACHE BOOL "" FORCE)
+set (builtin_gsl OFF CACHE BOOL "" FORCE)
+set (builtin_lzma OFF CACHE BOOL "" FORCE)
+set (builtin_llvm ON CACHE BOOL "" FORCE)
+set (builtin_openssl OFF CACHE BOOL "" FORCE)
+set (builtin_pcre OFF CACHE BOOL "" FORCE)
+set (builtin_tbb OFF CACHE BOOL "" FORCE)
+set (builtin_unuran OFF CACHE BOOL "" FORCE)
+set (builtin_vc OFF CACHE BOOL "" FORCE)
+set (builtin_xxhash OFF CACHE BOOL "" FORCE)
+set (builtin_xrootd OFF CACHE BOOL "" FORCE)
+set (builtin_zlib OFF CACHE BOOL "" FORCE)
+set (ccache ON CACHE BOOL "" FORCE)
+set (clad ON CACHE BOOL "" FORCE)
+set (cocoa OFF CACHE BOOL "" FORCE) # MacOS only
+set (cuda OFF CACHE BOOL "" FORCE) # OFF here, ON in settings-cuda.cmake
+set (cudnn OFF CACHE BOOL "" FORCE) # OFF here, ON in settings-cuda.cmake
+set (dataframe ON CACHE BOOL "" FORCE)
+set (davix OFF CACHE BOOL "" FORCE)
+set (dcache OFF CACHE BOOL "" FORCE)
+set (exceptions ON CACHE BOOL "" FORCE)
+set (fail-on-missing ON CACHE BOOL "" FORCE)
+set (fcgi ON CACHE BOOL "" FORCE)
+set (fftw3 ON CACHE BOOL "" FORCE)
+set (fitsio ON CACHE BOOL "" FORCE)
+set (fortran ON CACHE BOOL "" FORCE)
+set (gdml ON CACHE BOOL "" FORCE)
+set (genvector ON CACHE BOOL "" FORCE)
+set (gfal OFF CACHE BOOL "" FORCE)
+set (gl2ps ON CACHE BOOL "" FORCE)
+set (gminimal OFF CACHE BOOL "" FORCE)
+set (gnuinstall ON CACHE BOOL "" FORCE)
+set (gsl_shared ON CACHE BOOL "" FORCE)
+set (gviz ON CACHE BOOL "" FORCE)
+set (http ON CACHE BOOL "" FORCE)
+set (imt ON CACHE BOOL "" FORCE)
+set (jemalloc OFF CACHE BOOL "" FORCE)
+set (mathmore ON CACHE BOOL "" FORCE)
+set (minimal OFF CACHE BOOL "" FORCE)
+set (minuit2 ON CACHE BOOL "" FORCE)
+set (minuit2_mpi ON CACHE BOOL "" FORCE)
+set (minuit2_omp ON CACHE BOOL "" FORCE)
+set (mlp ON CACHE BOOL "" FORCE)
+set (monalisa OFF CACHE BOOL "" FORCE)
+set (mpi ON CACHE BOOL "" FORCE)
+set (mt ON CACHE BOOL "" FORCE)
+set (mysql ON CACHE BOOL "" FORCE)
+set (odbc ON CACHE BOOL "" FORCE)
+set (opengl ON CACHE BOOL "" FORCE)
+set (OpenGL_GL_PREFERENCE GLVND CACHE STRING "" FORCE) # use new policy since 3.11
+set (oracle OFF CACHE BOOL "" FORCE)
+set (pgsql ON CACHE BOOL ""
FORCE)
+set (pythia6 OFF CACHE BOOL "" FORCE)
+set (pythia6_nolink OFF CACHE BOOL "" FORCE)
+set (pythia8 ON CACHE BOOL "" FORCE)
+set (pyroot ON CACHE BOOL "" FORCE)
+set (qt5web ON CACHE BOOL "" FORCE)
+set (roofit ON CACHE BOOL "" FORCE)
+set (root7 ON CACHE BOOL "" FORCE)
+set (roottest OFF CACHE BOOL "" FORCE)
+set (rpath OFF CACHE BOOL "" FORCE)
+set (runtime_cxxmodules OFF CACHE BOOL "" FORCE) # breaks python
+set (r OFF CACHE BOOL "" FORCE) # requires r-rcpp
+set (shadowpw ON CACHE BOOL "" FORCE)
+set (shared ON CACHE BOOL "" FORCE)
+set (soversion OFF CACHE BOOL "" FORCE)
+set (spectrum ON CACHE BOOL "" FORCE)
+set (sqlite ON CACHE BOOL "" FORCE)
+set (ssl ON CACHE BOOL "" FORCE)
+set (tbb ON CACHE BOOL "" FORCE)
+set (tcmalloc OFF CACHE BOOL "" FORCE)
+set (testing OFF CACHE BOOL "" FORCE)
+set (tmva ON CACHE BOOL "" FORCE)
+set (tmva-cpu ON CACHE BOOL "" FORCE) # ON here, OFF in settings-cuda.cmake
+set (tmva-gpu OFF CACHE BOOL "" FORCE) # OFF here, ON in settings-cuda.cmake
+set (tmva-pymva ON CACHE BOOL "" FORCE)
+set (unuran ON CACHE BOOL "" FORCE)
+set (vc ON CACHE BOOL "" FORCE)
+set (vdt ON CACHE BOOL "" FORCE)
+set (winrtdebug OFF CACHE BOOL "" FORCE) # windows only
+set (webgui ON CACHE BOOL "" FORCE)
+set (x11 ON CACHE BOOL "" FORCE)
+set (xml ON CACHE BOOL "" FORCE)
+set (xrootd ON CACHE BOOL "" FORCE)

Copied: root/repos/community-testing-x86_64/thisroot.fail (from rev 665090, root/trunk/thisroot.fail)
===================================================================
--- community-testing-x86_64/thisroot.fail (rev 0)
+++ community-testing-x86_64/thisroot.fail 2020-07-23 20:24:24 UTC (rev 665091)
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# thisroot.* scripts should not be used to
+# configure ROOT on Arch. Notify user and
+# return an error
+# fail MSG [CODE]: print MSG to stderr, then exit with CODE (default 1)
+function fail {
+    printf '%s\n' "$1" >&2
+    exit "${2:-1}"
+}
+
+fail "ERROR: $(basename -- "$0") should never be used!" 1