KellenSunderland closed pull request #9466: WIP (testing windows builds) Enable
CUDA NVTX extensions for profiler
URL: https://github.com/apache/incubator-mxnet/pull/9466
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e9aa826357..b15fa08b41 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -453,7 +453,7 @@ if(USE_CUDA)
string(REPLACE ";" " " NVCC_FLAGS_ARCH "${NVCC_FLAGS_ARCH}")
set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS_ARCH}")
set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_ARCH} -use_fast_math")
- list(APPEND mxnet_LINKER_LIBS cublas cufft cusolver curand)
+ list(APPEND mxnet_LINKER_LIBS cublas cufft cusolver curand nvToolsExt)
if(ENABLE_CUDA_RTC)
list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
@@ -472,12 +472,15 @@ if(USE_CUDA)
list(APPEND mxnet_LINKER_LIBS ${CUDA_cuda_LIBRARY})
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
endif()
+ FIND_LIBRARY(CUDA_nvtx_LIBRARY nvToolsExt
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
+ list(APPEND mxnet_LINKER_LIBS ${CUDA_nvtx_LIBRARY})
FIND_LIBRARY(CUDA_cufft_LIBRARY nvrtc
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS "${CUDA_cufft_LIBRARY}/../cufft.lib") #
For fft operator
FIND_LIBRARY(CUDA_cusolver_LIBRARY nvrtc
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" "${CUDA_TOOLKIT_ROOT_DIR}/lib/win32")
list(APPEND mxnet_LINKER_LIBS
"${CUDA_cusolver_LIBRARY}/../cusolver.lib") # For cusolver
else(MSVC)
- list(APPEND mxnet_LINKER_LIBS cufft cusolver)
+
+ list(APPEND mxnet_LINKER_LIBS cufft cusolver nvToolsExt)
if(ENABLE_CUDA_RTC)
list(APPEND mxnet_LINKER_LIBS nvrtc cuda)
add_definitions(-DMXNET_ENABLE_CUDA_RTC=1)
diff --git a/Makefile b/Makefile
index b0cff74e69..3371f88c55 100644
--- a/Makefile
+++ b/Makefile
@@ -334,7 +334,7 @@ ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP)
ifeq ($(USE_CUDA), 1)
CFLAGS += -I$(ROOTDIR)/3rdparty/cub
ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ)
- LDFLAGS += -lcufft
+ LDFLAGS += -lcufft -lnvToolsExt
ifeq ($(ENABLE_CUDA_RTC), 1)
LDFLAGS += -lcuda -lnvrtc
CFLAGS += -DMXNET_ENABLE_CUDA_RTC=1
diff --git a/src/engine/profiler.cc b/src/engine/profiler.cc
index 13f8cca37b..378376377e 100644
--- a/src/engine/profiler.cc
+++ b/src/engine/profiler.cc
@@ -213,6 +213,10 @@ void SetOprStart(OprExecStat* opr_stat) {
return;
}
opr_stat->opr_start_rel_micros = NowInUsec() -
Profiler::Get()->GetInitTime();
+
+#if MXNET_USE_CUDA
+ opr_stat->nvtx_range_id = nvtxRangeStartA(opr_stat->opr_name);
+#endif
}
void SetOprEnd(OprExecStat* opr_stat) {
@@ -221,6 +225,10 @@ void SetOprEnd(OprExecStat* opr_stat) {
return;
}
opr_stat->opr_end_rel_micros = NowInUsec() -
Profiler::Get()->GetInitTime();
+
+#if MXNET_USE_CUDA
+ nvtxRangeEnd(opr_stat->nvtx_range_id);
+#endif
}
} // namespace engine
diff --git a/src/engine/profiler.h b/src/engine/profiler.h
index ebd942036c..3974a7e57c 100644
--- a/src/engine/profiler.h
+++ b/src/engine/profiler.h
@@ -31,6 +31,10 @@
#include <mutex>
#include <memory>
+#if MXNET_USE_CUDA
+#include "nvToolsExt.h"
+#endif
+
namespace mxnet {
namespace engine {
@@ -59,6 +63,11 @@ struct OprExecStat {
uint32_t dev_type;
/*! \brief device id */
uint32_t dev_id;
+
+#if MXNET_USE_CUDA
+ /*! \brief range id for NVIDIA Visual Profiler ranges */
+ nvtxRangeId_t nvtx_range_id;
+#endif
};
/*!
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services