This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 8ee8d0d0b8 [Runtime] Add "TVM_DLL" to NVTX header (#16809)
add 3ce87cba21 Fix includes of custom allreduce kernel (#16814)
add 64db9f78a0 [Runtime] Introduce MSCCLPP with NCCL equivalent interface
(#16804)
add d109573cb4 [Runtime][LLVM] Fix errors during loading of target tags
(#16808)
add f8b9a5faa4 [SLM] Add unit tests for SLM to Relax exporter (#16784)
add d2c7167913 [Cutlass] Fix usage of cuda stream for group gemm (#16818)
add cd60f6d4fe [Cmake] Allow using custom CCCL path for thrust (#16816)
add 109804cc6a [Codegen] Add check to disable invalid reinterpret (#16786)
add 5daa303ce7 [Fix] PAPI docs (#16820)
add c3be89a407 [KVCache] Support forking sequence at specific position
(#16813)
No new revisions were added by this update.
Summary of changes:
3rdparty/mscclpp/include/common.h | 107 ++++
3rdparty/mscclpp/include/msccl.cuh | 323 +++++++++++
3rdparty/mscclpp/include/msccl.h | 494 ++++++++++++++++
3rdparty/tensorrt_llm/custom_allreduce_kernels.cu | 3 +-
CMakeLists.txt | 5 +-
cmake/config.cmake | 4 +
cmake/modules/CUDA.cmake | 9 +-
cmake/modules/contrib/MSCCLPP.cmake | 50 ++
docker/install/ubuntu_install_papi.sh | 2 +-
docs/how_to/profile/papi.rst | 4 +-
python/tvm/target/tag.py | 6 +-
src/runtime/contrib/cutlass/group_gemm_runner.cuh | 15 +-
src/runtime/contrib/mscclpp/allreduce.cu | 184 ++++++
src/runtime/contrib/papi/papi.cc | 2 +-
src/runtime/relax_vm/kv_state.h | 5 +-
src/runtime/relax_vm/paged_kv_cache.cc | 127 +++-
src/runtime/relax_vm/rnn_state.cc | 2 +-
src/target/llvm/codegen_llvm.cc | 4 +
src/target/llvm/llvm_instance.cc | 4 +-
src/target/source/codegen_c.cc | 9 +-
src/target/tag.cc | 2 +
tests/python/codegen/test_target_codegen_cuda.py | 10 +
tests/python/relax/test_frontend_nn_exporter.py | 636 +++++++++++++++++++++
..._builtin_paged_attention_kv_cache_flashinfer.py | 102 +++-
...runtime_builtin_paged_attention_kv_cache_tir.py | 101 +++-
.../python/relax/test_runtime_builtin_rnn_state.py | 2 +-
tests/python/target/test_llvm_features_info.py | 6 +-
27 files changed, 2136 insertions(+), 82 deletions(-)
create mode 100644 3rdparty/mscclpp/include/common.h
create mode 100644 3rdparty/mscclpp/include/msccl.cuh
create mode 100644 3rdparty/mscclpp/include/msccl.h
create mode 100644 cmake/modules/contrib/MSCCLPP.cmake
create mode 100644 src/runtime/contrib/mscclpp/allreduce.cu
create mode 100644 tests/python/relax/test_frontend_nn_exporter.py