MasterJH5574 commented on code in PR #16824:
URL: https://github.com/apache/tvm/pull/16824#discussion_r1545690856
##########
src/runtime/relax_vm/paged_kv_cache.cc:
##########
@@ -636,9 +938,17 @@ class PagedAttentionKVCacheObj : public
AttentionKVCacheObj {
}
void CopySinglePage(int32_t src_page_id, int32_t tgt_page_id, int64_t
copy_length) {
+ if (copy_stream_ != compute_stream_) {
+ // Set the copy stream for copy.
+ DeviceAPI::Get(device_)->SetStream(device_, copy_stream_);
+ }
for (int layer = 0; layer < num_layers_; ++layer) {
f_copy_single_page_(pages_[layer], src_page_id, tgt_page_id,
copy_length);
}
+ if (copy_stream_ != compute_stream_) {
+ // Set the compute stream back.
+ DeviceAPI::Get(device_)->SetStream(device_, compute_stream_);
+ }
Review Comment:
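When the copy stream differs from the compute stream, we switch the device to `copy_stream_` before the per-layer page copies and switch back to `compute_stream_` afterwards, so that the copy is executed on a separate copy stream. cc @cyx-6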
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]