This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new d1ac1c0202 [KVCache] Fix the aux data syncing order of paged KV cache (#16988)
d1ac1c0202 is described below
commit d1ac1c0202b3d8cb2af268ce79c2ac710554152b
Author: Rick Zhou <[email protected]>
AuthorDate: Sun May 12 18:22:18 2024 -0700
[KVCache] Fix the aux data syncing order of paged KV cache (#16988)
Fix the aux data syncing order of paged KV cache
---
src/runtime/relax_vm/paged_kv_cache.cc | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc
index efedac235b..9a17354fe5 100644
--- a/src/runtime/relax_vm/paged_kv_cache.cc
+++ b/src/runtime/relax_vm/paged_kv_cache.cc
@@ -1709,24 +1709,28 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
// - Reset the copy.
aux_data_manager_->ResetCopy();
- // 1. qo_indptr_on_depths
+ // 1. q_rope_position_map
+ // q_rope_position_map has to be synced first so that it has a 0 byte offset
+ ICHECK_EQ(q_rope_position_map_host_.size(), total_append_length);
+ q_rope_position_map_view_ =
aux_data_manager_->CopyQRoPEPosMapAsync(&q_rope_position_map_host_);
+ // 2. qo_indptr_on_depths
for (int d = 0; d < num_depths_; ++d) {
qo_indptr_on_depths_view_[d] =
aux_data_manager_->CopyQOIndptrOnDepthAsync(&qo_indptr_on_depths_host_[d], d);
}
- // 2. page_indptr_on_depths
+ // 3. page_indptr_on_depths
for (int d = 0; d < num_depths_; ++d) {
ICHECK_EQ(page_indptr_on_depths_host_[d].size(),
qo_indptr_on_depths_host_[d].size());
page_indptr_on_depths_view_[d] =
aux_data_manager_->CopyPageIndptrOnDepthAsync(&page_indptr_on_depths_host_[d],
d);
}
- // 3. page_indices_on_depths
+ // 4. page_indices_on_depths
for (int d = 0; d < num_depths_; ++d) {
ICHECK_EQ(page_indices_on_depths_host_[d].size(),
page_indptr_on_depths_host_[d].back());
page_indices_on_depths_view_[d] =
aux_data_manager_->CopyPageIndicesOnDepthAsync(&page_indices_on_depths_host_[d],
d);
}
- // 4. length_info_on_depths
+ // 5. length_info_on_depths
// last_page_len_on_depths_host_;
// sliding_window_offset_on_depths_host_;
// sink_size_on_depths_host_;
@@ -1746,23 +1750,20 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
&sink_size_on_depths_host_[d], d);
}
}
- // 5. k_rope_pos_offset_on_depths
+ // 6. k_rope_pos_offset_on_depths
for (int d = 0; d < num_depths_; ++d) {
ICHECK_EQ(k_rope_pos_offset_on_depths_host_[d].size() + 1,
qo_indptr_on_depths_host_[d].size());
k_rope_pos_offset_view_[d] =
aux_data_manager_->CopyKRoPEPosOffsetOnDepthAsync(
&k_rope_pos_offset_on_depths_host_[d], d);
}
- // 6. cur_append_lengths_indptr
+ // 7. cur_append_lengths_indptr
cur_append_length_indptr_view_ =
aux_data_manager_->CopyCurAppendLengthIndptrAsync(&cur_append_lengths_indptr_host_);
- // 7. k_ragged_rope_pos_offset
+ // 8. k_ragged_rope_pos_offset
ICHECK_EQ(k_ragged_rope_pos_offset_host_.size(), num_sequences);
k_ragged_rope_pos_offset_view_ =
aux_data_manager_->CopyKRaggedRoPEPosOffsetAsync(&k_ragged_rope_pos_offset_host_);
- // 8. q_rope_position_map
- ICHECK_EQ(q_rope_position_map_host_.size(), total_append_length);
- q_rope_position_map_view_ =
aux_data_manager_->CopyQRoPEPosMapAsync(&q_rope_position_map_host_);
// 9. append_position_map
append_position_map_view_ =
aux_data_manager_->CopyAppendPositionMapAsync(&append_position_map_host_);