tqchen commented on code in PR #15910:
URL: https://github.com/apache/tvm/pull/15910#discussion_r1355092130
##########
src/runtime/relax_vm/lm_support.cc:
##########
@@ -472,6 +472,430 @@ void ApplySoftmaxWithTemperature(NDArray logits, double temperature) {
TVM_REGISTER_GLOBAL("vm.builtin.apply_softmax_with_temperature")
.set_body_typed(ApplySoftmaxWithTemperature);
+////////////////////////////////////////////////////////////////////////
+
+class PagedAttentionKVCacheObj : public Object {
+ public:
+ int64_t num_total_seqs;
+ int64_t num_pages_in_use;
+ int64_t num_pages_allocated;
+
+ int64_t page_size;
+ int64_t nlayer;
+ int64_t nhead;
+ int64_t nfeat;
+
+ const DLDataType dtype_aux = DataType::Int(32, 1).operator DLDataType();
+
+ /********************* Page Structures *********************/
+
+ const int64_t page_chunk_size = 1;
+
+ NDArray pages;
+ std::vector<int32_t> free_page_ids;
+
+ std::vector<std::vector<int32_t>> page_table;
Review Comment:
Please add a comment explaining what each element of `page_table` represents.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]