areusch commented on a change in pull request #5417: URL: https://github.com/apache/incubator-tvm/pull/5417#discussion_r416138054
########## File path: src/runtime/micro/micro_session.cc ########## @@ -209,59 +246,124 @@ MicroSession::~MicroSession() { low_level_device_ = nullptr; } -double MicroSession::PushToExecQueue(DevPtr func_ptr, const TVMArgs& args) { +void MicroSession::PushToTaskQueue(TargetPtr func_ptr, const TVMArgs& args) { if (thumb_mode_) { + // TODO(areusch): should be |= func_ptr += 1; } + TargetVal func_dev_addr = func_ptr.value(); + + std::tuple<TargetPtr, TargetPtr> arg_field_addrs = EncoderAppend(&batch_args_encoder_, args); + TargetVal arg_values_dev_addr{std::get<0>(arg_field_addrs).value()}; + TargetVal arg_type_codes_dev_addr{std::get<1>(arg_field_addrs).value()}; + + task_queue_.push_back( + DevTask { + .func = func_dev_addr, + .arg_values = arg_values_dev_addr, + .arg_type_codes = arg_type_codes_dev_addr, + .num_args = args.num_args + }); + + if (task_queue_.size() == MicroSession::kTaskQueueCapacity) { + FlushTaskQueue(); + } +} - // Create an allocator stream for the memory region after the most recent - // allocation in the args section. - DevPtr args_addr = GetAllocator(SectionKind::kArgs)->curr_end_addr(); - TargetDataLayoutEncoder encoder(args_addr, word_size_); - - std::tuple<DevPtr, DevPtr> arg_field_addrs = EncoderAppend(&encoder, args); - - // Flush `stream` to device memory. - DevPtr stream_dev_addr = - GetAllocator(SectionKind::kArgs)->Allocate(encoder.buf_size()); - low_level_device()->Write(stream_dev_addr, - reinterpret_cast<void*>(encoder.data()), - encoder.buf_size()); - - TargetVal arg_values_dev_addr = std::get<0>(arg_field_addrs).value(); - TargetVal arg_type_codes_dev_addr = std::get<1>(arg_field_addrs).value(); - if (word_size_ == 4) { - UTVMTask32 task = { - .func = func_ptr.value().val32, - .arg_values = arg_values_dev_addr.val32, - .arg_type_codes = arg_type_codes_dev_addr.val32, - .num_args = args.num_args, - }; - // Write the task. 
- DevSymbolWrite(runtime_symbol_map_, "utvm_task", task); - } else if (word_size_ == 8) { - UTVMTask64 task = { - .func = func_ptr.value().val64, - .arg_values = arg_values_dev_addr.val64, - .arg_type_codes = arg_type_codes_dev_addr.val64, - .num_args = args.num_args, - }; - // Write the task. - DevSymbolWrite(runtime_symbol_map_, "utvm_task", task); +void MicroSession::FlushTaskQueue() { + if (task_queue_.size() == 0) { + // nothing to run + return; + } + if (word_size_.bytes() == 4) { + FlushTaskQueuePriv<StructUTVMTask32>(); + } else if (word_size_.bytes() == 8) { + FlushTaskQueuePriv<StructUTVMTask64>(); } +} - DevPtr utvm_init_addr = runtime_symbol_map_["UTVMInit"]; - DevPtr utvm_done_addr = runtime_symbol_map_["UTVMDone"]; +template <typename T> +void MicroSession::FlushTaskQueuePriv() { + // std::cout << "[MicroSession::FlushTaskQueue]" << std::endl; + std::vector<T> prepped_tasks; + for (const auto& task : task_queue_) { + prepped_tasks.push_back(T(task)); + } + + // Flush `args` to device memory. + low_level_device()->Write( + batch_args_encoder_.start_addr(), + reinterpret_cast<void*>(batch_args_encoder_.data()), + batch_args_encoder_.buf_size()); + + // Flush `tasks` to device memory. 
+// runtime_symbol_map_.Dump(std::cout); + TargetPtr dev_tasks_addr = runtime_symbol_map_["utvm_tasks"]; + low_level_device()->Write( + dev_tasks_addr, + reinterpret_cast<void*>(prepped_tasks.data()), + prepped_tasks.size() * sizeof(T)); + DevSymbolWrite<uint32_t>(runtime_symbol_map_, "utvm_num_tasks", prepped_tasks.size()); + + TargetPtr utvm_init_addr = runtime_symbol_map_["UTVMInit"]; + TargetPtr utvm_done_addr = runtime_symbol_map_["UTVMDone"]; if (thumb_mode_) { + // TODO(areusch): should be |= utvm_init_addr += 1; } + std::chrono::time_point< + std::chrono::high_resolution_clock, std::chrono::nanoseconds> tbegin, tend; + tbegin = std::chrono::high_resolution_clock::now(); + // std::string tmp; Review comment: These ones in particular might be okay to leave in for now, since they are helpful if you want to debug. After this, since we want to change the way we load code, I'd suggest we improve the debugging story here, but it will change as we move to flashing binaries. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org