csullivan commented on code in PR #10878:
URL: https://github.com/apache/tvm/pull/10878#discussion_r843085597
##########
src/runtime/hexagon/hexagon/hexagon_buffer.cc:
##########
@@ -205,73 +205,105 @@ void HexagonBuffer::SetStorageScope(Optional<String>
scope) {
}
}
-void HexagonBuffer::CopyTo(void* data, size_t nbytes) const {
- CHECK_LE(nbytes, TotalBytes());
- CHECK(managed_allocations_.size() && "CopyTo not supported on unmanaged
`external` allocations");
+std::vector<MemoryCopy> BufferSet::MemoryCopies(const BufferSet& dest, const
BufferSet& src,
+ size_t bytes_to_copy) {
+ CHECK_LE(bytes_to_copy, src.TotalBytes());
+ CHECK_LE(bytes_to_copy, dest.TotalBytes());
+
+ auto pointer_to = [](const BufferSet& buf, size_t region_i, size_t byte_i)
-> void* {
+ void* region = buf.buffers[region_i];
+ return static_cast<unsigned char*>(region) + byte_i;
+ };
+
+ size_t num_src_regions = (bytes_to_copy + src.region_size_bytes - 1) /
src.region_size_bytes;
+
+ // First, determine all copies that do not cross boundaries in
+ // either source or destination region. This requires two loops, as
+ // a single source region may overlap one or more destination
+ // regions, and vice versa.
+ std::vector<MemoryCopy> micro_copies;
+ for (size_t src_i = 0; src_i < num_src_regions; src_i++) {
+ size_t src_region_begin = src_i * src.region_size_bytes;
+ size_t src_region_end = std::min((src_i + 1) * src.region_size_bytes,
bytes_to_copy);
+
+ size_t dest_i_begin = src_region_begin / dest.region_size_bytes;
+ size_t dest_i_end = (src_region_end - 1) / dest.region_size_bytes + 1;
+ for (size_t dest_i = dest_i_begin; dest_i < dest_i_end; dest_i++) {
+ size_t offset_begin = std::max(src_region_begin, dest_i *
dest.region_size_bytes);
+ size_t offset_end = std::min(src_region_end, (dest_i + 1) *
dest.region_size_bytes);
+
+ size_t num_bytes = offset_end - offset_begin;
+ void* src_ptr = pointer_to(src, src_i, offset_begin %
src.region_size_bytes);
+ void* dest_ptr = pointer_to(dest, dest_i, offset_begin %
dest.region_size_bytes);
+ micro_copies.push_back(MemoryCopy(dest_ptr, src_ptr, num_bytes));
+ }
+ }
+
+ return micro_copies;
+}
+
+std::vector<MemoryCopy> MemoryCopy::MergeAdjacent(std::vector<MemoryCopy>
micro_copies) {
+ std::sort(micro_copies.begin(), micro_copies.end(),
+ [](const MemoryCopy& a, const MemoryCopy& b) { return a.src <
b.src; });
- size_t copied = 0;
- for (const auto& managed_alloc : managed_allocations_) {
- size_t bytes_to_copy = std::min(nbytes - copied,
managed_alloc->allocation_nbytes_);
- if (bytes_to_copy == 0) break;
+ std::vector<MemoryCopy> macro_copies;
+ for (const auto& copy : micro_copies) {
+ if (macro_copies.size() && macro_copies.back().IsDirectlyBefore(copy)) {
+ macro_copies.back().num_bytes += copy.num_bytes;
+ } else {
+ macro_copies.push_back(copy);
+ }
+ }
- void* data_plus_copied = static_cast<void*>((static_cast<char*>(data) +
copied));
- int status = hexagon_user_dma_1d_sync(data_plus_copied,
managed_alloc->data_, bytes_to_copy);
- CHECK_EQ(status, 0);
+ return macro_copies;
+}
- copied += bytes_to_copy;
+void hexagon_buffer_copy_across_regions(const BufferSet& dest, const
BufferSet& src,
+ size_t bytes_to_copy) {
+ // First, determine all copies that do not cross boundaries in
+ // either source or destination region.
+ auto micro_copies = BufferSet::MemoryCopies(dest, src, bytes_to_copy);
+
+ // If regions are contiguously allocated, we can reduce the number
+ // of copies required by merging adjacent copies.
+ auto macro_copies = MemoryCopy::MergeAdjacent(std::move(micro_copies));
+
+ // Finally, do the memory copies.
+ for (const auto& copy : macro_copies) {
+ int error_code = hexagon_user_dma_1d_sync(copy.dest, copy.src,
copy.num_bytes);
+ CHECK_EQ(error_code, 0);
}
}
Review Comment:
❤️
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]