ekalda commented on a change in pull request #10508:
URL: https://github.com/apache/tvm/pull/10508#discussion_r822724373
##########
File path: python/tvm/contrib/ethosu/cascader/tensor_config.py
##########
@@ -58,9 +58,25 @@ class MemoryRegion(Object):
"""
- def __init__(self, name: str, size: int, read_bandwidth: int,
write_bandwidth: int):
+ def __init__(
+ self,
+ name: str,
+ size: int,
+ read_bandwidth: int,
+ write_bandwidth: int,
+ read_latency: int = 0,
+ write_latency: int = 0,
+ burst_length: int = 1,
Review comment:
Nit: add the new parameters (`read_latency`, `write_latency`, `burst_length`) to the docstring as well
##########
File path: src/contrib/ethosu/cascader/tensor_config.h
##########
@@ -52,19 +52,29 @@ class MemoryRegionNode : public Object {
int read_bandwidth;
/*! \brief The write bandwidth of the region in bytes per cycle */
int write_bandwidth;
+ /*! \brief The read bandwidth of the region in bytes per cycle */
Review comment:
Nit: Update the docstring - this added comment still describes the read bandwidth instead of the newly added field
##########
File path: python/tvm/contrib/ethosu/cascader/device_config.py
##########
@@ -551,7 +551,7 @@ def get_elementwise_block_config(
)
output_cycles *= reduce(lambda a, b: a * b, output_block, 1)
output_cycles = int(math.ceil(output_cycles))
- block_config.append(BlockConfig(output_block, 0,
output_cycles))
+ block_config.append(BlockConfig(output_block, output_block, 0,
output_cycles))
Review comment:
Maybe use the larger of the input blocks?
##########
File path: src/contrib/ethosu/cascader/plan_generator.cc
##########
@@ -332,32 +349,52 @@ std::vector<Plan> GenerateSinglePlans(
}
// Calculate the bandwidth cycles by multiplying the bytes
read/written by the
// bandwidth of the memories
+ BlockConfig block_config = perf_info->block_config;
for (size_t i = 0; i < input_configs.size(); i++) {
- bandwidth_cycles +=
- perf_info->read_bytes[i] /
input_configs[i]->GetCopyRegion()->read_bandwidth;
+ Tensor tensor = input_configs[i]->GetTensor();
+ MemoryRegion home_region = input_configs[i]->GetHomeRegion();
+ MemoryRegion copy_region = input_configs[i]->GetCopyRegion();
+
if (input_configs[i]->DoCopy()) {
// This Tensor needs to be copied - Count stripes for this config
- Tensor tensor = input_configs[i]->GetTensor();
for (const auto& stripe_config :
input_configs[i]->GetStripeConfigs()) {
std::map<std::vector<int>, int> input_blocks =
CountStripes(stripe_config, true);
+ bool first_block = true;
for (const auto& block : input_blocks) {
int bytes_transferred = mul_reduce(block.first) *
tensor->GetDataType().bytes() *
tensor->GetCompressionRatio() *
block.second;
- int read_cycles =
- bytes_transferred *
input_configs[i]->GetHomeRegion()->read_bandwidth;
- int write_cycles =
- bytes_transferred *
input_configs[i]->GetCopyRegion()->write_bandwidth;
+ int read_cycles = bytes_transferred *
home_region->read_bandwidth +
+
input_configs[i]->GetHomeRegion()->read_latency;
Review comment:
nit:
```suggestion
int read_cycles = bytes_transferred *
home_region->read_bandwidth +
home_region->read_latency;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]