mbaret commented on a change in pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#discussion_r783103861
##########
File path: src/contrib/ethosu/cascader/parts/ethosu.cc
##########
@@ -32,84 +35,155 @@ namespace contrib {
namespace ethosu {
namespace cascader {
-const std::vector<int> EthosuPartNode::GetBlockShape(const StripeConfig&
output_stripe_config,
- bool is_rollling) {
- std::vector<int> block_shape;
- for (int axis : output_stripe_config->GetShape()) {
- block_shape.push_back(std::min(axis, 4));
- }
- return block_shape;
-}
+const std::vector<int64_t> EthosuPartNode::GetBytesRead(const std::vector<int>
block_shape,
+ const std::vector<int>
full_shape) {
+ std::vector<int64_t> bytes_per_input(propagators_.size(), 0);
-const std::vector<int> EthosuPartNode::GetBlockInputBytes_(const
std::vector<int>& block_shape) {
- std::vector<int> bytes_per_input;
- std::vector<float> strides;
std::vector<int> order;
std::vector<int> stripes;
std::vector<int> offset;
+ std::vector<float> strides;
for (size_t i = 0; i < block_shape.size(); i++) {
- strides.push_back(1.0);
order.push_back(1);
- stripes.push_back(1);
+ stripes.push_back(round_up_divide(full_shape[i], block_shape[i]));
offset.push_back(0);
+ strides.push_back(static_cast<float>(block_shape[i]));
}
- StripeConfig output_block_config(block_shape, block_shape, strides, order,
stripes, offset);
+
+ StripeConfig output_block_config(block_shape, full_shape, strides, order,
stripes, offset);
auto input_block_configs = CalculateInputStripeConfigs(output_block_config);
+
+ int i = 0;
for (const auto& input_block_config : input_block_configs) {
- bytes_per_input.push_back(mul_reduce(input_block_config->GetShape()));
+ std::map<std::vector<int>, int> input_blocks =
CountStripes(input_block_config, false);
+
+ for (const auto& block : input_blocks) {
+ bytes_per_input[i] += mul_reduce(block.first) * block.second;
+ }
+ i++;
}
+
+ if (weight_tensor_idx_ != -1) {
+ bytes_per_input[weight_tensor_idx_] *= (stripes[height_idx_] *
stripes[width_idx_]);
+ }
+
return bytes_per_input;
}
+const BlockConfig EthosuPartNode::GetBlockConfig(const StripeConfig&
output_stripe_config) {
+ BlockConfig best_block_config;
+ float best_cost = std::numeric_limits<float>::infinity();
+ std::vector<int> output_stripe_shape = output_stripe_config->GetShape();
+
+ for (const auto& block_config : valid_block_configs_) {
+ std::vector<int> output_block = block_config->GetOutputBlockShape();
+
+ std::vector<int64_t> bytes_per_input = GetBytesRead(output_block,
output_stripe_shape);
+ bytes_per_input[0] *= subkernels_;
+
+ // Calculate bytes read per output element
+ float relative_cost =
+ (bytes_per_input[0] + bytes_per_input[1]) /
mul_reduce(output_stripe_shape);
+
+ // Single buffering hardware optimization
+ if (mul_reduce(output_stripe_shape) <= 2 * mul_reduce(output_block)) {
+ relative_cost /= 2;
+ }
+
+ if (relative_cost < best_cost) {
+ best_block_config = block_config;
+ best_cost = relative_cost;
+ }
+ }
+
+ return best_block_config;
+}
+
const PerformanceInfo EthosuPartNode::GetPerformanceInfo(const StripeConfig&
output_stripe_config,
- bool is_rolling) {
- std::vector<int> block_shape = GetBlockShape(output_stripe_config,
is_rolling);
- std::vector<int> bytes_per_input = GetBlockInputBytes_(block_shape);
- int bytes_per_output = mul_reduce(block_shape);
- int num_blocks = 1;
+ BufferMode
buffer_mode) {
+ BlockConfig block_config = GetBlockConfig(output_stripe_config);
+ std::vector<int> block_shape = block_config->GetOutputBlockShape();
+
+ std::vector<int64_t> bytes_per_input =
+ GetBytesRead(block_shape, output_stripe_config->GetShape());
+
+ int elements_per_block = mul_reduce(block_shape);
+ int bytes_per_output = elements_per_block;
+ float num_blocks = 1.0f;
for (size_t i = 0; i < block_shape.size(); i++) {
- if (!is_rolling) {
- num_blocks *= output_stripe_config->GetShape()[i] *
output_stripe_config->GetStripes()[i] /
+ if (buffer_mode == BufferMode::RECOMPUTE) {
+ num_blocks *= static_cast<float>(output_stripe_config->GetShape()[i] *
+ output_stripe_config->GetStripes()[i]) /
block_shape[i];
} else {
- num_blocks *= output_stripe_config->GetExtent()[i] / block_shape[i];
+ num_blocks *= static_cast<float>(output_stripe_config->GetExtent()[i]) /
block_shape[i];
}
}
Review comment:
Just to mention that this logic is a placeholder and will be replaced in a
later patch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]