This is an automated email from the ASF dual-hosted git repository.
lmzheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 965a67e [auto_scheduler] metal default hardware params (#7022)
965a67e is described below
commit 965a67e7a04612806a390b50e2cca1c0a7744900
Author: Bing Xu <[email protected]>
AuthorDate: Thu Dec 3 06:39:06 2020 -0800
[auto_scheduler] metal default hardware params (#7022)
---
src/auto_scheduler/search_task.cc | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/src/auto_scheduler/search_task.cc b/src/auto_scheduler/search_task.cc
index 0b85a03..bd09a70 100755
--- a/src/auto_scheduler/search_task.cc
+++ b/src/auto_scheduler/search_task.cc
@@ -72,6 +72,17 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target
p_hardware_params->max_vthread_extent = p_hardware_params->warp_size / 4;
return hardware_params;
+ } else if (target->kind->device_type == kDLMetal) {
+ // Reference: https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+ // This setting looks working for Metal GPUs later than A10
+ auto hardware_params = HardwareParams(-1, 16, 64);
+ auto* p_hardware_params = hardware_params.CopyOnWrite();
+ p_hardware_params->max_shared_memory_per_block = 32 * 1024;
+ p_hardware_params->max_registers_per_block = 4 * 1024;
+ p_hardware_params->max_threads_per_block = 1024;
+ p_hardware_params->warp_size = 8;
+ p_hardware_params->max_vthread_extent = p_hardware_params->warp_size / 4;
+ return hardware_params;
} else {
LOG(FATAL) << "No default hardware parameters for target: " << target;
}