This is an automated email from the ASF dual-hosted git repository.
xiyou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 647be2b425 [MetaSchedule] Add `from-target` Defaults for LLVM VNNI Targets (#13383)
647be2b425 is described below
commit 647be2b42510bffb3ed78267c19e76263adcac36
Author: Xiyou Zhou <[email protected]>
AuthorDate: Mon Nov 14 20:58:12 2022 -0800
[MetaSchedule] Add `from-target` Defaults for LLVM VNNI Targets (#13383)
---
include/tvm/meta_schedule/mutator.h | 2 +
include/tvm/meta_schedule/postproc.h | 2 +
include/tvm/meta_schedule/schedule_rule.h | 2 +
src/meta_schedule/mutator/mutator.cc | 2 +
src/meta_schedule/postproc/postproc.cc | 8 ++++
src/meta_schedule/schedule_rule/schedule_rule.cc | 45 ++++++++++++++++++++++
.../space_generator/space_generator.cc | 11 ++++++
7 files changed, 72 insertions(+)
diff --git a/include/tvm/meta_schedule/mutator.h b/include/tvm/meta_schedule/mutator.h
index 08a8248dfd..4095d6ca03 100644
--- a/include/tvm/meta_schedule/mutator.h
+++ b/include/tvm/meta_schedule/mutator.h
@@ -131,6 +131,8 @@ class Mutator : public runtime::ObjectRef {
FApply f_apply, FClone f_clone, FAsString f_as_string);
/*! \brief Create default mutators for LLVM */
TVM_DLL static Map<Mutator, FloatImm, void> DefaultLLVM();
+ /*! \brief Create default mutators for x86 VNNI */
+ TVM_DLL static Map<Mutator, FloatImm, void> DefaultVNNI();
/*! \brief Create default mutators for CUDA */
TVM_DLL static Map<Mutator, FloatImm, void> DefaultCUDA();
/*! \brief Create default mutators for CUDA with TensorCore */
diff --git a/include/tvm/meta_schedule/postproc.h b/include/tvm/meta_schedule/postproc.h
index a680a64795..13fe470587 100644
--- a/include/tvm/meta_schedule/postproc.h
+++ b/include/tvm/meta_schedule/postproc.h
@@ -152,6 +152,8 @@ class Postproc : public runtime::ObjectRef {
TVM_DLL static Postproc RewriteLayout();
/*! \brief Create default postprocessors for LLVM */
TVM_DLL static Array<Postproc, void> DefaultLLVM();
+ /*! \brief Create default postprocessors for x86 VNNI */
+ TVM_DLL static Array<Postproc, void> DefaultVNNI();
/*! \brief Create default postprocessors for CUDA */
TVM_DLL static Array<Postproc, void> DefaultCUDA();
/*! \brief Create default postprocessors for CUDA with TensorCore */
diff --git a/include/tvm/meta_schedule/schedule_rule.h b/include/tvm/meta_schedule/schedule_rule.h
index 70dec47e60..a3d6c7ef68 100644
--- a/include/tvm/meta_schedule/schedule_rule.h
+++ b/include/tvm/meta_schedule/schedule_rule.h
@@ -285,6 +285,8 @@ class ScheduleRule : public runtime::ObjectRef {
/*! \brief Create default schedule rules for LLVM */
TVM_DLL static Array<ScheduleRule, void> DefaultLLVM();
+ /*! \brief Create default schedule rules for x86 VNNI */
+ TVM_DLL static Array<ScheduleRule, void> DefaultVNNI();
/*! \brief Create default schedule rules for CUDA */
TVM_DLL static Array<ScheduleRule, void> DefaultCUDA();
/*! \brief Create default postprocessors for CUDA with TensorCore */
diff --git a/src/meta_schedule/mutator/mutator.cc b/src/meta_schedule/mutator/mutator.cc
index 8e9bfc8bde..8f3d14b6c4 100644
--- a/src/meta_schedule/mutator/mutator.cc
+++ b/src/meta_schedule/mutator/mutator.cc
@@ -59,6 +59,8 @@ Map<Mutator, FloatImm> Mutator::DefaultLLVM() {
{Mutator::MutateParallel(/*max_jobs_per_core=*/16),
FloatImm(DataType::Float(64), 0.02)}};
}
+Map<Mutator, FloatImm> Mutator::DefaultVNNI() { return Mutator::DefaultLLVM(); }
+
Map<Mutator, FloatImm> Mutator::DefaultCUDA() {
return Map<Mutator, FloatImm>{
{Mutator::MutateTileSize(), FloatImm(DataType::Float(64), 0.9)},
diff --git a/src/meta_schedule/postproc/postproc.cc b/src/meta_schedule/postproc/postproc.cc
index 0738c87112..c614f3230d 100644
--- a/src/meta_schedule/postproc/postproc.cc
+++ b/src/meta_schedule/postproc/postproc.cc
@@ -59,6 +59,14 @@ Array<Postproc> Postproc::DefaultLLVM() {
};
}
+Array<Postproc> Postproc::DefaultVNNI() {
+  return Array<Postproc>{
+      Postproc::DisallowDynamicLoop(),
+      Postproc::RewriteParallelVectorizeUnroll(),
+      Postproc::RewriteReductionBlock(),
+      Postproc::RewriteTensorize(/*vectorize_init_loop=*/true),
+      Postproc::RewriteLayout(),
+  };
+}
+
Array<Postproc> Postproc::DefaultCUDA() {
return Array<Postproc>{
Postproc::DisallowDynamicLoop(),
diff --git a/src/meta_schedule/schedule_rule/schedule_rule.cc b/src/meta_schedule/schedule_rule/schedule_rule.cc
index b1e8c3695d..e4f97c1fa6 100644
--- a/src/meta_schedule/schedule_rule/schedule_rule.cc
+++ b/src/meta_schedule/schedule_rule/schedule_rule.cc
@@ -85,6 +85,51 @@ Array<ScheduleRule> ScheduleRule::DefaultLLVM() {
};
}
+Array<ScheduleRule> ScheduleRule::DefaultVNNI() {
+ return {
+ ScheduleRule::ApplyCustomRule(),
+ ScheduleRule::InlineConstantScalars(),
+ ScheduleRule::AutoInline(
+ /*into_producer=*/false,
+ /*into_consumer=*/true,
+ /*inline_const_tensor=*/true,
+ /*disallow_if_then_else=*/true,
+ /*require_injective=*/true,
+ /*require_ordered=*/true,
+ /*disallow_op=*/Array<String>{"tir.exp"}),
+ ScheduleRule::AddRFactor(
+ /*max_jobs_per_core=*/16,
+ /*max_innermost_factor=*/Integer(64)),
+ ScheduleRule::MultiLevelTilingWithIntrin(
+ /*intrin_name=*/"dot_16x4_vnni",
+ /*structure=*/"SSRSRS",
+ /*tile_binds=*/NullOpt,
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/NullOpt,
+ /*reuse_read=*/NullOpt,
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("may")},
+ {"levels", Array<Integer>{1, 2}},
+ {"scope", String("global")}}),
+ ScheduleRule::MultiLevelTiling(
+ /*structure=*/"SSRSRS",
+ /*tile_binds=*/NullOpt,
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/NullOpt,
+ /*reuse_read=*/NullOpt,
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("may")},
+ {"levels", Array<Integer>{1, 2}},
+ {"scope", String("global")}}),
+ ScheduleRule::ParallelizeVectorizeUnroll(
+ /*max_jobs_per_core=*/16,
+ /*max_vectorize_extent=*/64,
+ /*unroll_max_steps=*/Array<Integer>{0, 16, 64, 512},
+ /*unroll_explicit=*/true),
+ ScheduleRule::RandomComputeLocation(),
+ };
+}
+
Array<ScheduleRule> ScheduleRule::DefaultCUDA() {
return {
ScheduleRule::ApplyCustomRule(),
diff --git a/src/meta_schedule/space_generator/space_generator.cc b/src/meta_schedule/space_generator/space_generator.cc
index bcc0673e59..bd124511b8 100644
--- a/src/meta_schedule/space_generator/space_generator.cc
+++ b/src/meta_schedule/space_generator/space_generator.cc
@@ -23,6 +23,13 @@ namespace meta_schedule {
String GetRuleKindFromTarget(const Target& target) {
if (target->kind->name == "llvm") {
+    static const PackedFunc* f_check_vnni =
+        runtime::Registry::Get("tvm.topi.x86.utils.target_has_vnni");
+    ICHECK(*f_check_vnni != nullptr) << "The `target_has_vnni` func is not in tvm registry.";
+ if (target->GetAttr<String>("mcpu") &&
+ (*f_check_vnni)(target->GetAttr<String>("mcpu").value())) {
+ return "vnni";
+ }
return "llvm";
}
if (target->kind->name == "hexagon") {
@@ -79,6 +86,10 @@ void SpaceGeneratorNode::InitializeWithTuneContext(const TuneContext& context) {
default_sch_rules = ScheduleRule::DefaultHexagon();
default_postprocs = Postproc::DefaultHexagon();
default_mutator_probs = Mutator::DefaultHexagon();
+ } else if (kind == "vnni") {
+ default_sch_rules = ScheduleRule::DefaultVNNI();
+ default_postprocs = Postproc::DefaultVNNI();
+ default_mutator_probs = Mutator::DefaultVNNI();
} else {
LOG(FATAL) << "Unsupported kind: " << kind;
throw;