junrushao commented on code in PR #12895:
URL: https://github.com/apache/tvm/pull/12895#discussion_r982931215
##########
src/meta_schedule/schedule_rule/schedule_rule.cc:
##########
@@ -51,6 +51,125 @@ ScheduleRule ScheduleRule::PyScheduleRule(
return ScheduleRule(n);
}
+Array<ScheduleRule> ScheduleRule::DefaultLLVM() {
+ return {
+ ScheduleRule::AutoInline(
+ /*into_producer=*/false,
+ /*into_consumer=*/true,
+ /*inline_const_tensor=*/true,
+ /*disallow_if_then_else=*/true,
+ /*require_injective=*/true,
+ /*require_ordered=*/true,
+ /*disallow_op=*/Array<String>{"tir.exp"}),
+ ScheduleRule::AddRFactor(
+ /*max_jobs_per_core=*/16,
+ /*max_innermost_factor=*/Integer(64)),
+ ScheduleRule::MultiLevelTiling(
+ /*structure=*/"SSRSRS",
+ /*tile_binds=*/NullOpt,
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/NullOpt,
+ /*reuse_read=*/NullOpt,
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("may")},
+ {"levels", Array<Integer>{1, 2}},
+ {"scope", String("global")}}),
+ ScheduleRule::ParallelizeVectorizeUnroll(
+ /*max_jobs_per_core=*/16,
+ /*max_vectorize_extent=*/64,
+ /*unroll_max_steps=*/Array<Integer>{0, 16, 64, 512},
+ /*unroll_explicit=*/true),
+ ScheduleRule::RandomComputeLocation(),
+ };
+}
+
+Array<ScheduleRule> ScheduleRule::DefaultCUDA() {
+ return {
+ ScheduleRule::MultiLevelTiling(
+ /*structure=*/"SSSRRSRS",
+ /*tile_binds=*/Array<String>{"blockIdx.x", "vthread.x",
"threadIdx.x"},
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/Array<Integer>{1, 2, 3, 4, 8, 16},
+ /*reuse_read=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{4}}, //
+ {"scope", String("shared")}},
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{3}}, //
+ {"scope", String("local")}}),
+ ScheduleRule::AutoInline(
+ /*into_producer=*/true,
+ /*into_consumer=*/true,
+ /*inline_const_tensor=*/true,
+ /*disallow_if_then_else=*/false,
+ /*require_injective=*/false,
+ /*require_ordered=*/false,
+ /*disallow_op=*/Array<String>{}),
+ ScheduleRule::CrossThreadReduction(
+ /*thread_extents=*/Array<Integer>{4, 8, 16, 32, 64, 128, 256, 512}),
+ ScheduleRule::ParallelizeVectorizeUnroll(
+ /*max_jobs_per_core=*/-1,
+ /*max_vectorize_extent=*/-1,
+ /*unroll_max_steps=*/Array<Integer>{0, 16, 64, 512, 1024},
+ /*unroll_explicit=*/true),
+ ScheduleRule::AutoBind(
+ /*max_threadblocks=*/256,
+ /*thread_extents*/ Array<Integer>{32, 64, 128, 256, 512, 1024}),
+ };
+}
+
+Array<ScheduleRule> ScheduleRule::DefaultCUDATensorCore() {
+ Array<Map<String, String>> intrin_groups = {
+ {
+ {"init", "wmma_fill_16x16x16_f16"},
+ {"load_a", "wmma_load_16x16x16_f16_a"},
+ {"load_b", "wmma_load_16x16x16_f16_b"},
+ {"compute", "wmma_sync_16x16x16_f16f16f16"},
+ {"store", "wmma_store_16x16x16_f16_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_f16"},
+ {"load_a", "wmma_load_16x16x16_f16_a"},
+ {"load_b", "wmma_load_16x16x16_f16_b_trans"},
+ {"compute", "wmma_sync_16x16x16_f16f16f16_trans"},
+ {"store", "wmma_store_16x16x16_f16_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_s32"},
+ {"load_a", "wmma_load_16x16x16_s8_a"},
+ {"load_b", "wmma_load_16x16x16_s8_b"},
+ {"compute", "wmma_sync_16x16x16_s8s8s32"},
+ {"store", "wmma_store_16x16x16_s32_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_s32"},
+ {"load_a", "wmma_load_16x16x16_s8_a"},
+ {"load_b", "wmma_load_16x16x16_s8_b_trans"},
+ {"compute", "wmma_sync_16x16x16_s8s8s32_trans"},
+ {"store", "wmma_store_16x16x16_s32_shared"},
+ },
+ };
+ Array<ScheduleRule> results{ScheduleRule::MultiLevelTilingTensorCore(
+ /*intrin_groups=*/intrin_groups,
+ /*structure=*/"SSSRRSRS",
+ /*tile_binds=*/Array<String>{"blockIdx.x", "vthread.x", "threadIdx.x"},
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/Array<Integer>{1, 2, 3, 4, 8, 16},
+ /*reuse_read=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{4}}, //
+ {"scope", String("shared")}},
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{3}}, //
+ {"scope", String("local")}},
Review Comment:
Oops! Would you mind pointing out the correct one? Thanks a lot!
##########
src/meta_schedule/schedule_rule/schedule_rule.cc:
##########
@@ -51,6 +51,125 @@ ScheduleRule ScheduleRule::PyScheduleRule(
return ScheduleRule(n);
}
+Array<ScheduleRule> ScheduleRule::DefaultLLVM() {
+ return {
+ ScheduleRule::AutoInline(
+ /*into_producer=*/false,
+ /*into_consumer=*/true,
+ /*inline_const_tensor=*/true,
+ /*disallow_if_then_else=*/true,
+ /*require_injective=*/true,
+ /*require_ordered=*/true,
+ /*disallow_op=*/Array<String>{"tir.exp"}),
+ ScheduleRule::AddRFactor(
+ /*max_jobs_per_core=*/16,
+ /*max_innermost_factor=*/Integer(64)),
+ ScheduleRule::MultiLevelTiling(
+ /*structure=*/"SSRSRS",
+ /*tile_binds=*/NullOpt,
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/NullOpt,
+ /*reuse_read=*/NullOpt,
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("may")},
+ {"levels", Array<Integer>{1, 2}},
+ {"scope", String("global")}}),
+ ScheduleRule::ParallelizeVectorizeUnroll(
+ /*max_jobs_per_core=*/16,
+ /*max_vectorize_extent=*/64,
+ /*unroll_max_steps=*/Array<Integer>{0, 16, 64, 512},
+ /*unroll_explicit=*/true),
+ ScheduleRule::RandomComputeLocation(),
+ };
+}
+
+Array<ScheduleRule> ScheduleRule::DefaultCUDA() {
+ return {
+ ScheduleRule::MultiLevelTiling(
+ /*structure=*/"SSSRRSRS",
+ /*tile_binds=*/Array<String>{"blockIdx.x", "vthread.x",
"threadIdx.x"},
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/Array<Integer>{1, 2, 3, 4, 8, 16},
+ /*reuse_read=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{4}}, //
+ {"scope", String("shared")}},
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{3}}, //
+ {"scope", String("local")}}),
+ ScheduleRule::AutoInline(
+ /*into_producer=*/true,
+ /*into_consumer=*/true,
+ /*inline_const_tensor=*/true,
+ /*disallow_if_then_else=*/false,
+ /*require_injective=*/false,
+ /*require_ordered=*/false,
+ /*disallow_op=*/Array<String>{}),
+ ScheduleRule::CrossThreadReduction(
+ /*thread_extents=*/Array<Integer>{4, 8, 16, 32, 64, 128, 256, 512}),
+ ScheduleRule::ParallelizeVectorizeUnroll(
+ /*max_jobs_per_core=*/-1,
+ /*max_vectorize_extent=*/-1,
+ /*unroll_max_steps=*/Array<Integer>{0, 16, 64, 512, 1024},
+ /*unroll_explicit=*/true),
+ ScheduleRule::AutoBind(
+ /*max_threadblocks=*/256,
+ /*thread_extents*/ Array<Integer>{32, 64, 128, 256, 512, 1024}),
+ };
+}
+
+Array<ScheduleRule> ScheduleRule::DefaultCUDATensorCore() {
+ Array<Map<String, String>> intrin_groups = {
+ {
+ {"init", "wmma_fill_16x16x16_f16"},
+ {"load_a", "wmma_load_16x16x16_f16_a"},
+ {"load_b", "wmma_load_16x16x16_f16_b"},
+ {"compute", "wmma_sync_16x16x16_f16f16f16"},
+ {"store", "wmma_store_16x16x16_f16_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_f16"},
+ {"load_a", "wmma_load_16x16x16_f16_a"},
+ {"load_b", "wmma_load_16x16x16_f16_b_trans"},
+ {"compute", "wmma_sync_16x16x16_f16f16f16_trans"},
+ {"store", "wmma_store_16x16x16_f16_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_s32"},
+ {"load_a", "wmma_load_16x16x16_s8_a"},
+ {"load_b", "wmma_load_16x16x16_s8_b"},
+ {"compute", "wmma_sync_16x16x16_s8s8s32"},
+ {"store", "wmma_store_16x16x16_s32_shared"},
+ },
+ {
+ {"init", "wmma_fill_16x16x16_s32"},
+ {"load_a", "wmma_load_16x16x16_s8_a"},
+ {"load_b", "wmma_load_16x16x16_s8_b_trans"},
+ {"compute", "wmma_sync_16x16x16_s8s8s32_trans"},
+ {"store", "wmma_store_16x16x16_s32_shared"},
+ },
+ };
+ Array<ScheduleRule> results{ScheduleRule::MultiLevelTilingTensorCore(
+ /*intrin_groups=*/intrin_groups,
+ /*structure=*/"SSSRRSRS",
+ /*tile_binds=*/Array<String>{"blockIdx.x", "vthread.x", "threadIdx.x"},
+ /*max_innermost_factor=*/Integer(64),
+ /*vector_load_lens=*/Array<Integer>{1, 2, 3, 4, 8, 16},
+ /*reuse_read=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{4}}, //
+ {"scope", String("shared")}},
+ /*reuse_write=*/
+ Map<String, ObjectRef>{{"req", String("must")},
+ {"levels", Array<Integer>{3}}, //
+ {"scope", String("local")}},
Review Comment:
Oops! Would you mind pointing out the correct one? Thanks a lot!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]