This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 6ce41be327 [MetaSchedule] Modify Profiler Timers (#11735)
6ce41be327 is described below
commit 6ce41be3279fc5368fe4e0f406d412eeee20c92d
Author: Xiyou Zhou <[email protected]>
AuthorDate: Wed Jun 15 13:40:10 2022 -0700
[MetaSchedule] Modify Profiler Timers (#11735)
Minor modification to scoped timers to cover 99% of the total time cost
during MS tuning. Allow `ApplyHistoryBest` and `TaskExtraction` time to be
counted during `tune_relay`.
---
python/tvm/meta_schedule/tune.py | 17 ++--
.../measure_callback/add_to_database.cc | 2 +-
.../measure_callback/echo_statistics.cc | 2 +-
.../measure_callback/measure_callback.cc | 2 +-
.../measure_callback/remove_build_artifact.cc | 3 +-
.../measure_callback/update_cost_model.cc | 2 +-
src/meta_schedule/profiler.cc | 4 +-
.../search_strategy/evolutionary_search.cc | 108 ++++++++++++---------
src/meta_schedule/task_scheduler/task_scheduler.cc | 1 +
.../unittest/test_meta_schedule_tune_relay.py | 33 +++----
10 files changed, 95 insertions(+), 79 deletions(-)
diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py
index 0622b8d773..fd31760c11 100644
--- a/python/tvm/meta_schedule/tune.py
+++ b/python/tvm/meta_schedule/tune.py
@@ -38,6 +38,7 @@ from .extracted_task import ExtractedTask
from .measure_callback import MeasureCallback
from .mutator import Mutator
from .postproc import Postproc
+from .profiler import Profiler
from .runner import Runner
from .schedule_rule import ScheduleRule
from .search_strategy import EvolutionarySearch, ReplayFunc, ReplayTrace
@@ -563,7 +564,8 @@ def tune_relay(
target = default_config.target(target)
# pylint: enable=protected-access,
# parse the tuning contexts
- extracted_tasks = extract_task_from_relay(mod, target, params)
+ with Profiler.timeit("TaskExtraction"):
+ extracted_tasks = extract_task_from_relay(mod, target, params)
database = tune_extracted_tasks(
extracted_tasks,
config,
@@ -579,9 +581,10 @@ def tune_relay(
mutator_probs=mutator_probs,
num_threads=num_threads,
)
- with target, autotvm_silencer(), ApplyHistoryBest(database):
- with PassContext(
- opt_level=3,
- config={"relay.backend.use_meta_schedule": True},
- ):
- return relay_build(mod, target=target, params=params)
+ with Profiler.timeit("ApplyHistoryBest"):
+ with target, autotvm_silencer(), ApplyHistoryBest(database):
+ with PassContext(
+ opt_level=3,
+ config={"relay.backend.use_meta_schedule": True},
+ ):
+ return relay_build(mod, target=target, params=params)
diff --git a/src/meta_schedule/measure_callback/add_to_database.cc
b/src/meta_schedule/measure_callback/add_to_database.cc
index e86da3720f..26399276c9 100644
--- a/src/meta_schedule/measure_callback/add_to_database.cc
+++ b/src/meta_schedule/measure_callback/add_to_database.cc
@@ -30,7 +30,7 @@ class AddToDatabaseNode : public MeasureCallbackNode {
if (!task_scheduler->database.defined()) {
return;
}
- auto _ = Profiler::TimedScope("AddToDatabase");
+ auto _ = Profiler::TimedScope("MeasureCallback/AddToDatabase");
TuneContext task = task_scheduler->tasks[task_id];
Database database = task_scheduler->database.value();
Workload workload = database->CommitWorkload(task->mod.value());
diff --git a/src/meta_schedule/measure_callback/echo_statistics.cc
b/src/meta_schedule/measure_callback/echo_statistics.cc
index 5f3dce06f0..fb10642665 100644
--- a/src/meta_schedule/measure_callback/echo_statistics.cc
+++ b/src/meta_schedule/measure_callback/echo_statistics.cc
@@ -79,10 +79,10 @@ class EchoStatisticsNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
+ auto _ = Profiler::TimedScope("MeasureCallback/EchoStatistics");
if (this->task_info.empty()) {
SetupTaskInfo(task_scheduler->tasks);
}
- auto _ = Profiler::TimedScope("EchoStatistics");
ICHECK_EQ(measure_candidates.size(), builder_results.size());
ICHECK_EQ(measure_candidates.size(), runner_results.size());
int n = measure_candidates.size();
diff --git a/src/meta_schedule/measure_callback/measure_callback.cc
b/src/meta_schedule/measure_callback/measure_callback.cc
index e49f5216ec..ebe63e7b76 100644
--- a/src/meta_schedule/measure_callback/measure_callback.cc
+++ b/src/meta_schedule/measure_callback/measure_callback.cc
@@ -27,7 +27,7 @@ void PyMeasureCallbackNode::Apply(const TaskScheduler&
task_scheduler,
const Array<BuilderResult>& builds,
//
const Array<RunnerResult>& results) {
ICHECK(f_apply != nullptr) << "PyMeasureCallback's Apply method not
implemented!";
- auto _ = Profiler::TimedScope(this->f_as_string());
+ auto _ = Profiler::TimedScope("MeasureCallback/" + this->f_as_string());
return f_apply(task_scheduler, task_id, measure_candidates, builds, results);
}
diff --git a/src/meta_schedule/measure_callback/remove_build_artifact.cc
b/src/meta_schedule/measure_callback/remove_build_artifact.cc
index 67267dff91..0abbebf3b4 100644
--- a/src/meta_schedule/measure_callback/remove_build_artifact.cc
+++ b/src/meta_schedule/measure_callback/remove_build_artifact.cc
@@ -28,7 +28,8 @@ class RemoveBuildArtifactNode : public MeasureCallbackNode {
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
static const PackedFunc* f_rm =
runtime::Registry::Get("meta_schedule.remove_build_dir");
- auto _ = Profiler::TimedScope("RemoveBuildArtifact");
+ ICHECK(*f_rm != nullptr) << "The `remove_build_dir` func is not in tvm
registry.";
+ auto _ = Profiler::TimedScope("MeasureCallback/RemoveBuildArtifact");
for (const BuilderResult& build_result : builder_results) {
if (Optional<String> path = build_result->artifact_path) {
(*f_rm)(path.value());
diff --git a/src/meta_schedule/measure_callback/update_cost_model.cc
b/src/meta_schedule/measure_callback/update_cost_model.cc
index 5b6208581c..8851345c43 100644
--- a/src/meta_schedule/measure_callback/update_cost_model.cc
+++ b/src/meta_schedule/measure_callback/update_cost_model.cc
@@ -27,7 +27,7 @@ class UpdateCostModelNode : public MeasureCallbackNode {
const Array<MeasureCandidate>& measure_candidates,
const Array<BuilderResult>& builder_results,
const Array<RunnerResult>& runner_results) final {
- auto _ = Profiler::TimedScope("UpdateCostModel");
+ auto _ = Profiler::TimedScope("MeasureCallback/UpdateCostModel");
TuneContext task = task_scheduler->tasks[task_id];
ICHECK(task_scheduler->cost_model.defined())
<< "Cost model must be defined for the task scheduler!";
diff --git a/src/meta_schedule/profiler.cc b/src/meta_schedule/profiler.cc
index d3f72bb705..2f955ebf09 100644
--- a/src/meta_schedule/profiler.cc
+++ b/src/meta_schedule/profiler.cc
@@ -63,6 +63,7 @@ String ProfilerNode::Table() const {
p.Row() << i << table_entry[i].name << table_entry[i].minutes <<
table_entry[i].percentage;
}
}
+ p.Separator();
return p.AsStr();
}
@@ -79,7 +80,8 @@ PackedFunc ProfilerTimedScope(String name) {
tik =
std::chrono::high_resolution_clock::now(), //
name = std::move(name)]() {
auto tok = std::chrono::high_resolution_clock::now();
- double duration = std::chrono::duration_cast<std::chrono::seconds>(tok -
tik).count();
+ double duration =
+ std::chrono::duration_cast<std::chrono::nanoseconds>(tok -
tik).count() / 1e9;
profiler->stats_sec[name] += duration;
});
}
diff --git a/src/meta_schedule/search_strategy/evolutionary_search.cc
b/src/meta_schedule/search_strategy/evolutionary_search.cc
index acde7f65a8..3b672639aa 100644
--- a/src/meta_schedule/search_strategy/evolutionary_search.cc
+++ b/src/meta_schedule/search_strategy/evolutionary_search.cc
@@ -499,9 +499,13 @@ std::vector<Schedule>
EvolutionarySearchNode::State::SampleInitPopulation(int nu
std::vector<Schedule> EvolutionarySearchNode::State::EvolveWithCostModel(
std::vector<Schedule> population, int num) {
- ICHECK_GT(num, 0);
- // The heap to record best schedule, we do not consider schedules that are
already measured
- IRModuleSet exists = this->measured_workloads_;
+ IRModuleSet exists;
+ {
+ auto _ =
Profiler::TimedScope("EvoSearch/Evolve/Misc/CopyMeasuredWorkloads");
+ ICHECK_GT(num, 0);
+ // The heap to record best schedule, we do not consider schedules that are
already measured
+ exists = this->measured_workloads_;
+ }
SizedHeap heap(num);
for (int iter = 0;; ++iter) {
// Predict normalized score with the cost model,
@@ -509,31 +513,35 @@ std::vector<Schedule>
EvolutionarySearchNode::State::EvolveWithCostModel(
GetRef<TuneContext>(self->context_), //
this->cost_model_,
//
this->args_info_);
- ICHECK_EQ(scores.size(), population.size());
- for (int i = 0, n = population.size(); i < n; ++i) {
- Schedule sch = population.at(i);
- IRModule mod = sch->mod();
- size_t shash = StructuralHash()(mod);
- double score = scores.at(i);
- if (!exists.Has(mod, shash)) {
- exists.Add(mod, shash);
- heap.Push(sch, score);
+
+ {
+ auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
+ ICHECK_EQ(scores.size(), population.size());
+ for (int i = 0, n = population.size(); i < n; ++i) {
+ Schedule sch = population.at(i);
+ IRModule mod = sch->mod();
+ size_t shash = StructuralHash()(mod);
+ double score = scores.at(i);
+ if (!exists.Has(mod, shash)) {
+ exists.Add(mod, shash);
+ heap.Push(sch, score);
+ }
+ }
+ // Discontinue once it reaches end of search
+ if (iter == self->genetic_num_iters) {
+ break;
+ }
+ // Set threaded samplers, with probability from predicated normalized
throughput
+ for (PerThreadData& data : this->per_thread_data_) {
+ data.Set(scores, self->genetic_mutate_prob,
self->context_->mutator_probs);
}
}
- // Discontinue once it reaches end of search
- if (iter == self->genetic_num_iters) {
- break;
- }
- // Set threaded samplers, with probability from predicated normalized
throughput
- for (PerThreadData& data : this->per_thread_data_) {
- data.Set(scores, self->genetic_mutate_prob,
self->context_->mutator_probs);
- }
- ThreadedTraceApply pp(self->context_->postprocs);
- ConcurrentBitmask cbmask(self->population_size);
- std::vector<Schedule> next_population(self->population_size,
Schedule{nullptr});
- // The worker function
{
auto _ = Profiler::TimedScope("EvoSearch/Evolve/Mutation");
+ ThreadedTraceApply pp(self->context_->postprocs);
+ ConcurrentBitmask cbmask(self->population_size);
+ std::vector<Schedule> next_population(self->population_size,
Schedule{nullptr});
+ // The worker function
auto f_find_candidate = [&cbmask, &population, &next_population, &pp,
this](int thread_id,
int trace_id) {
// Prepare samplers
@@ -571,40 +579,46 @@ std::vector<Schedule>
EvolutionarySearchNode::State::EvolveWithCostModel(
};
support::parallel_for_dynamic(0, self->population_size,
self->context_->num_threads,
f_find_candidate);
+
+ population.swap(next_population);
+ TVM_PY_LOG(INFO, self->context_->logging_func)
+ << "Evolve iter #" << iter << " done. Summary:\n"
+ << pp.SummarizeFailures();
}
- population.swap(next_population);
- TVM_PY_LOG(INFO, self->context_->logging_func) << "Evolve iter #" << iter
<< " done. Summary:\n"
- << pp.SummarizeFailures();
}
// Return the best states from the heap, sorting from higher score to lower
ones
- std::sort(heap.heap.begin(), heap.heap.end());
- std::vector<Schedule> results;
- results.reserve(num);
- for (const SizedHeap::Item& item : heap.heap) {
- results.push_back(item.sch);
- }
+ {
+ auto _ = Profiler::TimedScope("EvoSearch/Evolve/Misc");
+ std::sort(heap.heap.begin(), heap.heap.end());
+ std::vector<Schedule> results;
+ results.reserve(num);
+ for (const SizedHeap::Item& item : heap.heap) {
+ results.push_back(item.sch);
+ }
- constexpr int kNumScoresPerLine = 16;
- std::ostringstream os;
- int n = heap.heap.size();
- for (int st = 0; st < n; st += kNumScoresPerLine) {
- os << std::endl;
- int ed = std::min(st + kNumScoresPerLine, n);
- os << "[" << (st + 1) << " : " << ed << "]:\t";
- for (int i = st; i < ed; ++i) {
- if (i != st) {
- os << " ";
+ constexpr int kNumScoresPerLine = 16;
+ std::ostringstream os;
+ int n = heap.heap.size();
+ for (int st = 0; st < n; st += kNumScoresPerLine) {
+ os << std::endl;
+ int ed = std::min(st + kNumScoresPerLine, n);
+ os << "[" << (st + 1) << " : " << ed << "]:\t";
+ for (int i = st; i < ed; ++i) {
+ if (i != st) {
+ os << " ";
+ }
+ os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
- os << std::fixed << std::setprecision(4) << heap.heap.at(i).score;
}
+ TVM_PY_LOG(INFO, self->context_->logging_func)
+ << "Scores of the best " << n << " candidates:" << os.str();
+ return results;
}
- TVM_PY_LOG(INFO, self->context_->logging_func)
- << "Scores of the best " << n << " candidates:" << os.str();
- return results;
}
std::vector<Schedule> EvolutionarySearchNode::State::PickWithEpsGreedy(
const std::vector<Schedule>& unmeasured, const std::vector<Schedule>&
bests, int num) {
+ auto _ = Profiler::TimedScope("EvoSearch/PickWithEpsGreedy");
int num_rands = num * self->eps_greedy;
int num_bests = num - num_rands;
std::vector<int> rands =
diff --git a/src/meta_schedule/task_scheduler/task_scheduler.cc
b/src/meta_schedule/task_scheduler/task_scheduler.cc
index 9c1f451414..ea233648f4 100644
--- a/src/meta_schedule/task_scheduler/task_scheduler.cc
+++ b/src/meta_schedule/task_scheduler/task_scheduler.cc
@@ -22,6 +22,7 @@ namespace tvm {
namespace meta_schedule {
void TaskSchedulerNode::InitializeTask(int task_id) {
+ auto _ = Profiler::TimedScope("InitializeTask");
TuneContext task = this->tasks[task_id];
TVM_PY_LOG(INFO, this->logging_func)
<< "Initializing Task #" << task_id << ": " << task->task_name;
diff --git a/tests/python/unittest/test_meta_schedule_tune_relay.py
b/tests/python/unittest/test_meta_schedule_tune_relay.py
index c2baf8d2b9..d86b6fe48b 100644
--- a/tests/python/unittest/test_meta_schedule_tune_relay.py
+++ b/tests/python/unittest/test_meta_schedule_tune_relay.py
@@ -138,25 +138,20 @@ def test_meta_schedule_tune_relay(
mod, params, (input_name, _, _) = get_network(name=model_name,
input_shape=input_shape)
target = Target(target)
with tempfile.TemporaryDirectory() as work_dir:
- rt_mod1: tvm.runtime.Module = ms.tune_relay(
- mod=mod,
- params=params,
- target=target,
- config=ms.TuneConfig(
- strategy="evolutionary",
- num_trials_per_iter=32,
- max_trials_per_task=20000,
- max_trials_global=20000,
- search_strategy_config={
- "genetic_num_iters": 10,
- },
- ),
- work_dir=work_dir,
- database=ms.database.JSONDatabase(
- osp.join(work_dir, "workload.json"),
- osp.join(work_dir, "records.json"),
- ),
- )
+ with ms.Profiler() as profiler:
+ rt_mod1: tvm.runtime.Module = ms.tune_relay(
+ mod=mod,
+ params=params,
+ target=target,
+ config=ms.TuneConfig(
+ strategy="evolutionary",
+ num_trials_per_iter=32,
+ max_trials_per_task=20000,
+ max_trials_global=20000,
+ ),
+ work_dir=work_dir,
+ )
+ print(profiler.table())
# Compile without meta-scheduler for correctness check
with tvm.transform.PassContext(opt_level=0):
rt_mod2 = relay.build(mod, target=target, params=params)