This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new bc29367b04 Move WrapTimeEvaluator from RPC to profiling, NFC (#11172)
bc29367b04 is described below
commit bc29367b044c552bfcf53aae69ae123652fdc4fb
Author: Krzysztof Parzyszek <[email protected]>
AuthorDate: Thu Apr 28 21:20:51 2022 -0500
Move WrapTimeEvaluator from RPC to profiling, NFC (#11172)
---
include/tvm/runtime/profiling.h | 23 +++++++++
.../graph_executor/debug/graph_executor_debug.cc | 2 +-
src/runtime/profiling.cc | 55 ++++++++++++++++++++
src/runtime/rpc/rpc_module.cc | 59 +---------------------
src/runtime/rpc/rpc_session.h | 23 ---------
5 files changed, 81 insertions(+), 81 deletions(-)
diff --git a/include/tvm/runtime/profiling.h b/include/tvm/runtime/profiling.h
index 606bf502c1..3cfb73f58e 100644
--- a/include/tvm/runtime/profiling.h
+++ b/include/tvm/runtime/profiling.h
@@ -511,6 +511,29 @@ String ShapeString(const std::vector<int64_t>& shape,
DLDataType dtype);
PackedFunc ProfileFunction(Module mod, std::string func_name, int device_type,
int device_id,
int warmup_iters, Array<MetricCollector>
collectors);
+/*!
+ * \brief Wrap a timer function to measure the time cost of a given packed
function.
+ * \param f The function argument.
+ * \param dev The device.
+ * \param number The number of times to run this function for taking average.
+ * We call these runs as one `repeat` of measurement.
+ * \param repeat The number of times to repeat the measurement.
+ * In total, the function will be invoked (1 + number x repeat) times,
+ * where the first one is warm up and will be discarded.
+ * The returned result contains `repeat` costs,
+ * each of which is an average of `number` costs.
+ * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
+ * By default, one `repeat` contains `number` runs. If this parameter
is set,
+ * the parameter `number` will be dynamically adjusted to meet the
+ * minimum duration requirement of one `repeat`.
+ * i.e., when the run time of one `repeat` falls below this time,
+ * the `number` parameter will be automatically increased.
+ * \param f_preproc The function to be executed before we execute the time
evaluator.
+ * \return f_timer A timer function.
+ */
+PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat,
int min_repeat_ms,
+ PackedFunc f_preproc = nullptr);
+
} // namespace profiling
} // namespace runtime
} // namespace tvm
diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc
b/src/runtime/graph_executor/debug/graph_executor_debug.cc
index cf7a4cd049..97d89206f5 100644
--- a/src/runtime/graph_executor/debug/graph_executor_debug.cc
+++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -113,7 +113,7 @@ class GraphExecutorDebug : public GraphExecutor {
// assume host runs things which is first device
Device& d = devices_[0];
- PackedFunc time_evaluator = WrapTimeEvaluator(
+ PackedFunc time_evaluator = profiling::WrapTimeEvaluator(
TypedPackedFunc<void()>([this, node_index]() {
this->RunOpHost(node_index); }), d, number,
repeat, min_repeat_ms);
std::string result = time_evaluator();
diff --git a/src/runtime/profiling.cc b/src/runtime/profiling.cc
index 037cd1ce79..6d95a0fbd2 100644
--- a/src/runtime/profiling.cc
+++ b/src/runtime/profiling.cc
@@ -739,6 +739,61 @@ TVM_REGISTER_GLOBAL("runtime.profiling.ProfileFunction")
}
});
+PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int
repeat, int min_repeat_ms,
+ PackedFunc f_preproc) {
+ ICHECK(pf != nullptr);
+
+ if (static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev)) {
+ auto get_micro_time_evaluator =
runtime::Registry::Get("micro._GetMicroTimeEvaluator");
+ ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
+ return (*get_micro_time_evaluator)(pf, dev, number, repeat);
+ }
+
+ auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs
args,
+
TVMRetValue* rv) mutable {
+ TVMRetValue temp;
+ std::ostringstream os;
+ // skip first time call, to activate lazy compilation components.
+ pf.CallPacked(args, &temp);
+
+ DeviceAPI::Get(dev)->StreamSync(dev, nullptr);
+
+ for (int i = 0; i < repeat; ++i) {
+ if (f_preproc != nullptr) {
+ f_preproc.CallPacked(args, &temp);
+ }
+ double duration_ms = 0.0;
+
+ do {
+ if (duration_ms > 0.0) {
+ number = static_cast<int>(std::max((min_repeat_ms / (duration_ms /
number) + 1),
+ number * 1.618)); // 1.618 is
chosen by random
+ }
+
+ Timer t = Timer::Start(dev);
+ // start timing
+ for (int i = 0; i < number; ++i) {
+ pf.CallPacked(args, &temp);
+ }
+ t->Stop();
+ int64_t t_nanos = t->SyncAndGetElapsedNanos();
+ duration_ms = t_nanos / 1e6;
+ } while (duration_ms < min_repeat_ms);
+
+ double speed = duration_ms / 1e3 / number;
+ os.write(reinterpret_cast<char*>(&speed), sizeof(speed));
+ }
+
+ std::string blob = os.str();
+ TVMByteArray arr;
+ arr.size = blob.length();
+ arr.data = blob.data();
+ // return the time.
+ *rv = arr;
+ };
+ return PackedFunc(ftimer);
+}
+
} // namespace profiling
} // namespace runtime
} // namespace tvm
diff --git a/src/runtime/rpc/rpc_module.cc b/src/runtime/rpc/rpc_module.cc
index a139211957..8e558fb627 100644
--- a/src/runtime/rpc/rpc_module.cc
+++ b/src/runtime/rpc/rpc_module.cc
@@ -357,61 +357,6 @@ inline void CPUCacheFlush(int begin_index, const TVMArgs&
args) {
}
}
-PackedFunc WrapTimeEvaluator(PackedFunc pf, Device dev, int number, int
repeat, int min_repeat_ms,
- PackedFunc f_preproc) {
- ICHECK(pf != nullptr);
-
- if (static_cast<int>(dev.device_type) == static_cast<int>(kDLMicroDev)) {
- auto get_micro_time_evaluator =
runtime::Registry::Get("micro._GetMicroTimeEvaluator");
- ICHECK(get_micro_time_evaluator != nullptr) << "micro backend not enabled";
- return (*get_micro_time_evaluator)(pf, dev, number, repeat);
- }
-
- auto ftimer = [pf, dev, number, repeat, min_repeat_ms, f_preproc](TVMArgs
args,
-
TVMRetValue* rv) mutable {
- TVMRetValue temp;
- std::ostringstream os;
- // skip first time call, to activate lazy compilation components.
- pf.CallPacked(args, &temp);
-
- DeviceAPI::Get(dev)->StreamSync(dev, nullptr);
-
- for (int i = 0; i < repeat; ++i) {
- if (f_preproc != nullptr) {
- f_preproc.CallPacked(args, &temp);
- }
- double duration_ms = 0.0;
-
- do {
- if (duration_ms > 0.0) {
- number = static_cast<int>(std::max((min_repeat_ms / (duration_ms /
number) + 1),
- number * 1.618)); // 1.618 is
chosen by random
- }
-
- Timer t = Timer::Start(dev);
- // start timing
- for (int i = 0; i < number; ++i) {
- pf.CallPacked(args, &temp);
- }
- t->Stop();
- int64_t t_nanos = t->SyncAndGetElapsedNanos();
- duration_ms = t_nanos / 1e6;
- } while (duration_ms < min_repeat_ms);
-
- double speed = duration_ms / 1e3 / number;
- os.write(reinterpret_cast<char*>(&speed), sizeof(speed));
- }
-
- std::string blob = os.str();
- TVMByteArray arr;
- arr.size = blob.length();
- arr.data = blob.data();
- // return the time.
- *rv = arr;
- };
- return PackedFunc(ftimer);
-}
-
TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
.set_body_typed([](Optional<Module> opt_mod, std::string name, int
device_type, int device_id,
int number, int repeat, int min_repeat_ms, std::string
f_preproc_name) {
@@ -434,7 +379,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
}
PackedFunc pf = m.GetFunction(name, true);
CHECK(pf != nullptr) << "Cannot find " << name << " in the global
registry";
- return WrapTimeEvaluator(pf, dev, number, repeat, min_repeat_ms,
f_preproc);
+ return profiling::WrapTimeEvaluator(pf, dev, number, repeat,
min_repeat_ms, f_preproc);
}
} else {
auto* pf = runtime::Registry::Get(name);
@@ -446,7 +391,7 @@ TVM_REGISTER_GLOBAL("runtime.RPCTimeEvaluator")
<< "Cannot find " << f_preproc_name << " in the global function";
f_preproc = *pf_preproc;
}
- return WrapTimeEvaluator(*pf, dev, number, repeat, min_repeat_ms,
f_preproc);
+ return profiling::WrapTimeEvaluator(*pf, dev, number, repeat,
min_repeat_ms, f_preproc);
}
});
diff --git a/src/runtime/rpc/rpc_session.h b/src/runtime/rpc/rpc_session.h
index 8923103157..d78b3219bf 100644
--- a/src/runtime/rpc/rpc_session.h
+++ b/src/runtime/rpc/rpc_session.h
@@ -282,29 +282,6 @@ struct RemoteSpace {
std::shared_ptr<RPCSession> sess;
};
-/*!
- * \brief Wrap a timer function to measure the time cost of a given packed
function.
- * \param f The function argument.
- * \param dev The device.
- * \param number The number of times to run this function for taking average.
- * We call these runs as one `repeat` of measurement.
- * \param repeat The number of times to repeat the measurement.
- * In total, the function will be invoked (1 + number x repeat) times,
- * where the first one is warm up and will be discarded.
- * The returned result contains `repeat` costs,
- * each of which is an average of `number` costs.
- * \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
- * By default, one `repeat` contains `number` runs. If this parameter
is set,
- * the parameter `number` will be dynamically adjusted to meet the
- * minimum duration requirement of one `repeat`.
- * i.e., when the run time of one `repeat` falls below this time,
- * the `number` parameter will be automatically increased.
- * \param f_preproc The function to be executed before we execute the time
evaluator.
- * \return f_timer A timer function.
- */
-PackedFunc WrapTimeEvaluator(PackedFunc f, Device dev, int number, int repeat,
int min_repeat_ms,
- PackedFunc f_preproc = nullptr);
-
/*!
* \brief Create a Global RPC module that refers to the session.
* \param sess The RPC session of the global module.