areusch commented on a change in pull request #7472:
URL: https://github.com/apache/tvm/pull/7472#discussion_r582396601



##########
File path: include/tvm/runtime/profiling.h
##########
@@ -37,55 +37,110 @@
 namespace tvm {
 namespace runtime {
 
+/*! \brief Base class for all implementations.
+ *
+ * New implementations of this interface should make sure that `Start` and 
`Stop`
+ * are as lightweight as possible. Expensive state synchronization should be
+ * done in `SyncAndGetTime`.
+ */
+class TimerNode : public Object {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Start() = 0;
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Stop() = 0;
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  virtual int64_t SyncAndGetTime() = 0;
+
+  virtual ~TimerNode() {}
+
+  static constexpr const char* _type_key = "TimerNode";
+  TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object);
+};
+
+/*! \brief Timer for a specific device.
+ *
+ * You should not construct this class directly. Instead use `StartTimer`.
+ */
+class Timer : public ObjectRef {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Start() { operator->()->Start(); }
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Stop() { operator->()->Stop(); }
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  int64_t SyncAndGetTime() { return operator->()->SyncAndGetTime(); }
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode);
+};
+
 /*!
  * \brief Default timer if one does not exist to the platform.

Review comment:
       Should this say "platform" or "context"?

##########
File path: include/tvm/runtime/profiling.h
##########
@@ -37,55 +37,110 @@
 namespace tvm {
 namespace runtime {
 
+/*! \brief Base class for all implementations.
+ *
+ * New implementations of this interface should make sure that `Start` and 
`Stop`
+ * are as lightweight as possible. Expensive state synchronization should be
+ * done in `SyncAndGetTime`.
+ */
+class TimerNode : public Object {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Start() = 0;
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Stop() = 0;
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  virtual int64_t SyncAndGetTime() = 0;
+
+  virtual ~TimerNode() {}
+
+  static constexpr const char* _type_key = "TimerNode";
+  TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object);
+};
+
+/*! \brief Timer for a specific device.
+ *
+ * You should not construct this class directly. Instead use `StartTimer`.
+ */
+class Timer : public ObjectRef {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Start() { operator->()->Start(); }
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Stop() { operator->()->Stop(); }
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  int64_t SyncAndGetTime() { return operator->()->SyncAndGetTime(); }
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode);
+};
+
 /*!
  * \brief Default timer if one does not exist to the platform.
  * \param ctx The context to time on.
  *
  * Note that this timer performs synchronization between the device and CPU,
  * which can lead to overhead in the reported results.
  */
-TypedPackedFunc<TypedPackedFunc<int64_t()>()> DefaultTimer(TVMContext ctx);
+Timer DefaultTimer(TVMContext ctx);
 
 /*!
  * \brief Get a device specific timer.
  * \param ctx The device context to time.
- * \return A function, that when called starts a timer. The results from this
- *         function is another function that will stop the timer and return
- *         another function that returns the elapsed time in nanoseconds. The
- *         third function should be called as late as possible to avoid
- *         synchronization overhead.
- *
- * This three function approach is complicated, but it is necessary to avoid
- * synchronization overhead on GPUs. On GPUs, the first two function generate
- * start and stop events respectively. The third function synchronizes the GPU
- * with the CPU and gets the elapsed time between events.
- *
- * Users can register a timer for a device by registering a packed function
- * with the name "profiler.timer.device_name". This function should take a
- * TVMContext and return a new function. The new function will stop the timer
- * when called and returns a third function. The third function should return
- * the elapsed time between the first and second call in nanoseconds.
- *
- * Note that timers are specific to a context (and by extension device stream).
- * The code being timed should run on the specific context only, otherwise you
- * may get mixed results. Furthermore, the context should not be modified
- * between the start and end of the timer (i.e. do not call 
TVMDeviceSetStream).
+ * \return A `Timer` that has already been started.
  *
  * Example usage:
  * \code{.cpp}
- * auto timer_stop = StartTimer(TVMContext::cpu());
+ * Timer t = StartTimer(TVMContext::cpu());
  * my_long_running_function();
- * auto get_elapsed = timer_stop();
+ * t.Stop();
  * ... // some more computation
- * int64_t nanosecs = get_elapsed() // elapsed time in nanoseconds
+ * int64_t nanosecs = t.SyncAndGetTime() // elapsed time in nanoseconds
  * \endcode
+ *
+ * To add a new device-specific timer, register a new function
+ * "profiler.timer.my_device" (where `my_device` is the `DeviceName` of your
+ * device). This function should accept a `TVMContext` and return a new `Timer`
+ * that has already been started.
  */
-inline TypedPackedFunc<TypedPackedFunc<int64_t()>()> StartTimer(TVMContext 
ctx) {
+inline Timer StartTimer(TVMContext ctx) {

Review comment:
       Could `StartTimer` be a static method on `Timer` instead?

##########
File path: include/tvm/runtime/profiling.h
##########
@@ -37,55 +37,110 @@
 namespace tvm {
 namespace runtime {
 
+/*! \brief Base class for all implementations.
+ *
+ * New implementations of this interface should make sure that `Start` and 
`Stop`
+ * are as lightweight as possible. Expensive state synchronization should be
+ * done in `SyncAndGetTime`.
+ */
+class TimerNode : public Object {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Start() = 0;
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Stop() = 0;
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  virtual int64_t SyncAndGetTime() = 0;
+
+  virtual ~TimerNode() {}
+
+  static constexpr const char* _type_key = "TimerNode";
+  TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object);
+};
+
+/*! \brief Timer for a specific device.
+ *
+ * You should not construct this class directly. Instead use `StartTimer`.
+ */
+class Timer : public ObjectRef {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Start() { operator->()->Start(); }
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Stop() { operator->()->Stop(); }
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  int64_t SyncAndGetTime() { return operator->()->SyncAndGetTime(); }
+  TVM_DEFINE_MUTABLE_OBJECT_REF_METHODS(Timer, ObjectRef, TimerNode);
+};
+
 /*!
  * \brief Default timer if one does not exist to the platform.
  * \param ctx The context to time on.
  *
  * Note that this timer performs synchronization between the device and CPU,
  * which can lead to overhead in the reported results.
  */
-TypedPackedFunc<TypedPackedFunc<int64_t()>()> DefaultTimer(TVMContext ctx);
+Timer DefaultTimer(TVMContext ctx);

Review comment:
       Could `DefaultTimer` be a static method on `Timer` instead?

##########
File path: include/tvm/runtime/profiling.h
##########
@@ -37,55 +37,110 @@
 namespace tvm {
 namespace runtime {
 
+/*! \brief Base class for all implementations.
+ *
+ * New implementations of this interface should make sure that `Start` and 
`Stop`
+ * are as lightweight as possible. Expensive state synchronization should be
+ * done in `SyncAndGetTime`.
+ */
+class TimerNode : public Object {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Start() = 0;
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Stop() = 0;
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.

Review comment:
       Maybe include a comment that this calls TVMSynchronize under the hood (I think?), and update the declaration below too.

##########
File path: tests/cpp/profiling.cc
##########
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <gtest/gtest.h>
+#include <tvm/runtime/profiling.h>
+
+#include <chrono>
+#include <thread>
+
+namespace tvm {
+namespace runtime {
+TEST(DefaultTimer, Basic) {
+  using namespace tvm::runtime;
+  DLContext ctx;
+  ctx.device_type = kDLCPU;
+  ctx.device_id = 0;
+
+  Timer t = StartTimer(ctx);
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  t.Stop();
+  int64_t elapsed = t.SyncAndGetTime();
+  CHECK_GT(elapsed, 0);

Review comment:
       Theoretically this could check for >10ms, no? I guess there is the possibility of the timer being adjusted... thoughts?

##########
File path: src/runtime/vm/profiler/vm.h
##########
@@ -51,7 +52,7 @@ class VirtualMachineDebug : public VirtualMachine {
                     const std::vector<ObjectRef>& args) final;
 
   std::unordered_map<Index, std::string> packed_index_map_;
-  std::unordered_map<Index, std::vector<TypedPackedFunc<int64_t()>>> 
op_durations_;
+  std::unordered_map<Index, std::vector<Timer>> op_durations_;

Review comment:
       maybe could call this op_timers_ now?

##########
File path: src/runtime/vm/profiler/vm.cc
##########
@@ -49,8 +48,8 @@ PackedFunc VirtualMachineDebug::GetFunction(const 
std::string& name,
       std::unordered_map<Index, std::vector<double>> op_durations;
       for (auto kv : op_durations_) {
         std::vector<double> durations;

Review comment:
       can we add units here?

##########
File path: include/tvm/runtime/profiling.h
##########
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file include/tvm/runtime/profiling.h
+ * \brief Runtime profiling including timers.
+ */
+#ifndef TVM_RUNTIME_PROFILING_H_
+#define TVM_RUNTIME_PROFILING_H_
+
+#include <dlpack/dlpack.h>
+#include <tvm/runtime/c_runtime_api.h>
+#include <tvm/runtime/device_api.h>
+#include <tvm/runtime/object.h>
+#include <tvm/runtime/registry.h>
+
+#include <chrono>
+#include <map>
+#include <string>
+
+namespace tvm {
+namespace runtime {
+
+/*! \brief Base class for all implementations.
+ *
+ * New implementations of this interface should make sure that `Start` and 
`Stop`
+ * are as lightweight as possible. Expensive state synchronization should be
+ * done in `SyncAndGetTime`.
+ */
+class TimerNode : public Object {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Start() = 0;
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  virtual void Stop() = 0;
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  virtual int64_t SyncAndGetTime() = 0;
+
+  virtual ~TimerNode() {}
+
+  static constexpr const char* _type_key = "TimerNode";
+  TVM_DECLARE_BASE_OBJECT_INFO(TimerNode, Object);
+};
+
+/*! \brief Timer for a specific device.
+ *
+ * You should not construct this class directly. Instead use `StartTimer`.
+ */
+class Timer : public ObjectRef {
+ public:
+  /*! \brief Start the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Start() { operator->()->Start(); }
+  /*! \brief Stop the timer.
+   *
+   * Note: this function should only be called once per object.
+   */
+  void Stop() { operator->()->Stop(); }
+  /*! \brief Synchronize timer state and return elapsed time between `Start` 
and `Stop`.
+   * \return The time in nanoseconds between `Start` and `Stop`.
+   *
+   * This function is necessary because we want to avoid timing the overhead of
+   * doing timing. When using multiple timers, it is recommended to stop all of
+   * them before calling `SyncAndGetTime` on any of them.
+   *
+   * Note: this function should be only called once per object. It may incur
+   * a large synchronization overhead (for example, with GPUs).
+   */
+  int64_t SyncAndGetTime() { return operator->()->SyncAndGetTime(); }

Review comment:
       I would maybe advocate for `SyncAndGetElapsedNanos`, or some other name that includes the units.

##########
File path: src/runtime/vm/profiler/vm.cc
##########
@@ -45,7 +45,15 @@ PackedFunc VirtualMachineDebug::GetFunction(const 
std::string& name,
     return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
       ICHECK_EQ(args.size(), 1U);
       std::vector<std::pair<Index, double>> op_acc_time;
+      std::unordered_map<Index, std::vector<double>> op_durations;
       for (auto kv : op_durations_) {
+        std::vector<double> durations;
+        for (auto t : kv.second) {
+          durations.push_back(t.SyncAndGetTime() / 1e3);

Review comment:
       I think you want to cast to double before dividing




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to