piiswrong commented on a change in pull request #8972: Profiling enhancements, 
python API, vtune and chrome tracing objects, etc.
URL: https://github.com/apache/incubator-mxnet/pull/8972#discussion_r155400486
 
 

 ##########
 File path: src/engine/profiler.h
 ##########
 @@ -94,81 +254,816 @@ struct DevStat {
 class Profiler {
  public:
   enum ProfilerMode {
-      kOnlySymbolic = 0,
-      kAllOperator  = 1
+      kSymbolic = 1,
+      kImperative = 2,
+      kAPI = 4,
+      kMemory = 8
   };
   enum ProfilerState {
       kNotRunning = 0,
       kRunning = 1
   };
+
   /*! \brief set state of profiler */
   void SetState(ProfilerState state);
   /*! \return state of profiler */
   inline ProfilerState GetState() const {
     return this->state_;
   }
   /*! \brief set configure of profiler */
-  void SetConfig(ProfilerMode mode, std::string output_filename);
+  void SetConfig(int mode, std::string output_filename);
   /*! \return mode of profiler */
-  inline ProfilerMode GetMode() const {
+  inline int GetMode() const {
     return this->mode_;
   }
   /*! \return whether the profiler is enabled to output */
   inline bool IsEnableOutput() const {
     return this->enable_output_;
   }
-  /*! \brief dump the profile file */
-  void DumpProfile();
+  /*!
+   * \brief dump the profile file
+   * \param peform_cleanup Close off the json trace structures (ie last pass)
+   */
+  void DumpProfile(bool peform_cleanup = true);
+
+  /*!
+   * \brief Set whether calls to DumpProfile() should append or truncate the 
output file
+   * \param append true if profile information should be appended to the 
existing file
+   */
+  void SetDumpProfileAppendMode(bool append);
+
+  /*!
+   * \brief Set continuous asynchronous profile dump
+   * \param continuous_dump Whether to continuously dump profile information
+   * \param delay_in_seconds Delay between asynchronous dumps
+   */
+  void SetContinuousProfileDump(bool continuous_dump, float delay_in_seconds = 
1.0f);
+
   /*! \return the profiler init time, time unit is microsecond (10^-6) s */
   inline uint64_t GetInitTime() const {
     return init_time_;
   }
-  /*! \brief add one operation execution record in
-   *   corresponding device statistics */
-  OprExecStat* AddOprStat(int dev_type, uint32_t dev_id);
+  /*!
+   * \brief add one operation execution record in corresponding device 
statistics
+   * \tparam SetExtraInfoFunction
+   * \param dev_type
+   * \param dev_id
+   * \param set_extra_info_function
+   * \note Because when this function exits, the object is written to the 
profile queue,
+   *       and at that point, could be consumed and/or destroyed at any moment,
+   *       any preprocessing on the object is to be done in the 
set_extra_info_function
+   *       callback.  Another option is to use the 
CreateProfileStat()/AddProfileStat() pair,
+   *       adding it only after
+   */
+  template<typename StatType, typename SetExtraInfoFunction, typename ...Args>
+  void AddNewProfileStat(SetExtraInfoFunction set_extra_info_function, Args... 
args) {
+    if (!paused_) {
+      std::unique_ptr<StatType> stat = CreateProfileStat<StatType>(args...);
+      set_extra_info_function(stat.get());
+      AddProfileStat(&stat);
+    }
+  }
+
   /*! \return Profiler singleton */
   static Profiler* Get();
 
+  /*!DurationStat
+   * \brief Check the last append mode sent to DumpProfile (false by default)
+   * \return true if profile is in append mode, false if DumpFile() will 
truncate
+   */
+  bool append_mode() const { return append_profile_; }
+
+  /*!
+   * \brief Set whether statistic collection is to be paused
+   * \param paused true if statistic collection is to be paused, otherwise
+   * resume statistic collection
+   * \note Pause/Resume is not recursive
+   */
+  void set_paused(bool paused) { paused_ = paused; }
+
+  /*!
+   * \brief Get the calculated device count (number of devices to track in 
profile data).
+   * \return Device count
+   * \note Number of CPU's + Number of GPU's + One for CPU-Pinned
+   */
+  MSHADOW_CINLINE size_t DeviceCount() const { return cpu_num_ + gpu_num_ + 2; 
}
+
+  /*!
+   * \brief Compute device index given device type and id
+   * \param dev_type Device type
+   * \param dev_id Device ID
+   * \return Device index for indexing into device-specific data
+   */
+  size_t DeviceIndex(mxnet::Context::DeviceType dev_type, int32_t dev_id);
+
+  /*!
+   * \brief Device name
+   * \param dev_type Device type
+   * \param dev_id Device ID
+   * \return Character pointer to device name
+   */
+  const char *DeviceName(mxnet::Context::DeviceType dev_type, int32_t dev_id);
+
+
+  /*!
+   * \brief Device name
+   * \param index Device index
+   * \return Character pointer to device name
+   */
+  const char *DeviceName(const size_t index);
+
  protected:
   /*! \brief make constructor protected. */
   Profiler();
+  /*! \brief Destructor */
+  ~Profiler();
 
  private:
+  /*!
+   * \brief Create a new profile statistic object
+   * \tparam StatType The type of the profile statistic object
+   * \tparam Args Argument types to pass to the new object's constructor
+   * \param args Arguments to pass to the new object's constructor
+   * \return A unique_ptr to the new statistic object
+   */
+  template<typename StatType, typename ...Args>
+  static std::unique_ptr<typename std::enable_if<std::is_base_of<ProfileStat, 
StatType>::value,
+    StatType>::type> CreateProfileStat(Args... args) {
+    return std::unique_ptr<StatType>(new StatType(args...));
+  }
+
+  /*!
+   * \brief Add a general profile statistic object
+   * \tparam StatType Type of the statistic object
+   * \param stat The statistic object
+   */
+  template<typename StatType>
+  inline void AddProfileStat(std::unique_ptr<StatType> *stat) {
+    general_stats_.opr_exec_stats_->enqueue(stat->release());
+  }
+
   /*! \brief generate device information following chrome profile file format 
*/
-  void EmitPid(std::ostream *os, const std::string& name, uint32_t pid);
-  /*! \brief generate event information following chrome profile file format */
-  void EmitEvent(std::ostream *os, const std::string& name,
-          const std::string& category, const std::string& ph,
-          uint64_t ts, uint32_t pid, uint32_t tid);
+  void EmitPid(std::ostream *os, const std::string& name, size_t pid);
+
   /*! \brief Profiler instance */
   static Profiler* instance_;
   /*! \brief internal mutex of the profiler */
-  std::mutex m_;
+  std::recursive_mutex m_;
   /*! \brief indicate whether the profiler is running */
   ProfilerState state_;
   /*! \brief once running, enable profiler to output */
   bool enable_output_;
   /*! \brief indicate what operator the profiler will record */
-  ProfilerMode mode_;
+  int mode_;
   /*! \brief filename to output profile file */
   std::string filename_;
   /*! \brief profile statistics consist of multiple device statistics */
-  DevStat* profile_stat;
+  DeviceStats* profile_stat;
+  /*! \brief Stats not associated directly with a device */
+  DeviceStats  general_stats_;
+  /*! \brief Map category -> pid */
+  std::unordered_map<std::string, size_t> category_to_pid_;
   /*! \brief cpu number on the machine */
   unsigned int cpu_num_;
   /*! \brief gpu number on the machine */
   unsigned int gpu_num_;
   /*! \brief the profiler init time */
   uint64_t init_time_;
+  /*! \brief Last profile append mode */
+  volatile bool append_profile_ = false;
+  /*! \brief Number of non-meta profiling record emitted */
+  volatile uint64_t num_records_emitted_ = 0;
+  /*! \brief Number of times profile was dumped */
+  volatile uint64_t profile_dump_count_;
+  /*! \brief Whether profiling is paused */
+  volatile bool paused_ = false;
+  /*! \brief Asynchronous operation thread lifecycle control object */
+  std::shared_ptr<dmlc::ThreadGroup> thread_group_ = 
std::make_shared<dmlc::ThreadGroup>();
+  /* !\brief pids */
+  std::unordered_set<uint32_t> process_ids_;
 };
 
-/*! \return current clock time, time unit is microsecond (10^-6 s) */
-inline uint64_t NowInUsec();
-/*! \brief set operation execution start timestamp */
-void SetOprStart(OprExecStat* opr_stat);
-/*! \brief set operation execution end timestamp */
-void SetOprEnd(OprExecStat* opr_stat);
+#ifdef MXNET_USE_VTUNE
+#define VTUNE_ONLY_CODE(...) __VA_ARGS__  /* This is undefined at the bottom 
of this file */
+#else
+#define VTUNE_ONLY_CODE(...) /* */        /* This is undefined at the bottom 
of this file */
+#endif
+
+/**
+ *  _____              __  _  _  _                ____  _     _            _
+ * |  __ \            / _|(_)| |(_)              / __ \| |   (_)          | |
+ * | |__) |_ __  ___ | |_  _ | | _ _ __   __ _  | |  | | |__  _  ___   ___| |_ 
 ___
 
 Review comment:
   Did you copy and paste this from somewhere? If so, are there any license issues?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to