leezu commented on a change in pull request #17841: Gluon data 2.0: c++ 
dataloader and built-in image/bbox transforms
URL: https://github.com/apache/incubator-mxnet/pull/17841#discussion_r409252348
 
 

 ##########
 File path: include/mxnet/io.h
 ##########
 @@ -113,5 +120,104 @@ struct DataIteratorReg
  */
 #define MXNET_REGISTER_IO_ITER(name)                                    \
   DMLC_REGISTRY_REGISTER(::mxnet::DataIteratorReg, DataIteratorReg, name)
+
+/*!
+ * \brief A randomly accessible dataset which provides GetLen() and GetItem().
+ * Unlike DataIter, it's a static lookup storage which is friendly to random access.
+ * The dataset itself should NOT contain data processing, which should be applied during
+ * data augmentation or transformation processes.
+ */
+class Dataset {
+ public:
+  /*!
+  *  \brief Initialize the dataset by setting the parameters.
+  *  This function needs to be called before all other functions.
+  *  \param kwargs the keyword arguments parameters
+  */
+  virtual void Init(const std::vector<std::pair<std::string, std::string> >& 
kwargs) = 0;
+  /*!
+  *  \brief Get the size of the dataset
+  */
+  virtual uint64_t GetLen(void) const = 0;
+  /*!
+  *  \brief Create a copy of dataset for threaded worker
+  */
+  virtual Dataset* Clone(void) const = 0;
+  /*!
+  *  \brief Get the ndarray items given index in dataset
+  *  \param idx the integer index for required data
+  *  \param ret the returned ndarray items
+  */
+  virtual bool GetItem(uint64_t idx, std::vector<NDArray>* ret) = 0;
+  // virtual destructor
+  virtual ~Dataset(void) {}
+};  // class Dataset
+
+/*! \brief typedef the factory function of dataset */
+typedef std::function<Dataset *()> DatasetFactory;
 
 Review comment:
   Need to change DatasetFactory type to accept kwargs.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Reply via email to