parallelgithub closed pull request #9752: Overload ndarrayiter URL: https://github.com/apache/incubator-mxnet/pull/9752
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/NDArrayIter.scala b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/NDArrayIter.scala index e7dd51b190..d39b587102 100644 --- a/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/NDArrayIter.scala +++ b/scala-package/core/src/main/scala/ml/dmlc/mxnet/io/NDArrayIter.scala @@ -38,15 +38,14 @@ import scala.collection.immutable.ListMap * the size of data does not match batch_size. Roll over is intended * for training and can cause problems if used for prediction. */ -class NDArrayIter (data: IndexedSeq[NDArray], label: IndexedSeq[NDArray] = IndexedSeq.empty, - private val dataBatchSize: Int = 1, shuffle: Boolean = false, - lastBatchHandle: String = "pad", - dataName: String = "data", labelName: String = "label") extends DataIter { +class NDArrayIter (data: IndexedSeq[(String, NDArray)], label: IndexedSeq[(String, NDArray)], + private val dataBatchSize: Int, shuffle: Boolean, + lastBatchHandle: String) extends DataIter { private val logger = LoggerFactory.getLogger(classOf[NDArrayIter]) - private val (_dataList: IndexedSeq[NDArray], - _labelList: IndexedSeq[NDArray]) = { + private val (_dataList: IndexedSeq[(String, NDArray)], + _labelList: IndexedSeq[(String, NDArray)]) = { // data should not be null and size > 0 require(data != null && data.size > 0, "data should not be null and data.size should not be zero") @@ -59,13 +58,13 @@ class NDArrayIter (data: IndexedSeq[NDArray], label: IndexedSeq[NDArray] = Index // discard final part if lastBatchHandle equals discard if (lastBatchHandle.equals("discard")) { - val dataSize = data(0).shape(0) + val dataSize = data(0)._2.shape(0) require(dataBatchSize <= dataSize, "batch_size need to be smaller 
than data size when not padding.") val keepSize = dataSize - dataSize % dataBatchSize - val dataList = data.map(ndArray => {ndArray.slice(0, keepSize)}) + val dataList = data.map { case(name, ndArray) => (name, {ndArray.slice(0, keepSize)}) } if (!label.isEmpty) { - val labelList = label.map(ndArray => {ndArray.slice(0, keepSize)}) + val labelList = label.map { case(name, ndArray) => (name, {ndArray.slice(0, keepSize)}) } (dataList, labelList) } else { (dataList, label) @@ -75,10 +74,25 @@ class NDArrayIter (data: IndexedSeq[NDArray], label: IndexedSeq[NDArray] = Index } } + def this( + data: IndexedSeq[NDArray], + label: IndexedSeq[NDArray] = IndexedSeq.empty, + dataBatchSize: Int = 1, + shuffle: Boolean = false, + lastBatchHandle: String = "pad", + dataName: String = "data", + labelName: String = "label") = { + this( + IO.initData(data, false, dataName), + IO.initData(label, true, labelName), + dataBatchSize, + shuffle, + lastBatchHandle) + } - val initData: IndexedSeq[(String, NDArray)] = IO.initData(_dataList, false, dataName) - val initLabel: IndexedSeq[(String, NDArray)] = IO.initData(_labelList, true, labelName) - val numData = _dataList(0).shape(0) + val initData: IndexedSeq[(String, NDArray)] = _dataList + val initLabel: IndexedSeq[(String, NDArray)] = _labelList + val numData = _dataList(0)._2.shape(0) val numSource = initData.size var cursor = -dataBatchSize @@ -173,7 +187,7 @@ class NDArrayIter (data: IndexedSeq[NDArray], label: IndexedSeq[NDArray] = Index * @return the data of current batch */ override def getData(): IndexedSeq[NDArray] = { - _getData(_dataList) + _getData(_dataList.map(_._2)) } /** @@ -181,7 +195,7 @@ class NDArrayIter (data: IndexedSeq[NDArray], label: IndexedSeq[NDArray] = Index * @return the label of current batch */ override def getLabel(): IndexedSeq[NDArray] = { - _getData(_labelList) + _getData(_labelList.map(_._2)) } /** ---------------------------------------------------------------- This is an automated message from the 
Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services