Neutron3529 opened a new pull request #19738:
URL: https://github.com/apache/incubator-mxnet/pull/19738
There may be a ~3% performance gain with the new dataloader.
---
Code that compares the old `dataloader` and the new one:
old:
```
import mxnet as mx
from mxnet.gluon.data import DataLoader,ArrayDataset
from time import sleep,perf_counter_ns
# 50k train / 10k test synthetic (feature, label) pairs; label is 99 - feature.
train_data = ArrayDataset(mx.nd.array([[i] for i in range(50000)]),
                          mx.nd.array([[99 - i] for i in range(50000)]))
test_data = ArrayDataset(mx.nd.array([[i] for i in range(10000)]),
                         mx.nd.array([[99 - i] for i in range(10000)]))
def transform_train(sample):
    """Identity transform that simulates a 1.6 ms per-sample preprocessing cost."""
    sleep(0.0016)
    return sample
def transform_test(sample):
    """Identity transform that simulates a 0.8 ms per-sample preprocessing cost."""
    sleep(0.0008)
    return sample
train_iter = DataLoader(train_data.transform_first(transform_train),
                        batch_size=500, num_workers=10)
test_iter = DataLoader(test_data.transform_first(transform_test),
                       batch_size=500, num_workers=10)

if True:
    tic = perf_counter_ns()

    def _elapsed():
        # Seconds since `tic`, rounded to 2 decimals, as a string.
        return str(round((perf_counter_ns() - tic) * 1e-9, 2))

    for epoch in range(10):
        print("epoch" + str(epoch) + " start at " + _elapsed() + "s")
        for i in train_iter:
            sleep(0.1)  # stand-in for one training step per batch
        print(" finished train phase at " + _elapsed() + "s")
        for i in test_iter:
            sleep(0.05)  # stand-in for one evaluation step per batch
        print(" finished test phase at " + _elapsed() + "s")
    # Total wall-clock cost, unrounded.
    print("cost=" + str((perf_counter_ns() - tic) * 1e-9) + "s")
```
new:
```
import mxnet as mx
from mxnet.gluon.data import DataLoader,ArrayDataset
from mxnet.gluon.data import sampler as _sampler
from time import sleep,perf_counter_ns
# Same synthetic datasets as the baseline run: label is 99 - feature.
train_data = ArrayDataset(mx.nd.array([[i] for i in range(50000)]),
                          mx.nd.array([[99 - i] for i in range(50000)]))
test_data = ArrayDataset(mx.nd.array([[i] for i in range(10000)]),
                         mx.nd.array([[99 - i] for i in range(10000)]))
def transform_train(sample):
    """Pass the sample through unchanged after a simulated 1.6 ms of work."""
    sleep(0.0016)
    return sample
def transform_test(sample):
    """Pass the sample through unchanged after a simulated 0.8 ms of work."""
    sleep(0.0008)
    return sample
import pickle
import io
import sys
import signal
import multiprocessing
import multiprocessing.queues
from multiprocessing.reduction import ForkingPickler
from multiprocessing.pool import ThreadPool
import threading
import numpy as np
try:
import multiprocessing.resource_sharer
except ImportError:
pass
class MyDataLoader(DataLoader):
    """A ``DataLoader`` that keeps a prefetching iterator warm across epochs.

    An iterator is created eagerly in ``__init__``, and each call to
    ``iter_mod`` hands out the pre-built iterator while immediately queuing
    up a replacement — so worker processes can begin loading the next
    epoch's batches while the caller is still iterating the current one.
    """

    def __init__(self, dataset, batch_size=None, shuffle=False, sampler=None,
                 last_batch=None, batch_sampler=None, batchify_fn=None,
                 num_workers=0, pin_memory=False, pin_device_id=0,
                 prefetch=None, thread_pool=False, timeout=120):
        super(MyDataLoader, self).__init__(
            dataset, batch_size, shuffle, sampler, last_batch,
            batch_sampler, batchify_fn, num_workers, pin_memory,
            pin_device_id, prefetch, thread_pool, timeout)
        # Kick off prefetching right away so the first epoch starts warm.
        self._iter = self.__iter__()

    def iter_mod(self):
        """Return the pre-built iterator and eagerly start building the next one."""
        current = self._iter
        self._iter = self.__iter__()
        return current
train_iter = MyDataLoader(train_data.transform_first(transform_train),
                          batch_size=500, num_workers=10)
test_iter = MyDataLoader(test_data.transform_first(transform_test),
                         batch_size=500, num_workers=10)

if True:
    tic = perf_counter_ns()

    def _elapsed():
        # Seconds since `tic`, rounded to 2 decimals, as a string.
        return str(round((perf_counter_ns() - tic) * 1e-9, 2))

    for epoch in range(10):
        print("epoch" + str(epoch) + " start at " + _elapsed() + "s")
        for i in train_iter.iter_mod():
            sleep(0.1)  # stand-in for one training step per batch
        print(" finished train phase at " + _elapsed() + "s")
        for i in test_iter.iter_mod():
            sleep(0.05)  # stand-in for one evaluation step per batch
        print(" finished test phase at " + _elapsed() + "s")
    # Total wall-clock cost, unrounded.
    print("cost=" + str((perf_counter_ns() - tic) * 1e-9) + "s")
```
log(old):
```
epoch0 start at 0.0s
finished train phase at 11.32s
finished test phase at 12.94s
epoch1 start at 12.94s
finished train phase at 24.31s
finished test phase at 25.91s
epoch2 start at 25.91s
finished train phase at 37.34s
finished test phase at 38.95s
epoch3 start at 38.95s
finished train phase at 50.28s
finished test phase at 51.89s
epoch4 start at 51.89s
finished train phase at 63.2s
finished test phase at 64.81s
epoch5 start at 64.81s
finished train phase at 76.14s
finished test phase at 77.75s
epoch6 start at 77.75s
finished train phase at 89.13s
finished test phase at 90.74s
epoch7 start at 90.74s
finished train phase at 102.11s
finished test phase at 103.72s
epoch8 start at 103.72s
finished train phase at 115.08s
finished test phase at 116.7s
epoch9 start at 116.7s
finished train phase at 128.06s
finished test phase at 129.67s
cost=129.67192333600002s
```
log(new):
```
epoch0 start at 0.0s
finished train phase at 13.13s
finished test phase at 14.19s
epoch1 start at 14.19s
finished train phase at 25.54s
finished test phase at 26.6s
epoch2 start at 26.6s
finished train phase at 37.94s
finished test phase at 39.01s
epoch3 start at 39.01s
finished train phase at 50.31s
finished test phase at 51.37s
epoch4 start at 51.37s
finished train phase at 62.64s
finished test phase at 63.7s
epoch5 start at 63.7s
finished train phase at 75.01s
finished test phase at 76.07s
epoch6 start at 76.07s
finished train phase at 87.39s
finished test phase at 88.45s
epoch7 start at 88.45s
finished train phase at 99.72s
finished test phase at 100.78s
epoch8 start at 100.78s
finished train phase at 112.1s
finished test phase at 113.16s
epoch9 start at 113.16s
finished train phase at 124.43s
finished test phase at 125.5s
cost=125.498369854s
```
(Measured with mxnet version `1.7.0`; results are likely similar on the `v1.x` branch.)
## Description ##
(Brief description on what this PR is about)
## Checklist ##
### Essentials ###
- [ ] PR's title starts with a category (e.g. [BUGFIX], [MODEL], [TUTORIAL],
[FEATURE], [DOC], etc)
- [ ] Changes are complete (i.e. I finished coding on this PR)
- [ ] All changes have test coverage
- [ ] Code is well-documented
### Changes ###
- [ ] Feature1, tests, (and when applicable, API doc)
- [ ] Feature2, tests, (and when applicable, API doc)
## Comments ##
- If this change is a backward incompatible change, why must this change be
made.
- Interesting edge cases to note here
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]