ThomasDelteil commented on issue #10042: [MXNET-86] Gluon dataloader crash on 
speech recognition training
URL: 
https://github.com/apache/incubator-mxnet/issues/10042#issuecomment-374378588
 
 
   Thanks @Jerryzcn 
   here is a reproducible example:
   Play with the `NUM_WORKERS` and `optimized` parameters to surface the problems I mentioned above.
   
   ```python
   import mxnet as mx
   print(mx.__version__)
   from mxnet import nd, autograd, gluon
   import os
   import pandas as pd
   from mxnet.gluon.data import ArrayDataset
   from mxnet.gluon.data import DataLoader
   import numpy as np
   import multiprocessing
   import wget
   
   # Fetch the pickled review dataset once and cache it locally; subsequent
   # runs reuse the already-downloaded file.
   if not os.path.isfile('pickleddata.pkl'):
       wget.download('https://s3.us-east-2.amazonaws.com/tdelteil-test-mxnet/pickleddata.pkl')
   # NOTE(review): assumes the pickle holds a pandas object with 'X' (text)
   # and 'Y' (label) columns, as used below -- confirm against the upload.
   data = pd.read_pickle('pickleddata.pkl')
   
   
   # /!\ The important bit: tweak these two knobs to reproduce the problem.
   # (FIX: the original snippet had the NUM_WORKERS statement broken across
   # two lines by email wrapping; rejoined into valid Python.)
   # Number of worker processes used by the DataLoader for data loading.
   NUM_WORKERS = multiprocessing.cpu_count()
   # Selects the loss-tracking code path in the training loop below.
   optimized = True

   # The Amazon review categories used as class labels.
   categories = [
       'Home_and_Kitchen',
       'Books',
       'CDs_and_Vinyl',
       'Movies_and_TV',
       'Cell_Phones_and_Accessories',
       'Sports_and_Outdoors',
       'Clothing_Shoes_and_Jewelry',
   ]
   
   # The 69 characters as specified in the paper (Zhang et al., char-CNN).
   ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+ =<>()[]{}")
   ALPHABET_INDEX = {letter: index for index, letter in enumerate(ALPHABET)}  # { a: 0, b: 1, etc}
   FEATURE_LEN = 1014  # max-length in characters for one document
   BATCH_SIZE = 128  # number of documents per batch

   def encode(text):
       """One-hot encode *text* as a (len(ALPHABET), FEATURE_LEN) float32 array.

       The document is lowercased, quantized in backward (reversed) order as
       in the paper, and truncated to FEATURE_LEN characters; characters not
       in the alphabet (and any padding) are all-zero columns.
       """
       encoded = np.zeros([len(ALPHABET), FEATURE_LEN], dtype='float32')
       # BUG FIX: the original computed `review` but then looped over the raw
       # `text`, so lowercasing/reversal were ignored and uppercase letters
       # were silently dropped. Also `text.lower()[:FEATURE_LEN-1:-1]`
       # evaluates to '' for any document shorter than FEATURE_LEN; the
       # apparent intent (backward quantization) is:
       review = text.lower()[::-1][:FEATURE_LEN]
       for i, letter in enumerate(review):
           if letter in ALPHABET_INDEX:
               encoded[ALPHABET_INDEX[letter]][i] = 1
       return encoded
   
   class AmazonDataSet(ArrayDataset):
       """ArrayDataset whose samples are character-encoded on the fly.

       _data[0] holds the raw review texts and _data[1] the labels; each
       lookup returns the pair (encoded_text, label).
       """
       def __getitem__(self, idx):
           text, label = self._data[0][idx], self._data[1][idx]
           return encode(text), label
   
   
   # Data loaders: 80/20 train/test split over the pickled dataframe.
   split = 0.8
   split_index = int(split * len(data))
   # FIX: pandas .as_matrix() is deprecated (removed in pandas 0.25);
   # .values is the backward-compatible replacement.
   train_data_X = data['X'][:split_index].values
   train_data_Y = data['Y'][:split_index].values
   test_data_X = data['X'][split_index:].values
   test_data_Y = data['Y'][split_index:].values
   train_dataset = AmazonDataSet(train_data_X, train_data_Y)
   test_dataset = AmazonDataSet(test_data_X, test_data_Y)

   # last_batch='discard' drops the final incomplete batch so every batch
   # holds exactly BATCH_SIZE documents.
   train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE,
                                 num_workers=NUM_WORKERS, last_batch='discard')
   test_dataloader = DataLoader(test_dataset, shuffle=True, batch_size=BATCH_SIZE,
                                num_workers=NUM_WORKERS, last_batch='discard')

   # context: run on GPU
   ctx = mx.gpu()
   
   # build network (character-level CNN)
   # (FIX: the original snippet had several net.add(...) calls broken across
   # lines by email wrapping; reconstructed, with the repeated layers folded
   # into loops -- the layer sequence added is identical.)
   NUM_FILTERS = 256  # number of convolutional filters per convolutional layer
   NUM_OUTPUTS = len(categories)  # number of classes
   FULLY_CONNECTED = 1024  # number of units in the fully connected dense layer
   DROPOUT_RATE = 0.5  # probability of node drop out
   LEARNING_RATE = 0.01  # learning rate of the gradient
   MOMENTUM = 0.9  # momentum of the gradient
   WDECAY = 0.00001  # regularization term to limit size of weights

   net = gluon.nn.HybridSequential()
   with net.name_scope():
       # Two wide (kernel 7) conv blocks, each followed by max pooling.
       for _ in range(2):
           net.add(gluon.nn.Conv1D(channels=NUM_FILTERS, kernel_size=7, activation='relu'))
           net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
       # Four narrow (kernel 3) conv layers, then a final pooling stage.
       for _ in range(4):
           net.add(gluon.nn.Conv1D(channels=NUM_FILTERS, kernel_size=3, activation='relu'))
       net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
       # Classifier head: two dropout-regularized dense layers plus output.
       net.add(gluon.nn.Flatten())
       net.add(gluon.nn.Dense(FULLY_CONNECTED, activation='relu'))
       net.add(gluon.nn.Dropout(DROPOUT_RATE))
       net.add(gluon.nn.Dense(FULLY_CONNECTED, activation='relu'))
       net.add(gluon.nn.Dropout(DROPOUT_RATE))
       net.add(gluon.nn.Dense(NUM_OUTPUTS))

   net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
   
   # Objective: multi-class cross entropy over the category logits.
   softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

   # Optimizer: SGD with momentum and weight decay.
   sgd_params = {'learning_rate': LEARNING_RATE,
                 'wd': WDECAY,
                 'momentum': MOMENTUM}
   trainer = gluon.Trainer(net.collect_params(), 'sgd', sgd_params)
   
   # Training Loop

   import time
   start_epoch = 6
   number_epochs = 7
   smoothing_constant = .01
   tick = time.time()
   net.hybridize()
   for e in range(start_epoch, number_epochs):
       moving_loss = 0.0  # defined up front so the epoch summary never NameErrors
       for i, (review, label) in enumerate(train_dataloader):
           review = review.as_in_context(ctx)
           label = label.as_in_context(ctx)
           with autograd.record():
               output = net(review)
               loss = softmax_cross_entropy(output, label)
           loss.backward()
           trainer.step(review.shape[0])

           # Moving average of the loss. In the "optimized" path the value
           # stays an NDArray (no blocking GPU->CPU sync per batch); the
           # non-optimized path forces a sync via .asscalar() every batch.
           if optimized:
               curr_loss = nd.mean(loss)
           else:
               curr_loss = nd.mean(loss).asscalar()
           moving_loss = (curr_loss if (i == 0)
                          else (1 - smoothing_constant) * moving_loss
                          + (smoothing_constant) * curr_loss)

           if (i % 100 == 0):
               tock = time.time()
               if optimized:
                   print('Batch {}:{},{},{} seconds for 100 batches'.format(
                       i, curr_loss.asscalar(), moving_loss.asscalar(), tock - tick))
               else:
                   print('Batch {}:{},{},{} seconds for 100 batches'.format(
                       i, curr_loss, moving_loss, tock - tick))
               tick = tock

       # BUG FIX: the original unconditionally called moving_loss.asscalar()
       # (a plain float when optimized=False) and referenced an undefined
       # `test_accuracy`, so every epoch ended in an exception.
       epoch_loss = moving_loss.asscalar() if optimized else moving_loss
       test_accuracy = 'n/a'  # TODO: evaluation over test_dataloader not implemented in this repro
       print("Epoch %s. Loss: %s, Test_acc %s" % (e, epoch_loss, test_accuracy))
   
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to