fedorzh commented on issue #9185: Gluon provided ResNet does not get desirable accuracy on CIFAR10 URL: https://github.com/apache/incubator-mxnet/issues/9185#issuecomment-356107939 Hey, sorry, I was on vacation and didn't have access to the laptop. Here are some snippets showing how I train: ```model_name = 'resnet18_v2' gpu_count = 1 _ctx_list = [mx.gpu(i) for i in range(gpu_count)] net = gluon.model_zoo.vision.get_model(model_name, pretrained=False, classes=len(np.unique(y)), ctx=_ctx_list) net.collect_params().initialize(mx.init.Xavier(magnitude=2, rnd_type='gaussian', factor_type="in"), ctx=_ctx_list) loss = gluon.loss.SoftmaxCrossEntropyLoss() learning_rate = 0.5 momentum = 0.9 wd = 0.0001 def multi_factor_scheduler(begin_epoch, epoch_size, step=[60, 75, 90], factor=0.1): step_ = [epoch_size * (x-begin_epoch) for x in step if x-begin_epoch > 0] return mx.lr_scheduler.MultiFactorScheduler(step=step_, factor=factor) if len(step_) else None mfs = multi_factor_scheduler(0, 12, step=[120, 160], factor=0.1) trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_params=dict(learning_rate=learning_rate, momentum=momentum, wd=wd, lr_scheduler=mfs), kvstore='device' if len(_ctx_list) > 0 else 'local') ``` and ```epochs = 100 _batch_size=464 train_data = gluon.data.DataLoader(gluon.data.ArrayDataset(X, y), _batch_size, shuffle=True) accuracy = np.zeros(epochs) for e in range(epochs): tic = time.time() for batch in train_data: cur_contexts = _ctx_list if batch[0].shape[0] < len(_ctx_list): cur_contexts = cur_contexts[:batch[0].shape[0]] data = gluon.utils.split_and_load(batch[0], ctx_list=cur_contexts, batch_axis=0, even_split=False) label = gluon.utils.split_and_load(batch[1], ctx_list=cur_contexts, batch_axis=0, even_split=False) Ls = [] with autograd.record(): # Start recording the derivatives for x_, y_ in zip(data, label): L = loss(net(x_), y_) # store the loss and do backward after we have done forward # on all GPUs for better speed on multiple GPUs. 
Ls.append(L) for L in Ls: L.backward() trainer.step(batch[0].shape[0]) scores_test = predict_scores(X_test) predictions_test = np.argmax(scores_test, axis=1) accuracy[e] = np.mean(predictions_test == y_test) print("Epoch {}. Time {:.2f}. Current Test Accuracy {}".format(e, time.time() - tic, accuracy[e]))```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
