[GitHub] eric-haibin-lin closed pull request #10983: [MXNET-427] Fix trainer.load_state by removing param_dict from optimizer state pickle

GitBox Thu, 17 May 2018 14:08:49 -0700

eric-haibin-lin closed pull request #10983: [MXNET-427] Fix trainer.load_state 
by removing param_dict from optimizer state pickle
URL: https://github.com/apache/incubator-mxnet/pull/10983


This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign pull request (one
that comes from a fork) once it is merged, the diff is reproduced below
for the sake of provenance:

diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index da67fc0b1d9..e25aa7e68b9 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -303,6 +303,8 @@ def load_states(self, fname):
         if self._update_on_kvstore:
             self._kvstore.load_optimizer_states(fname)
             self._optimizer = self._kvstore._updater.optimizer
+            param_dict = {i: param for i, param in enumerate(self._params)}
+            self._optimizer.param_dict = param_dict
         else:
             with open(fname, 'rb') as f:
                 states = f.read()
diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py
index 1d2fd2e73df..0c3fc904fb1 100644
--- a/python/mxnet/optimizer.py
+++ b/python/mxnet/optimizer.py
@@ -426,6 +426,17 @@ def _get_wd(self, index):
             wd *= self.wd_mult.get(self.idx2name[index], 1.0)
         return wd
 
+    def __getstate__(self):
+        ret = self.__dict__.copy()
+        # do not include param_dict in the state
+        del ret['param_dict']
+        return ret
+
+    def __setstate__(self, state):
+        self.__dict__ = state
+        # param_dict needs to be explicitly set by the trainer
+        self.param_dict = {}
+
 # convenience wrapper for Optimizer.Register
 register = Optimizer.register   # pylint: disable=invalid-name
 
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index fb73e53bc05..a2688282650 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -509,10 +509,10 @@ def dict_equ(a, b):
 
     assert (x.data(mx.cpu(1)).asnumpy() == -4).all()
 
-    trainer.save_states('test.states')
+    trainer.save_states('test_trainer.states')
     states = deepcopy(trainer._kvstore._updater.states) if trainer._update_on_kvstore \
              else deepcopy(trainer._updaters[0].states)
-    trainer.load_states('test.states')
+    trainer.load_states('test_trainer.states')
     if trainer._update_on_kvstore:
         dict_equ(trainer._kvstore._updater.states, states)
         assert trainer._optimizer == trainer._kvstore._updater.optimizer
@@ -538,6 +538,22 @@ def dict_equ(a, b):
 
     assert (x.data(mx.cpu(1)).asnumpy() == -1).all(), x.data(mx.cpu(1)).asnumpy()
 
+@with_seed()
+def test_trainer_save_load():
+    x = gluon.Parameter('x', shape=(10,), lr_mult=1.0)
+    x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
+    with mx.autograd.record():
+        for w in x.list_data():
+            y = w + 1
+            y.backward()
+    trainer.step(1)
+    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.1
+    trainer.save_states('test_trainer_save_load.states')
+    trainer.load_states('test_trainer_save_load.states')
+    x.lr_mult = 2.0
+    # check if parameter dict is correctly associated with optimizer after load_state
+    assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.2
 
 @with_seed()
 def test_block_attr_hidden():


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

[GitHub] eric-haibin-lin closed pull request #10983: [MXNET-427] Fix trainer.load_state by removing param_dict from optimizer state pickle

Reply via email to