eric-haibin-lin closed pull request #10983: [MXNET-427] Fix trainer.load_state by removing param_dict from optimizer state pickle
URL: https://github.com/apache/incubator-mxnet/pull/10983
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py index da67fc0b1d9..e25aa7e68b9 100644 --- a/python/mxnet/gluon/trainer.py +++ b/python/mxnet/gluon/trainer.py @@ -303,6 +303,8 @@ def load_states(self, fname): if self._update_on_kvstore: self._kvstore.load_optimizer_states(fname) self._optimizer = self._kvstore._updater.optimizer + param_dict = {i: param for i, param in enumerate(self._params)} + self._optimizer.param_dict = param_dict else: with open(fname, 'rb') as f: states = f.read() diff --git a/python/mxnet/optimizer.py b/python/mxnet/optimizer.py index 1d2fd2e73df..0c3fc904fb1 100644 --- a/python/mxnet/optimizer.py +++ b/python/mxnet/optimizer.py @@ -426,6 +426,17 @@ def _get_wd(self, index): wd *= self.wd_mult.get(self.idx2name[index], 1.0) return wd + def __getstate__(self): + ret = self.__dict__.copy() + # do not include param_dict in the state + del ret['param_dict'] + return ret + + def __setstate__(self, state): + self.__dict__ = state + # param_dict needs to be explicitly set by the trainer + self.param_dict = {} + # convenience wrapper for Optimizer.Register register = Optimizer.register # pylint: disable=invalid-name diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index fb73e53bc05..a2688282650 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -509,10 +509,10 @@ def dict_equ(a, b): assert (x.data(mx.cpu(1)).asnumpy() == -4).all() - trainer.save_states('test.states') + trainer.save_states('test_trainer.states') states = deepcopy(trainer._kvstore._updater.states) if trainer._update_on_kvstore \ else deepcopy(trainer._updaters[0].states) - trainer.load_states('test.states') + 
trainer.load_states('test_trainer.states') if trainer._update_on_kvstore: dict_equ(trainer._kvstore._updater.states, states) assert trainer._optimizer == trainer._kvstore._updater.optimizer @@ -538,6 +538,22 @@ def dict_equ(a, b): assert (x.data(mx.cpu(1)).asnumpy() == -1).all(), x.data(mx.cpu(1)).asnumpy() +@with_seed() +def test_trainer_save_load(): + x = gluon.Parameter('x', shape=(10,), lr_mult=1.0) + x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros') + trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1}) + with mx.autograd.record(): + for w in x.list_data(): + y = w + 1 + y.backward() + trainer.step(1) + assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.1 + trainer.save_states('test_trainer_save_load.states') + trainer.load_states('test_trainer_save_load.states') + x.lr_mult = 2.0 + # check if parameter dict is correctly associated with optimizer after load_state + assert trainer._kvstore._updater.optimizer._get_lr(0) == 0.2 @with_seed() def test_block_attr_hidden(): ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services