zheng-da opened a new issue #12269: get memory error when running a model exported from gluon model zoo
URL: https://github.com/apache/incubator-mxnet/issues/12269

I tried to export resnet from the Gluon model zoo and run it in the Symbol executor, but got a serious memory error. The memory error is also non-deterministic.

Here is the minimal code. I need to run it multiple times to reproduce the error.

```python
import numpy as np
import mxnet as mx
import copy
import random
import itertools
from numpy.testing import assert_allclose, assert_array_equal
from mxnet.test_utils import *
from mxnet.gluon.model_zoo.vision import get_model

def test_make_subgraph():
    def create_weights(shapes, names):
        nd_dict = {}
        assert len(shapes) == len(names)
        for i in range(len(shapes)):
            nd_dict[names[i]] = mx.nd.array(np.ones(shapes[i]), ctx=default_context())
        return nd_dict

    def make_subgraph4(stype):
        model = get_model('resnet18_v1')
        model.hybridize()
        model.initialize()
        shape = (1, 3, 32, 32)
        data = mx.nd.random.normal(shape=shape)
        out = model(data)
        model.export('resnet18')
        orig = mx.sym.load('resnet18-symbol.json')
        arg_shapes, out_shapes, aux_shapes = orig.infer_shape(data=shape)
        weight_shapes = arg_shapes[1:]
        weight_names = orig.list_arguments()[1:]
        weight_dict = create_weights(weight_shapes, weight_names)
        aux_dict = create_weights(aux_shapes, orig.list_auxiliary_states())
        arr = mx.nd.random.uniform(-1, 1, shape=shape, ctx=default_context()).tostype(stype)
        arg_dict = weight_dict
        arg_dict['data'] = arr
        return (orig, arg_dict, aux_dict)

    orig, inputs, aux_states = make_subgraph4('default')
    all_inputs = copy.deepcopy(inputs)
    all_inputs.update(aux_states)
    args_grad = {key : mx.nd.empty(shape=all_inputs[key].shape) for key in all_inputs.keys()}
    e1 = orig.bind(ctx=default_context(), args=all_inputs, args_grad=args_grad, aux_states=all_inputs)
    args_grad = {key : mx.nd.empty(shape=all_inputs[key].shape) for key in all_inputs.keys()}
    e1.forward()
    mx.nd.waitall()
    out_grads = [mx.nd.random.uniform(-1, 1, shape=out.shape, ctx=default_context()) for out in e1.outputs]
    e1.backward(out_grads)
    mx.nd.waitall()

test_make_subgraph()
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

With regards,
Apache Git Services
