kice opened a new issue #9438: MXNet crash while getting output of the network
URL: https://github.com/apache/incubator-mxnet/issues/9438

Environment: Windows 10 + Python 3.6, mxnet-cu90 1.0.1b20171229

```python
import math

import numpy as np
import mxnet as mx
from mxnet.gluon import nn


class Scale(nn.HybridBlock):
    def __init__(self, channels, **kwargs):
        super(Scale, self).__init__(**kwargs)
        self.conv = nn.Conv2D(channels, kernel_size=1, padding=0, strides=1, use_bias=False)

    def hybrid_forward(self, F, x):
        return self.conv(x)


class FENet(nn.HybridBlock):
    def __init__(self, c=64, **kwargs):
        super(FENet, self).__init__(**kwargs)
        self.net = nn.HybridSequential()
        with self.name_scope():
            self.net.add(nn.BatchNorm())
            self.net.add(nn.Conv2D(c, kernel_size=3, padding=1, strides=1, use_bias=False))

    def hybrid_forward(self, F, x):
        out = F.relu(self.net[0](x))
        out = self.net[1](out)
        return out


class ResBlock(nn.HybridBlock):
    def __init__(self, c=64, **kwargs):
        super(ResBlock, self).__init__(**kwargs)
        self.conv1 = nn.Conv2D(c, kernel_size=3, padding=1, strides=1, use_bias=False)
        self.conv2 = nn.Conv2D(c, kernel_size=3, padding=1, strides=1, use_bias=False)
        self.bn1 = nn.BatchNorm()
        self.bn2 = nn.BatchNorm()

    def hybrid_forward(self, F, x):
        out = F.relu(self.bn1(x))
        out = self.conv1(out)
        out = F.relu(self.bn2(out))
        out = self.conv2(out)
        return out + x


class GateUnit(nn.HybridBlock):
    def __init__(self, c=64, **kwargs):
        super(GateUnit, self).__init__(**kwargs)
        self.gate = nn.HybridSequential()
        with self.name_scope():
            self.gate.add(nn.BatchNorm())
            self.gate.add(nn.Conv2D(c, kernel_size=1, padding=0, strides=1, use_bias=False))

    def hybrid_forward(self, F, x):
        out = F.relu(self.gate[0](x))
        out = self.gate[1](out)
        return out


class MemBlock(nn.HybridBlock):
    def __init__(self, r=6, c=64, **kwargs):
        super(MemBlock, self).__init__(**kwargs)
        self.r = r
        self.res_block = nn.HybridSequential()
        with self.name_scope():
            for i in range(r):
                self.res_block.add(ResBlock(c=c))

    def hybrid_forward(self, F, x):
        out = x
        mem = []
        for i in range(self.r):
            out = self.res_block[i](out)
            mem.append(out)
        return mem


class ReconNet(nn.HybridBlock):
    def __init__(self, channels, **kwargs):
        super(ReconNet, self).__init__(**kwargs)
        self.net = nn.HybridSequential()
        with self.name_scope():
            self.net.add(nn.BatchNorm())
            self.net.add(nn.Conv2D(channels, kernel_size=3, padding=1, strides=1, use_bias=False))

    def hybrid_forward(self, F, x):
        out = F.relu(self.net[0](x))
        return self.net[1](out)


class MemNet(nn.HybridBlock):
    def __init__(self, m=6, r=6, c=64, channels=3, verbose=False, **kwargs):
        super(MemNet, self).__init__(**kwargs)
        self.verbose = verbose
        self.m = m
        with self.name_scope():
            self.net = nn.HybridSequential()
            self.net.add(FENet(c=c))
            for i in range(m):
                self.net.add(MemBlock(r=r, c=c))
                self.net.add(GateUnit(c=c))
                self.net.add(ReconNet(channels=channels))
                self.net.add(Scale(channels=channels))

    def hybrid_forward(self, F, x):
        last = self.net[0](x)
        transition = []
        transition.append(last)
        HR_recovery_x = []  # for multi-supervised loss
        HR_recovery = []    # final output
        for i in range(1, self.m * 4, 4):
            mem = self.net[i](last)                           # memory block
            concat = [*transition]
            concat.extend(mem)
            last = self.net[i + 1](F.concat(*concat, dim=1))  # gate unit
            transition.append(last)
            recon = self.net[i + 2](last) + x                 # reconstruction
            HR_recovery_x.append(recon)
            HR_recovery.append(self.net[i + 3](recon))        # scale
        HR = HR_recovery[0]
        for i in range(1, len(HR_recovery)):
            HR = HR + HR_recovery[i]
        return [HR, *HR_recovery_x]


def pred(name, out, epoch, comment="", compare=True, no_bias=False):
    from collections import namedtuple
    from PIL import Image

    def get_psnr(imageA, imageB):
        err = np.sum((imageA.astype("float") - imageB.astype("float")) ** 2)
        err /= float(imageA.shape[0] * imageA.shape[1])
        return 10 * math.log10(1 / err)

    Batch = namedtuple('Batch', ['data'])

    orginal = Image.open(name)
    orginal = orginal.crop((0, 0, orginal.size[0] // 2 * 2, orginal.size[1] // 2 * 2))
    image = orginal.resize((orginal.size[0] // 2, orginal.size[1] // 2), resample=Image.BILINEAR)
    image = image.resize((image.size[0] * 2, image.size[1] * 2), resample=Image.LANCZOS)

    org = np.array(orginal) / 255.0
    img = np.array(image) / 255.0
    height, width = img.shape[:2]

    data = mx.nd.expand_dims(mx.nd.array(img), axis=0)
    data = mx.nd.transpose(data, axes=(0, 3, 1, 2)).astype('float32')

    ctx = mx.gpu(0)
    model = mx.mod.Module(mx.symbol.load("./MemNet_RGBx2_m6r6_so-symbol.json"),
                          context=ctx, data_names=['data'])
    param = mx.nd.load("./param1/MemNet_%03d-0000.params" % epoch)

    # bias_map = {}
    # esp = 1e-05
    # for k, v in param.items():
    #     if k.find("batchnorm0_gamma") != -1:
    #         name = k.split(":")[1].split("batchnorm0")[0]
    #         gamma = param[k].asnumpy()
    #         beta = param["arg:" + name + "batchnorm0_beta"].asnumpy()
    #         running_mean = param["aux:" + name + "batchnorm0_running_mean"].asnumpy()
    #         running_var = param["aux:" + name + "batchnorm0_running_var"].asnumpy()
    #         running_std = np.sqrt(running_var + esp)
    #         weight = param["arg:" + name + "conv0_weight"].asnumpy()
    #         if no_bias:
    #             bias = np.zeros(gamma.shape)
    #         else:
    #             bias = param["arg:" + name + "conv0_bias"].asnumpy()
    #         output_channel = gamma.shape[0]
    #         for i in range(output_channel):
    #             weight[i, :, :, :] *= gamma[i] / running_std[i]
    #             bias[i] *= gamma[i] / running_std[i]
    #             bias[i] += beta[i] - (gamma[i] * running_mean[i]) / running_std[i]
    #         param["arg:" + name + "conv0_weight"] = mx.nd.array(weight)
    #         if no_bias:
    #             bias_map["arg:" + name + "conv0_bias"] = mx.nd.array(bias)
    #         else:
    #             param["arg:" + name + "conv0_bias"] = mx.nd.array(bias)
    # if no_bias:
    #     for k, v in bias_map.items():
    #         param[k] = mx.nd.array(v)

    arg_param = {}
    aux_param = {}
    for k, v in param.items():
        if k.find("arg") != -1:
            arg_param[k.split(":")[1]] = v
        if k.find("aux") != -1:
            aux_param[k.split(":")[1]] = v

    model.bind(data_shapes=[('data', data.shape)], for_training=False)
    model.set_params(arg_params=arg_param, aux_params=aux_param)

    model.forward(data_batch=Batch([data]), is_train=False)
    # this line will crash
    model.get_outputs()[0]

    pred = mx.nd.transpose(model.get_outputs()[0], axes=(0, 2, 3, 1)).asnumpy()
    pred = np.squeeze(pred, axis=0)
    output = np.maximum(np.minimum(pred, 1.0), 0.0)
    output = Image.fromarray((output * 255).astype(np.uint8), 'RGB')
    output.save(fp=out, compress_level=9)
```

I can train the network using Gluon and save the params. But if I try to get the output of the network, MXNet crashes, and there is no error message for the crash. I also used C++ to get the output, but a dmlc::Error is thrown and nothing catches it. Did I do something wrong in the code?

Ref: [MemNet: A Persistent Memory Network for Image Restoration](https://arxiv.org/abs/1708.02209)
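For completeness, here is a minimal sketch of how the `MemNet_RGBx2_m6r6_so-symbol.json` / `*.params` pair loaded in `pred()` could be produced from the Gluon model. The training loop is omitted, and the dummy input shape, context, and the use of `HybridBlock.export` are assumptions on my part rather than details from the original report:

```python
import mxnet as mx

# Hypothetical export step (not shown in the original report): hybridize the
# trained Gluon model, run one dummy forward pass so the cached graph exists,
# then export the symbol JSON and parameter file that pred() loads.
ctx = mx.cpu()                                 # mx.gpu(0) in the reporter's setup
net = MemNet(m=6, r=6, c=64, channels=3)
net.initialize(mx.init.Xavier(), ctx=ctx)
net.hybridize()

dummy = mx.nd.zeros((1, 3, 64, 64), ctx=ctx)   # assumed input shape
net(dummy)                                     # builds the cached graph

# Writes MemNet_RGBx2_m6r6_so-symbol.json and MemNet_RGBx2_m6r6_so-0000.params
net.export("MemNet_RGBx2_m6r6_so")
```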
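One note on why no error message appears: MXNet's execution engine is asynchronous, so `model.forward()` only enqueues the computation and any backend error surfaces later, when the output is synchronized. The small debugging sketch below (my addition, reusing `model`, `Batch`, and `data` from `pred()` above) forces a blocking read right after the forward pass to pin down where the failure actually happens:

```python
import mxnet as mx

model.forward(data_batch=Batch([data]), is_train=False)
try:
    out = model.get_outputs()[0]
    # wait_to_read() blocks until the asynchronous computation has finished,
    # so an error raised inside the backend should surface here as a Python
    # exception instead of silently terminating the process later.
    out.wait_to_read()
    print("forward finished, output shape:", out.shape)
except mx.base.MXNetError as e:
    print("forward failed:", e)
```

If the process still dies without ever reaching the `except` branch, the failure is happening natively, which would also fit the uncaught `dmlc::Error` seen from the C++ API.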
