Hello, @ThomasDelteil
I am having the same problem, but in my case I am implementing my network from
scratch. I am implementing a U-Net, and when training starts it shows this error:
**UserWarning: Gradient of Parameter `unet0_conv0_bias` on context gpu(0) has
not been updated by backward since last `step`. This could mean a bug in your
model that made it only use a subset of the Parameters (Blocks) for this
iteration. If you are intentionally only using a subset, call step with
ignore_stale_grad=True to suppress this warning and skip updating of Parameters
with stale gradient**
Any idea which layer is not being used by my trainer?
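
For what it's worth, this is the kind of check I am trying in order to find the parameter with the stale gradient. It is only a rough sketch: `net`, `loss_fn`, `trainer`, `data`, `label`, and `batch_size` are placeholders from my training loop, and an all-zero gradient is just a heuristic for "not touched by backward":

```python
import mxnet as mx

# One forward/backward pass, then look for parameters whose gradient is still
# all zeros, i.e. parameters that backward apparently never touched.
with mx.autograd.record():
    loss = loss_fn(net(data), label)
loss.backward()

for name, param in net.collect_params().items():
    if param.grad_req != 'null' and param.grad().abs().sum().asscalar() == 0:
        print('possibly unused parameter:', name)

# The warning can also be silenced explicitly, as its message suggests:
# trainer.step(batch_size, ignore_stale_grad=True)
```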
My code:
```python
from mxnet.gluon import nn


class UNet(nn.HybridBlock):
    def __init__(self, num_classes, nfilter=64, **kwargs):
        # nn.HybridBlock.__init__(self, **kwargs)
        super(UNet, self).__init__(**kwargs)
        with self.name_scope():
            # Applying padding=1 to have 'same' padding
            # Remember the formula for the output size of a convolution:
            # o = (width - k + 2p)/s + 1 --> p = (k - 1)/2
            # Check this:
            # https://www.quora.com/How-can-I-calculate-the-size-of-output-of-convolutional-layer
            # Encoder
            # self.encoder = nn.HybridSequential()
            self.conv1_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv1_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter *= 2  # 128
            self.conv2_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv2_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter *= 2  # 256
            self.conv3_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv3_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter *= 2  # 512
            self.conv4_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv4_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter *= 2  # 1024
            self.conv5_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv5_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            # Decoder
            nfilter //= 2  # 512
            self.upconv6 = nn.Conv2D(nfilter, kernel_size=1, padding=0,
                                     use_bias=False, activation='relu')
            self.conv6_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv6_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter //= 2  # 256
            self.upconv7 = nn.Conv2D(nfilter, kernel_size=1, padding=0,
                                     use_bias=False, activation='relu')
            self.conv7_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv7_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter //= 2  # 128
            self.upconv8 = nn.Conv2D(nfilter, kernel_size=1, padding=0,
                                     use_bias=False, activation='relu')
            self.conv8_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv8_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            nfilter //= 2  # 64
            self.upconv9 = nn.Conv2D(nfilter, kernel_size=1, padding=0,
                                     use_bias=False, activation='relu')
            self.conv9_1 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            self.conv9_2 = nn.Conv2D(nfilter, kernel_size=3, padding=1)
            # self.pool = nn.MaxPool2D()
            self.pool1 = nn.MaxPool2D()
            self.pool2 = nn.MaxPool2D()
            self.pool3 = nn.MaxPool2D()
            self.pool4 = nn.MaxPool2D()
            self.conv_pred = nn.Conv2D(num_classes, kernel_size=1)
            # Using HybridSequential avoids this error:
            # UserWarning: Gradient of Parameter `unet0_conv0_bias` on context
            # gpu(0) has not been updated by backward since last `step`.
            # Don't know why
            # self.conv_pred = nn.HybridSequential()
            # self.conv_pred.add(nn.Conv2D(num_classes, kernel_size=1))

    def hybrid_forward(self, F, x):
        # Encoder
        # conv block 1
        print(x.shape)
        conv1_1 = self.conv1_1(x)
        print(conv1_1.shape)
        conv1_1 = F.relu(conv1_1)
        conv1_2 = self.conv1_2(conv1_1)
        conv1_2 = F.relu(conv1_2)
        pool1 = self.pool1(conv1_2)
        # conv block 2
        print(pool1.shape)
        conv2_1 = self.conv2_1(pool1)
        conv2_1 = F.relu(conv2_1)
        print(conv2_1.shape)
        conv2_2 = self.conv2_2(conv2_1)
        conv2_2 = F.relu(conv2_2)
        pool2 = self.pool2(conv2_2)
        # conv block 3
        conv3_1 = self.conv3_1(pool2)
        conv3_1 = F.relu(conv3_1)
        conv3_2 = self.conv3_2(conv3_1)
        conv3_2 = F.relu(conv3_2)
        pool3 = self.pool3(conv3_2)
        # conv block 4
        conv4_1 = self.conv4_1(pool3)
        conv4_1 = F.relu(conv4_1)
        conv4_2 = self.conv4_2(conv4_1)
        conv4_2 = F.relu(conv4_2)
        pool4 = self.pool4(conv4_2)
        # Middle
        # conv block 5 n_f=1024
        conv_middle = self.conv5_1(pool4)
        conv_middle = F.relu(conv_middle)
        conv_middle = self.conv5_2(conv_middle)
        conv_middle = F.relu(conv_middle)
        # Decoder
        # [TODO] All convolutions after upsample need a relu
        # Upsampling conv block 6 -- n_f=512
        up6 = F.UpSampling(conv_middle, scale=2, sample_type='nearest')
        up6 = self.upconv6(up6)
        # Concatenate along the channel dimension
        merge6 = F.concat(up6, conv4_2, dim=1)
        conv6_1 = self.conv6_1(merge6)
        conv6_1 = F.relu(conv6_1)
        conv6_2 = self.conv6_2(conv6_1)
        conv6_2 = F.relu(conv6_2)
        # Upsampling conv block 7 -- n_f=256
        up7 = F.UpSampling(conv6_2, scale=2, sample_type='nearest')
        up7 = self.upconv7(up7)
        # Concatenate along the channel dimension
        merge7 = F.concat(up7, conv3_2, dim=1)
        conv7_1 = self.conv7_1(merge7)
        conv7_1 = F.relu(conv7_1)
        conv7_2 = self.conv7_2(conv7_1)
        conv7_2 = F.relu(conv7_2)
        # Upsampling conv block 8 -- n_f=128
        up8 = F.UpSampling(conv7_2, scale=2, sample_type='nearest')
        up8 = self.upconv8(up8)
        # Concatenate along the channel dimension
        merge8 = F.concat(up8, conv2_2, dim=1)
        conv8_1 = self.conv8_1(merge8)
        conv8_1 = F.relu(conv8_1)
        conv8_2 = self.conv8_2(conv8_1)
        conv8_2 = F.relu(conv8_2)
        # Upsampling conv block 9 -- n_f=64
        up9 = F.UpSampling(conv8_2, scale=2, sample_type='nearest')
        up9 = self.upconv9(up9)
        # Concatenate along the channel dimension
        merge9 = F.concat(up9, conv1_2, dim=1)
        conv9_1 = self.conv9_1(merge9)
        conv9_1 = F.relu(conv9_1)
        conv9_2 = self.conv9_2(conv9_1)
        conv9_2 = F.relu(conv9_2)
        out = self.conv_pred(conv9_2)
        return out
```
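
For reference, this is roughly how I instantiate and run the network. It is a minimal sketch: the context, number of classes, and batch shape are just example values, not my real training setup:

```python
import mxnet as mx
from mxnet import nd

ctx = mx.gpu(0)  # or mx.cpu()
net = UNet(num_classes=2)
net.initialize(mx.init.Xavier(), ctx=ctx)

# Dummy batch: 1 image, 3 channels, 256x256. The spatial size should be divisible
# by 16 so the four pooling stages line up with the four upsampling stages when
# the skip connections are concatenated.
x = nd.random.uniform(shape=(1, 3, 256, 256), ctx=ctx)
out = net(x)
print(out.shape)  # expected: (1, num_classes, 256, 256)
```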