Vikas89 opened a new issue #14029: Out of memory error in 3d Conv for matrix splits > 10, CUDNN strange behaviour URL: https://github.com/apache/incubator-mxnet/issues/14029 ## Description Memory bloat (OOM) in 3D Conv when matrix split size is greater than 10. If I run this code on ec2 p2.xl with 12 GB of GPU memory, the program runs well for a = net(x[:, :, :, :10, :10]) print(a.shape) a = net(x[:, :, :, :9, :9]) print(a.shape) but starts getting a CUDA OOM error for: a = net(x[:, :, :, :11, :11]) print(a.shape) `` import os os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' import mxnet as mx from mxnet.gluon import nn # from resnet_i3d import BasicBlockV1 def _conv3x3(channels, stride, in_channels): return nn.Conv3D(channels, kernel_size=3, strides=stride, padding=1, use_bias=False, in_channels=in_channels) # Blocks class BasicBlockV1(mx.gluon.HybridBlock): r"""BasicBlock V1 from `"Deep Residual Learning for Image Recognition" <http://arxiv.org/abs/1512.03385>`_ paper. This is used for ResNet V1 for 18, 34 layers. Parameters ---------- channels : int Number of output channels. stride : int Stride size. downsample : bool, default False Whether to downsample the input. in_channels : int, default 0 Number of input channels. Default is 0, to infer from the graph. 
""" def __init__(self, channels, stride, downsample=False, in_channels=0, **kwargs): super(BasicBlockV1, self).__init__(**kwargs) with self.name_scope(): self.body = nn.HybridSequential(prefix='') self.body.add(_conv3x3(channels, stride, in_channels)) self.body.add(nn.BatchNorm()) self.body.add(nn.Activation('relu')) self.body.add(_conv3x3(channels, 1, channels)) self.body.add(nn.BatchNorm()) if downsample: self.downsample = nn.HybridSequential(prefix='') self.downsample.add(nn.Conv3D(channels, kernel_size=1, strides=stride, use_bias=False, in_channels=in_channels)) self.downsample.add(nn.BatchNorm()) else: self.downsample = None def hybrid_forward(self, F, x): residual = x x = self.body(x) if self.downsample: residual = self.downsample(residual) x = F.Activation(residual+x, act_type='relu') return x ctx = mx.gpu(0) net = nn.HybridSequential(prefix='') channels = [256, 512] net.add(BasicBlockV1(channels[-1], 1, downsample=True, in_channels=channels[-2], prefix='')) net.add(BasicBlockV1(channels[-1], 1, False, in_channels=channels[-1], prefix='')) net.initialize(ctx=ctx) x = mx.nd.random.normal(0, 1, (300, 256, 2, 14, 14), ctx=ctx) a = net(x[:, :, :, :10, :10]) print(a.shape) a = net(x[:, :, :, :9, :9]) print(a.shape) b = net(x[:, :, :, :11, :11]) print(b.shape) `` ## Environment info (Required) mxnet-cu92==1.3.1, gluoncv==0.3.0 Cuda 9.2, cudnn 7.1 instance used : [p2.xl on ec2](https://aws.amazon.com/blogs/aws/new-p2-instance-type-for-amazon-ec2-up-to-16-gpus/) , 12 GB of GPU memory ``` What to do: copy script as given above to my_test.py run on p2.xl using python my_test.py ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
