awsbillz commented on issue #14766: MKLDNN doesn't support 6 dimensions URL: https://github.com/apache/incubator-mxnet/issues/14766#issuecomment-486409649 The minimal reproducible code is below ```python """ A combination of 1) 8 channel on Conv2D and 2) hybridized net seem to cause the crash """ import mxnet as mx from mxnet.gluon import nn import mxnet.ndarray as F from mxnet.gluon.block import HybridBlock class Reshape2D(HybridBlock): def __init__(self, factor): super(Reshape2D, self).__init__() self._factors = (int(factor),) * 2 def hybrid_forward(self, F, x): f1, f2 = self._factors # (N, f1*f2*C, H, W) x = F.reshape(x, (0, -4, -1, f1 * f2, 0, 0)) # (N, C, f1*f2, H, W) x = F.reshape(x, (0, 0, -4, f1, f2, 0, 0)) # (N, C, f1, f2, H, W) x = F.transpose(x, (0, 1, 4, 2, 5, 3)) # (N, C, H, f1, W, f2) x = F.reshape(x, (0, 0, -3, -3)) # (N, C, H*f1, W*f2) return x class Net(HybridBlock): def __init__(self, **kwargs): super(Net, self).__init__(**kwargs) with self.name_scope(): self.conv1 = nn.Conv2D(8, kernel_size=5) # <- 1) self.reshape2D = Reshape2D(2) def hybrid_forward(self, F, x): x = self.conv1(x) x = self.reshape2D(x) return x net = Net() net.initialize(mx.init.Xavier(), ctx=mx.cpu()) net.hybridize() # <- 2) data = mx.nd.random_normal(shape=(1, 3, 600, 600)) # dummy data output = net(data) a = output.asnumpy() print(a) ``` We experimented turning on/off hybridization and there does appear to be a difference. Without hybridization. The code runs correctly ```bash root@6f1ae84d7a5a:/crowdcounting# MKLDNN_VERBOSE=1 python test.py mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_oihw out:f32_Ohwi8o,num:1,8x3x5x5,0.00317383 mkldnn_verbose,exec,convolution,jit:avx2,forward_inference,fsrc:nchw fwei:Ohwi8o fbia:x fdst:nChw8c,alg:convolution_direct,mb1_g1ic3oc8_ih600oh596kh5sh1dh0ph0_iw600ow596kw5sw1dw0pw0,2.40796 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_oihw out:f32_Ohwi8o,num:1,8x3x5x5,0.00219727 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_nChw8c out:f32_nchw,num:1,1x8x596x596,1.37305 [[[[ 0.63157237 -0.13570794 -0.13340439 ... 1.1704854 0.4845633 1.5047258 ] [ 1.2083259 0.37998998 -1.1894836 ... -0.48679283 0.65234876 0.07821631] [-0.5278215 -1.3345666 -0.61814046 ... -0.78595865 -0.6419563 0.01790139] ... [ 0.9739233 0.26053143 0.10977446 ... -1.0813308 -0.79446477 0.59918064] [ 0.6313175 -1.1710652 0.14362349 ... 0.14761467 1.2973223 0.07053347] [ 0.595051 0.53777224 -0.7015837 ... -0.9797133 0.46997756 -0.06855441]] [[ 0.08321354 -0.43839744 0.25719148 ... -0.48135263 0.2331308 -0.20161374] [-0.13869795 -1.0949888 0.67054224 ... -0.7056279 0.4550503 -0.39589623] [ 0.45498207 -0.2215124 -0.7991557 ... -1.1719621 -0.6238807 0.7984329 ] ... [-0.6333465 -0.48290926 0.6820876 ... 1.4864701 0.6147204 -0.41900736] [ 1.0353158 -0.06823038 -0.11530954 ... -0.06267852 -0.56047356 -0.70380664] [-0.51138985 -0.12400899 -1.7057749 ... -1.1799597 -0.4319848 -0.0879695 ]]]] ``` With hybridization, however, we see the stacktrace below: ```bash root@6f1ae84d7a5a:/crowdcounting# MKLDNN_VERBOSE=1 python test.py mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_oihw out:f32_Ohwi8o,num:1,8x3x5x5,0.00292969 mkldnn_verbose,exec,convolution,jit:avx2,forward_inference,fsrc:nchw fwei:Ohwi8o fbia:x fdst:nChw8c,alg:convolution_direct,mb1_g1ic3oc8_ih600oh596kh5sh1dh0ph0_iw600ow596kw5sw1dw0pw0,2.36499 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_nChw8c out:f32_nchw,num:1,1x8x596x596,1.36206 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_nchw out:f32_nChw8c,num:1,1x8x596x596,0.417969 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_nChw8c out:f32_nchw,num:1,1x8x596x596,1.21411 mkldnn_verbose,exec,reorder,simple:any,undef,in:f32_goihw out:f32_goihw,num:1,1x2x4x596x596,1.04883 mkldnn_verbose,exec,reorder,jit:uni,undef,in:f32_oihw out:f32_Ohwi8o,num:1,8x3x5x5,0.000976562 Traceback (most recent call last): File "test.py", line 45, in <module> a = output.asnumpy() File "/usr/local/lib/python2.7/dist-packages/mxnet/ndarray/ndarray.py", line 1980, in asnumpy ctypes.c_size_t(data.size))) File "/usr/local/lib/python2.7/dist-packages/mxnet/base.py", line 252, in check_call raise MXNetError(py_str(_LIB.MXGetLastError())) mxnet.base.MXNetError: [18:53:06] src/ndarray/ndarray.cc:481: MKLDNN doesn't support 6 dimensions Stack trace returned 10 entries: [bt] (0) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x21d6d4) [0x7f00719ec6d4] [bt] (1) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x21dab1) [0x7f00719ecab1] [bt] (2) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::NDArray::Chunk::SetMKLMem(nnvm::TShape const&, int)+0xe7) [0x7f007450a317] [bt] (3) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::NDArray::GetMKLDNNData() const+0x5e) [0x7f007451482e] [bt] (4) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(void mxnet::op::CastStorageComputeImpl<mshadow::cpu>(mxnet::OpContext const&, mxnet::NDArray const&, mxnet::NDArray const&)+0x267) [0x7f007239ebd7] [bt] (5) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::common::CastNonDefaultStorage(std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> > const&, mxnet::OpContext const&, bool)+0x24f) [0x7f007434207f] [bt] (6) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(mxnet::imperative::PushFCompute(std::function<void (nnvm::NodeAttrs const&, mxnet::OpContext const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&, std::vector<mxnet::TBlob, std::allocator<mxnet::TBlob> > const&)> const&, nnvm::Op const*, nnvm::NodeAttrs const&, mxnet::Context const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::Resource, std::allocator<mxnet::Resource> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<mxnet::NDArray*, std::allocator<mxnet::NDArray*> > const&, std::vector<unsigned int, std::allocator<unsigned int> > const&, std::vector<mxnet::OpReqType, std::allocator<mxnet::OpReqType> > const&)::{lambda(mxnet::RunContext)#1}::operator()(mxnet::RunContext) const+0x372) [0x7f00743d2ee2] [bt] (7) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x2b5ba6d) [0x7f007432aa6d] [bt] (8) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x2b5ba57) [0x7f007432aa57] [bt] (9) /usr/local/lib/python2.7/dist-packages/mxnet/libmxnet.so(+0x2b5ba57) [0x7f007432aa57] ```
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] With regards, Apache Git Services
