ryujaehun opened a new pull request #8704: URL: https://github.com/apache/tvm/pull/8704
When testing DensetNet-121 using AutoTvm, I saw unrealistic kernels like input channel 1 output channel 512. there are some problems with the internal implementation of [densenet](https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py) It is a autotvm task when creating densetnet121. You can see the input and output channels are weird. ``` import tvm import tvm.relay.testing def get_network(name, batch_size): """Get the symbol definition and random weight of a network""" input_shape = (batch_size, 3, 224, 224) output_shape = (batch_size, 1000) if "densenet" in name: n_layer = int(name.split('-')[1]) mod, params = relay.testing.densenet.get_workload(densenet_size=n_layer,batch_size=batch_size, dtype=dtype) else: raise ValueError("Unsupported network: " + name) return mod, params, input_shape, output_shape target = tvm.target.cuda() dtype = "float32" mod, params, input_shape, out_shape = get_network('densenet-121', batch_size=1) tasks = autotvm.task.extract_from_program( mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),) ) for task in reversed(tasks): print(task) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 7, 7), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 7, 7), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, 
workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (32, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (32, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 14, 14), 'float32'), ('TENSOR', (512, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 14, 14), 'float32'), ('TENSOR', (512, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 14, 14), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 14, 14), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (32, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 14, 14), 
'float32'), ('TENSOR', (32, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 28, 28), 'float32'), ('TENSOR', (256, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 28, 28), 'float32'), ('TENSOR', (256, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 28, 28), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 28, 28), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 56, 56), 'float32'), ('TENSOR', (128, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 56, 56), 'float32'), ('TENSOR', (128, 1, 1, 1), 'float32'), (1, 
1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 56, 56), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 56, 56), 'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (32, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (32, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32')) ``` ## Problem1 The call of the arguments was incorrect. https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L82 The order of growth_rate,bn_size should be changed. 
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L44 ## Solution1 ``` for i, num_layers in enumerate(block_config): layer_out = _make_dense_block(layer_out, num_layers, bn_size, growth_rate, i) num_features = num_features + num_layers * growth_rate ``` ## Problem2 The implementation of [mxnet](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/model_zoo/vision/densenet.py) describes bn_size as the "Multiplicative factor for number of bottle neck layers", so it is not batch_size. Therefore the `get_workload` function must be changed; adding an additional variable (bn_size) may be a simple solution. https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L137-L139 https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L62-L64 ## Solution2 ``` bn_size = 4 net = _make_dense_net( num_init_features, growth_rate, block_config, data_shape, dtype, bn_size, classes ) ``` ## Problem 3 The concatenate operation hasn't been implemented :( https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L44-L49 ## Solution3 ``` def _make_dense_block(data, num_layers, bn_size, growth_rate, index): """Makes a block of dense layers of the specified size.""" layer_out = data block = [] for i in range(num_layers): layer_out = _make_dense_layer(layer_out, growth_rate, bn_size, "%s_%s" % (index, i)) block.append(layer_out) out = relay.concatenate(block, 1) return out ``` ## Environment TVM: commit 2d1847c9d3ce70daed518d8b3d9dbf750ae34672 CUDA version: 10.2 System: Ubuntu 20.04 GCC 7.5 Build options: -DUSE_LLVM=ON -DUSE_CUDA=ON This is the autotvm task output when applying the 3 solutions. 
``` Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 224, 224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (128, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (128, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 128, 56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 56, 56), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 192, 56, 56), 'float32'), ('TENSOR', (128, 192, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 192, 56, 56), 'float32'), ('TENSOR', (128, 192, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) 
Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (128, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (128, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 28, 28), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 384, 28, 28), 'float32'), ('TENSOR', (256, 384, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 384, 28, 28), 'float32'), ('TENSOR', (256, 384, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) 
Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 128, 14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 14, 14), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 768, 14, 14), 'float32'), ('TENSOR', (512, 768, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 768, 14, 14), 'float32'), ('TENSOR', (512, 768, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 7, 7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 7, 7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) 
Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 7, 7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 128, 7, 7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32')) Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 7, 7), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32')) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
