ryujaehun opened a new pull request #8704:
URL: https://github.com/apache/tvm/pull/8704


   When testing DenseNet-121 using AutoTVM, I saw unrealistic kernels, e.g. 
input channel 1 with output channel 512.
   
   There are some problems with the internal implementation of 
[densenet](https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py).
   
   Below is the AutoTVM task list produced when creating DenseNet-121. You can 
see that the input and output channels are weird.
   ```
   import tvm
   import tvm.relay.testing
   
   def get_network(name, batch_size):
       """Get the symbol definition and random weight of a network"""
       input_shape = (batch_size, 3, 224, 224)
       output_shape = (batch_size, 1000)
       if "densenet" in name:
           n_layer = int(name.split('-')[1])
           mod, params = 
relay.testing.densenet.get_workload(densenet_size=n_layer,batch_size=batch_size,
 dtype=dtype)
       else:
           raise ValueError("Unsupported network: " + name)
       return mod, params, input_shape, output_shape
   target = tvm.target.cuda()
   
   dtype = "float32"
   mod, params, input_shape, out_shape = get_network('densenet-121', 
batch_size=1)
   tasks = autotvm.task.extract_from_program(
       mod["main"], target=target, params=params, 
ops=(relay.op.get("nn.conv2d"),)
   )
   for task in reversed(tasks):
       print(task)
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 7, 7), 'float32'), 
('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'), 
kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 7, 7), 'float32'), 
('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 7, 7), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 
32, 7, 7), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 
1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 7, 7), 'float32'), 
('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'), 
kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 7, 7), 'float32'), 
('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 512, 7, 7), 
'float32'), ('TENSOR', (32, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 
7), 'float32'), ('TENSOR', (32, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 14, 14), 
'float32'), ('TENSOR', (512, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 14, 
14), 'float32'), ('TENSOR', (512, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 14, 14), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 14, 14), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 14, 14), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 
32, 14, 14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 
1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 14, 14), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 14, 
14), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 256, 14, 14), 
'float32'), ('TENSOR', (32, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 
14, 14), 'float32'), ('TENSOR', (32, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 28, 28), 
'float32'), ('TENSOR', (256, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 28, 
28), 'float32'), ('TENSOR', (256, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 28, 28), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 28, 28), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 28, 28), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 
32, 28, 28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 
1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 28, 28), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 28, 
28), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 
'float32'), ('TENSOR', (32, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 
28, 28), 'float32'), ('TENSOR', (32, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 56, 56), 
'float32'), ('TENSOR', (128, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 56, 
56), 'float32'), ('TENSOR', (128, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 1, 56, 56), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 1, 56, 56), 
'float32'), ('TENSOR', (32, 1, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 32, 56, 56), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', (1, 
32, 56, 56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 
1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 56, 56), 
'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 56, 
56), 'float32'), ('TENSOR', (1, 32, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 64, 56, 56), 
'float32'), ('TENSOR', (32, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 
56), 'float32'), ('TENSOR', (32, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 3, 224, 224), 
'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 224, 
224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), 
(1, 1), 'float32'))
   ```
   
   
   ## Problem1
   
   The arguments were passed in the wrong order.  
   
   
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L82
   
   The order of `growth_rate` and `bn_size` should be swapped. 
   
   
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L44
   
   ## Solution1
   
   ```
       for i, num_layers in enumerate(block_config):
           layer_out = _make_dense_block(layer_out, num_layers, 
bn_size,growth_rate,  i)
           num_features = num_features + num_layers * growth_rate
   ```
   
   ## Problem2
   
   The implementation in 
[mxnet](https://github.com/apache/incubator-mxnet/blob/master/python/mxnet/gluon/model_zoo/vision/densenet.py)
 describes `bn_size` as a "multiplicative factor for number of bottleneck 
layers", so it is not a batch size. 
   Therefore the `get_workload` function must be changed. 
   Adding a separate variable (`bn_size`) may be the simplest solution.
   
   
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L137-L139
   
   
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L62-L64
   
   ## Solution2
   
   ```
    bn_size = 4
       net = _make_dense_net(
           num_init_features, growth_rate, block_config, data_shape, dtype, 
bn_size, classes
       )
   ```
   
   ## Problem 3 
   
   The concatenate operation has not been implemented. :(
   
   
https://github.com/apache/tvm/blob/b893774f38c648536645c3ac5775428b3e9d25b0/python/tvm/relay/testing/densenet.py#L44-L49
   
   ## Solution3 
   
   ```
   def _make_dense_block(data, num_layers, bn_size, growth_rate, index):
       """Makes a block of dense layers of the specified size."""
       layer_out = data
       block = []
       for i in range(num_layers):
           layer_out = _make_dense_layer(layer_out, growth_rate, bn_size, 
"%s_%s" % (index, i))
           block.append(layer_out)
       out = relay.concatenate(block, 1)
       return out
   ```
   
   ## Environment
   
   TVM: commit 2d1847c9d3ce70daed518d8b3d9dbf750ae34672
   CUDA version: 10.2
   System: Ubuntu 20.04
   GCC 7.5
   Build options:  -DUSE_LLVM=ON -DUSE_CUDA=ON
   
   
   This is the AutoTVM task output after applying the 3 solutions.
   
   ```
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 3, 224, 224), 
'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 3, 224, 
224), 'float32'), ('TENSOR', (64, 3, 7, 7), 'float32'), (2, 2), (3, 3, 3, 3), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 64, 56, 56), 
'float32'), ('TENSOR', (128, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 
56), 'float32'), ('TENSOR', (128, 64, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 56, 56), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 
56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 
1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 56, 56), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', 
(1, 128, 56, 56), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), 
(1, 1, 1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 56, 56), 
'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 56, 
56), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 192, 56, 56), 
'float32'), ('TENSOR', (128, 192, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 192, 
56, 56), 'float32'), ('TENSOR', (128, 192, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 
'float32'), ('TENSOR', (128, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 
28, 28), 'float32'), ('TENSOR', (128, 128, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 28, 28), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 
28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 
1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 28, 28), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', 
(1, 128, 28, 28), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), 
(1, 1, 1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 28, 28), 
'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 28, 
28), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 384, 28, 28), 
'float32'), ('TENSOR', (256, 384, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 384, 
28, 28), 'float32'), ('TENSOR', (256, 384, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 256, 14, 14), 
'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 
14, 14), 'float32'), ('TENSOR', (128, 256, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 14, 14), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 
14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 
1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 14, 14), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', 
(1, 128, 14, 14), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), 
(1, 1, 1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 14, 14), 
'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 14, 
14), 'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 768, 14, 14), 
'float32'), ('TENSOR', (512, 768, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 768, 
14, 14), 'float32'), ('TENSOR', (512, 768, 1, 1), 'float32'), (1, 1), (0, 0, 0, 
0), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 512, 7, 7), 
'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 
7), 'float32'), ('TENSOR', (128, 512, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 128, 7, 7), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 7, 
7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), 
(1, 1), 'float32'))
   Task(func_name=conv2d_nchw_winograd.cuda, args=(('TENSOR', (1, 128, 7, 7), 
'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 
1), 'float32'), kwargs={}, workload=('conv2d_nchw_winograd.cuda', ('TENSOR', 
(1, 128, 7, 7), 'float32'), ('TENSOR', (32, 128, 3, 3), 'float32'), (1, 1), (1, 
1, 1, 1), (1, 1), 'float32'))
   Task(func_name=conv2d_nchw.cuda, args=(('TENSOR', (1, 32, 7, 7), 'float32'), 
('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 1), 
'float32'), kwargs={}, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 32, 7, 7), 
'float32'), ('TENSOR', (128, 32, 1, 1), 'float32'), (1, 1), (0, 0, 0, 0), (1, 
1), 'float32'))
   
   
   
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to