[GitHub] [incubator-tvm] jackwish commented on a change in pull request #3859: [TOPI][AutoTVM] NHWC conv2d templates for ARM

2019-11-21 Thread GitBox
jackwish commented on a change in pull request #3859: [TOPI][AutoTVM] NHWC 
conv2d templates for ARM
URL: https://github.com/apache/incubator-tvm/pull/3859#discussion_r349421231
 
 

 ##
 File path: topi/python/topi/arm_cpu/conv2d_spatial_pack.py
 ##
 @@ -196,3 +196,160 @@ def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, 
kernel_vec,
 s[kernel_vec].parallel(co)
 
 return s
+
+def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, 
out_dtype):
+"""Spatial pack compute for Conv2d NHWC"""
+out_dtype = out_dtype or data.dtype
+
+N, IH, IW, IC = get_const_tuple(data.shape)
+assert len(kernel.shape) == 4, "AlterOpLayout not enabled for NHWC yet"
+KH, KW, _, OC = get_const_tuple(kernel.shape)
+
+if isinstance(dilation, int):
+dilation_h = dilation_w = dilation
+else:
+dilation_h, dilation_w = dilation
+
+dilated_kernel_h = (KH - 1) * dilation_h + 1
+dilated_kernel_w = (KW - 1) * dilation_w + 1
+
+pad_top, pad_left, pad_down, pad_right = \
+get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w))
+HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, 
strides)
+
+OH = (IH + pad_top + pad_down - dilated_kernel_h) // HSTR + 1
+OW = (IW + pad_left + pad_right - dilated_kernel_w) // WSTR + 1
+data_pad = nn.pad(data, [0, pad_top, pad_left, 0], [0, pad_down, 
pad_right, 0])
+
+#  define configuration space 
+n, oc, oh, ow = cfg.axis(N), cfg.axis(OC), cfg.axis(OH), cfg.axis(OW)
+ic, kh, kw = cfg.reduce_axis(IC), cfg.reduce_axis(KH), cfg.reduce_axis(KW)
+
+oco, oci = cfg.define_split('tile_co', oc, num_outputs=2)
+oho, ohi = cfg.define_split('tile_oh', oh, num_outputs=2)
+owo, owi = cfg.define_split('tile_ow', ow, num_outputs=2)
+
+cfg.define_reorder('reorder_conv',
+   [n, oho, owo, oco, kh, kw, ic, ohi, owi, oci],
+   policy='candidate', candidate=[
+   [n, oho, owo, oco, kh, kw, ic, ohi, owi, oci],
+   [n, oho, owo, oco, ohi, kh, kw, ic, owi, oci],
+   [n, oho, owo, oco, ohi, kh, kw, owi, ic, oci],
+   [n, oho, owo, ohi, oco, kh, kw, owi, ic, oci]])
+
+cfg.define_annotate("ann_reduce", [kh, kw], policy='try_unroll')
+cfg.define_annotate("ann_spatial", [ohi, owi, oci], 
policy='try_unroll_vec')
+# 
+
+OCI = cfg['tile_co'].size[-1]
+OHI = cfg['tile_oh'].size[-1]
+OWI = cfg['tile_ow'].size[-1]
+OCO = OC // OCI
+OHO = OH // OHI
+OWO = OW // OWI
+
+kvshape = (OCO, KH, KW, IC, OCI)
+ovshape = (N, OHO, OWO, OCO, OHI, OWI, OCI)
+oshape = (N, OH, OW, OC)
+
+if dilation_h != 1 or dilation_w != 1:
+# undilate input data
+dvshape = (N, OHO, OWO, KH, KW, IC, OHI, OWI)
+data_vec = tvm.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, 
owi:
+   data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h]
+   [(owo*OWI+owi)*WSTR+kw*dilation_w][ic],
+   name='data_vec_undilated')
+else:
+dvshape = (N, OHO, OWO, KH + (OHI-1)*HSTR, KW + (OWI-1)*WSTR, IC)
+data_vec = tvm.compute(dvshape, lambda n, oho, owo, ohi, owi, ic:
+   
data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic],
+   name='data_vec')
+kernel_vec = tvm.compute(kvshape, lambda oco, kh, kw, ic, oci: \
+ kernel[kh][kw][ic][oco*OCI+oci],
+ name='kernel_vec')
+
+ic = tvm.reduce_axis((0, IC), name='ic')
+kh = tvm.reduce_axis((0, KH), name='kh')
+kw = tvm.reduce_axis((0, KW), name='kw')
+
+if dilation_h != 1 or dilation_w != 1:
+conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+tvm.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, 
ic].astype(out_dtype) *
+kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+axis=[ic, kh, kw]), name='conv')
+else:
+conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+tvm.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, 
ic].astype(out_dtype) *
+kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+axis=[ic, kh, kw]), name='conv')
+
+output = tvm.compute(oshape, lambda n, oho, owo, oc:
+ 
conv[n][oho//OHI][owo//OWI][oc//OCI][oho%OHI][owo%OWI][oc%OCI],
+ name='output_unpack', tag='spatial_conv_output_NHWC')
+return output
+
+def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output):
+"""Spatial Pack schedule for Conv2d NHWC"""
+unpack = op.output(0)
+conv = unpack.op.input_tensors[0]
+data_vec = 

[GitHub] [incubator-tvm] jackwish commented on a change in pull request #3859: [TOPI][AutoTVM] NHWC conv2d templates for ARM

2019-11-21 Thread GitBox
jackwish commented on a change in pull request #3859: [TOPI][AutoTVM] NHWC 
conv2d templates for ARM
URL: https://github.com/apache/incubator-tvm/pull/3859#discussion_r349421208
 
 

 ##
 File path: topi/python/topi/arm_cpu/conv2d_spatial_pack.py
 ##
 @@ -196,3 +196,160 @@ def schedule_conv2d_spatial_pack_nchw(cfg, s, data_vec, 
kernel_vec,
 s[kernel_vec].parallel(co)
 
 return s
+
+def conv2d_spatial_pack_nhwc(cfg, data, kernel, strides, padding, dilation, 
out_dtype):
+"""Spatial pack compute for Conv2d NHWC"""
+out_dtype = out_dtype or data.dtype
+
+N, IH, IW, IC = get_const_tuple(data.shape)
+assert len(kernel.shape) == 4, "AlterOpLayout not enabled for NHWC yet"
+KH, KW, _, OC = get_const_tuple(kernel.shape)
+
+if isinstance(dilation, int):
+dilation_h = dilation_w = dilation
+else:
+dilation_h, dilation_w = dilation
+
+dilated_kernel_h = (KH - 1) * dilation_h + 1
+dilated_kernel_w = (KW - 1) * dilation_w + 1
+
+pad_top, pad_left, pad_down, pad_right = \
+get_pad_tuple(padding, (dilated_kernel_h, dilated_kernel_w))
+HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, 
strides)
+
+OH = (IH + pad_top + pad_down - dilated_kernel_h) // HSTR + 1
+OW = (IW + pad_left + pad_right - dilated_kernel_w) // WSTR + 1
+data_pad = nn.pad(data, [0, pad_top, pad_left, 0], [0, pad_down, 
pad_right, 0])
+
+#  define configuration space 
+n, oc, oh, ow = cfg.axis(N), cfg.axis(OC), cfg.axis(OH), cfg.axis(OW)
+ic, kh, kw = cfg.reduce_axis(IC), cfg.reduce_axis(KH), cfg.reduce_axis(KW)
+
+oco, oci = cfg.define_split('tile_co', oc, num_outputs=2)
+oho, ohi = cfg.define_split('tile_oh', oh, num_outputs=2)
+owo, owi = cfg.define_split('tile_ow', ow, num_outputs=2)
+
+cfg.define_reorder('reorder_conv',
+   [n, oho, owo, oco, kh, kw, ic, ohi, owi, oci],
+   policy='candidate', candidate=[
+   [n, oho, owo, oco, kh, kw, ic, ohi, owi, oci],
+   [n, oho, owo, oco, ohi, kh, kw, ic, owi, oci],
+   [n, oho, owo, oco, ohi, kh, kw, owi, ic, oci],
+   [n, oho, owo, ohi, oco, kh, kw, owi, ic, oci]])
+
+cfg.define_annotate("ann_reduce", [kh, kw], policy='try_unroll')
+cfg.define_annotate("ann_spatial", [ohi, owi, oci], 
policy='try_unroll_vec')
+# 
+
+OCI = cfg['tile_co'].size[-1]
+OHI = cfg['tile_oh'].size[-1]
+OWI = cfg['tile_ow'].size[-1]
+OCO = OC // OCI
+OHO = OH // OHI
+OWO = OW // OWI
+
+kvshape = (OCO, KH, KW, IC, OCI)
+ovshape = (N, OHO, OWO, OCO, OHI, OWI, OCI)
+oshape = (N, OH, OW, OC)
+
+if dilation_h != 1 or dilation_w != 1:
+# undilate input data
+dvshape = (N, OHO, OWO, KH, KW, IC, OHI, OWI)
+data_vec = tvm.compute(dvshape, lambda n, oho, owo, kh, kw, ic, ohi, 
owi:
+   data_pad[n][(oho*OHI+ohi)*HSTR+kh*dilation_h]
+   [(owo*OWI+owi)*WSTR+kw*dilation_w][ic],
+   name='data_vec_undilated')
+else:
+dvshape = (N, OHO, OWO, KH + (OHI-1)*HSTR, KW + (OWI-1)*WSTR, IC)
+data_vec = tvm.compute(dvshape, lambda n, oho, owo, ohi, owi, ic:
+   
data_pad[n][oho*OHI*HSTR+ohi][owo*OWI*WSTR+owi][ic],
+   name='data_vec')
+kernel_vec = tvm.compute(kvshape, lambda oco, kh, kw, ic, oci: \
+ kernel[kh][kw][ic][oco*OCI+oci],
+ name='kernel_vec')
+
+ic = tvm.reduce_axis((0, IC), name='ic')
+kh = tvm.reduce_axis((0, KH), name='kh')
+kw = tvm.reduce_axis((0, KW), name='kw')
+
+if dilation_h != 1 or dilation_w != 1:
+conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+tvm.sum(data_vec[n, oho, owo, kh, kw, ohi, owi, 
ic].astype(out_dtype) *
+kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+axis=[ic, kh, kw]), name='conv')
+else:
+conv = tvm.compute(ovshape, lambda n, oho, owo, oco, ohi, owi, oci: \
+tvm.sum(data_vec[n, oho, owo, ohi*HSTR+kh, owi*WSTR+kw, 
ic].astype(out_dtype) *
+kernel_vec[oco, kh, kw, ic, oci].astype(out_dtype),
+axis=[ic, kh, kw]), name='conv')
+
+output = tvm.compute(oshape, lambda n, oho, owo, oc:
+ 
conv[n][oho//OHI][owo//OWI][oc//OCI][oho%OHI][owo%OWI][oc%OCI],
+ name='output_unpack', tag='spatial_conv_output_NHWC')
+return output
+
+def schedule_conv2d_spatial_pack_nhwc(cfg, s, op, output):
+"""Spatial Pack schedule for Conv2d NHWC"""
+unpack = op.output(0)
+conv = unpack.op.input_tensors[0]
+data_vec =