[tvm] 01/02: update tune_conv2d_cuda.py

lmzheng Fri, 05 Feb 2021 14:01:41 -0800

This is an automated email from the ASF dual-hosted git repository.

lmzheng pushed a commit to branch custom_tile_size
in repository https://gitbox.apache.org/repos/asf/tvm.git


commit 9082cf81e473701ca6985cf78f6bf9ddf8869ea8
Author: Lianmin Zheng <[email protected]>
AuthorDate: Fri Feb 5 21:53:35 2021 +0000

    update tune_conv2d_cuda.py
---
 tutorials/autotvm/tune_conv2d_cuda.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tutorials/autotvm/tune_conv2d_cuda.py b/tutorials/autotvm/tune_conv2d_cuda.py
index dc8e6e5..a00fe5f 100644
--- a/tutorials/autotvm/tune_conv2d_cuda.py
+++ b/tutorials/autotvm/tune_conv2d_cuda.py
@@ -58,6 +58,8 @@ from tvm.topi.testing import conv2d_nchw_python
 import tvm.testing
 
 from tvm import autotvm
+from tvm.autotvm.task.space import SplitEntity
+from tvm.autotvm.task.dispatcher import ApplyConfig
 
 ######################################################################
 # Step 1:  Define the search space
@@ -98,14 +100,14 @@ def conv2d_no_batching(N, H, W, CO, CI, KH, KW, stride, padding):
     rc, ry, rx = s[conv].op.reduce_axis
 
     cfg = autotvm.get_config()
-    cfg.define_split("tile_f", f, num_outputs=4)
-    cfg.define_split("tile_y", y, num_outputs=4)
-    cfg.define_split("tile_x", x, num_outputs=4)
-    cfg.define_split("tile_rc", rc, num_outputs=3)
-    cfg.define_split("tile_ry", ry, num_outputs=3)
-    cfg.define_split("tile_rx", rx, num_outputs=3)
-    cfg.define_knob("auto_unroll_max_step", [0, 512, 1500])
-    cfg.define_knob("unroll_explicit", [0, 1])
+    cfg.define_split("tile_f", f, num_outputs=4)          # filter / output channel
+    cfg.define_split("tile_y", y, num_outputs=4)          # height
+    cfg.define_split("tile_x", x, num_outputs=4)          # width
+    cfg.define_split("tile_rc", rc, num_outputs=3)        # input channel
+    cfg.define_split("tile_ry", ry, num_outputs=3)        # kernel height
+    cfg.define_split("tile_rx", rx, num_outputs=3)        # kernel width
+    cfg.define_knob("auto_unroll_max_step", [0])          # disable auto unroll
+    cfg.define_knob("unroll_explicit", [0])               # disable auto unroll
     ##### space definition end #####
 
     # inline padding
@@ -204,7 +206,7 @@ measure_option = autotvm.measure_option(
 # see many error reports. As long as you can see non-zero GFLOPS, it is okay.
 tuner = autotvm.tuner.XGBTuner(task)
 tuner.tune(
-    n_trial=20,
+    n_trial=100,
     measure_option=measure_option,
     callbacks=[autotvm.callback.log_to_file("conv2d.log")],
 )
@@ -216,11 +218,16 @@ tuner.tune(
 # inspect the best config
 dispatch_context = autotvm.apply_history_best("conv2d.log")
 best_config = dispatch_context.query(task.target, task.workload)
+
+# Plug in your own tile sizes
+#best_config._entity_map['tile_f'] = SplitEntity([-1, 2, 8, 8])
+
 print("\nBest config:")
 print(best_config)
 
 # apply history best from log file
-with autotvm.apply_history_best("conv2d.log"):
+#with autotvm.apply_history_best("conv2d.log"):
+with ApplyConfig(best_config):
     with tvm.target.Target("cuda"):
         s, arg_bufs = conv2d_no_batching(N, H, W, CO, CI, KH, KW, strides, padding)
         func = tvm.build(s, arg_bufs)

[tvm] 01/02: update tune_conv2d_cuda.py

Reply via email to