[TVM Discuss] [Questions] [AutoTVM]Tuning fails for an ONNX network on x86 CPU in tune_graph KeyError: ‘topi_op’

Kevin Wan via TVM Discuss Tue, 26 May 2020 22:58:05 -0700


Hi, I hit a KeyError when trying to tune a new model, SINet 
(https://arxiv.org/abs/1911.09099), in the tune_graph step after 
tune_kernels, following the tutorial instructions.


I have successfully converted the model from PyTorch to ONNX (I tried 
opset_version=10 with the nearest-upsample op but got an error at the compile 
step in relay.frontend.from_onnx; compilation succeeds with opset_version=11). 
I also verified that the results are the same in PyTorch, onnxruntime and TVM, 
which shows that the conversion from PyTorch to ONNX to Relay IR is error-free. 
The environment info is shown below:
System: Ubuntu 18.04
tvm version: 0.7 dev1
python: 3.6.9
pytorch: 1.4.0+cu100
onnx: 1.4.0
model (.onnx file): 
https://drive.google.com/open?id=1HycNYFMyMVgXTM7ie6ujOeQMLdF-QO5r

My code (barely changed from the tutorial):
```
import numpy as np
import tvm, os
from tvm import relay
import cv2
from tvm.contrib import graph_runtime as runtime
import onnx
import my_utils
from tvm import autotvm
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm.autotvm.graph_tuner import DPTuner, PBQPTuner
from tvm.contrib import util


# Thread count exported through the TVM_NUM_THREADS environment variable.
num_threads = 6
os.environ["TVM_NUM_THREADS"] = str(num_threads)
# model_name is used below to derive the two tuning-log file names.
model_name = "Dnc_SINet"
img_path = "../test_img/0.png"
model_file = "../Dnc_SINet.onnx"
# Input width and height; passed to my_utils.preprocess and used in input_shape.
w = 320
h = 256
batch_size = 1
dtype = 'float32'
target = "llvm"
# NOTE(review): target_host is not referenced anywhere else in the visible script.
target_host = 'llvm'
# Name of the model's input tensor; used as the key for shapes and set_input.
input_name = "input.1"
# NOTE(review): output_name is not referenced anywhere else in the visible script.
output_name = "962"
# (batch_size, 3, h, w) -- presumably NCHW with 3 image channels; TODO confirm.
input_shape = (batch_size,3,h,w)
ctx = tvm.cpu(0)
# Kernel-tuning log: written by tune_kernels and passed to tune_graph as its records.
log_file = "%s.log" % model_name
# File that tune_graph writes and autotvm.apply_graph_best later reads.
graph_opt_sch_file = "%s_graph_opt.log" % model_name

# Keyword arguments forwarded to tune_kernels via **tuning_opt in
# tune_and_evaluate; the keys match tune_kernels' parameter names.
tuning_option = {
    'log_filename': log_file,
    'tuner': 'random',
    'early_stopping': None,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=10, repeat=1,
                                   min_repeat_ms=1000),
    ),
}

# function for tuning kernels
def tune_kernels(tasks,measure_option,
                 tuner='gridsearch',
                 early_stopping=None,
                 log_filename='tuning.log'):
    """Tune every task in ``tasks`` with the selected AutoTVM tuner.

    For each task a tuner object is created according to ``tuner`` and run
    for ``len(task.config_space)`` trials, reporting through a progress-bar
    callback and ``autotvm.callback.log_to_file(log_filename)``.

    Parameters
    ----------
    tasks : sequence of tuning tasks to process in order.
    measure_option : forwarded unchanged to the tuner's ``tune`` call.
    tuner : one of 'xgb', 'xgb-rank', 'ga', 'random', 'gridsearch'.
    early_stopping : forwarded unchanged to the tuner's ``tune`` call.
    log_filename : file name handed to the log-to-file callback.

    Raises
    ------
    ValueError
        If ``tuner`` is not a recognised name (raised when the first task
        is reached, so an empty ``tasks`` never raises).
    """
    # Factories are lambdas so the tuner classes are only looked up when a
    # tuner is actually built.
    tuner_factories = {
        'xgb': lambda t: XGBTuner(t, loss_type='rank'),
        'xgb-rank': lambda t: XGBTuner(t, loss_type='rank'),
        'ga': lambda t: GATuner(t, pop_size=50),
        'random': lambda t: RandomTuner(t),
        'gridsearch': lambda t: GridSearchTuner(t),
    }

    for position, task in enumerate(tasks, start=1):
        prefix = "[Task %2d/%2d] " % (position, len(tasks))

        # create tuner
        make_tuner = tuner_factories.get(tuner)
        if make_tuner is None:
            raise ValueError("Invalid tuner: " + tuner)
        tuner_obj = make_tuner(task)

        # do tuning: one trial per entry in the task's config space
        trial_count = len(task.config_space)
        tuner_obj.tune(
            n_trial=trial_count,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(trial_count, prefix=prefix),
                autotvm.callback.log_to_file(log_filename),
            ],
        )

# Use graph tuner to achieve graph level optimal schedules
# Set use_DP=False if it takes too long to finish.
def tune_graph(graph, dshape, records, opt_sch_file, use_DP=True):
    """Run the graph tuner over ``graph`` and write the result to a file.

    Builds a ``DPTuner`` (when ``use_DP`` is true) or a ``PBQPTuner`` for the
    ``nn.conv2d`` operator, benchmarks layout transforms, runs the tuner and
    writes the chosen schedules to ``opt_sch_file``.

    Reads the module-level ``input_name`` and ``target``.

    Parameters
    ----------
    graph : the function to tune, passed straight to the tuner.
    dshape : shape of the input named ``input_name``; also printed.
    records : kernel tuning records passed straight to the tuner.
    opt_sch_file : destination for ``write_opt_sch2record_file``.
    use_DP : choose ``DPTuner`` (True) or ``PBQPTuner`` (False).
    """
    conv2d_op = relay.op.get("nn.conv2d")
    target_op = [conv2d_op]

    if use_DP:
        tuner_cls = DPTuner
    else:
        tuner_cls = PBQPTuner

    print(dshape)
    input_shapes = {input_name: dshape}
    executor = tuner_cls(graph, input_shapes, records, target_op, target)
    executor.benchmark_layout_transform(min_exec_num=2000)
    executor.run()
    executor.write_opt_sch2record_file(opt_sch_file)


def tune_and_evaluate(tuning_opt, mod, params, data_shape, target="llvm"):
    """Tune conv2d kernels, run graph tuning, then build and time the model.

    Reads the module-level ``log_file``, ``graph_opt_sch_file``,
    ``input_name`` and ``dtype``.

    Parameters
    ----------
    tuning_opt : dict of keyword arguments forwarded to ``tune_kernels``.
    mod : Relay module; its ``"main"`` function is tuned and the whole
        module is built.
    params : model parameters passed to task extraction and to the build.
    data_shape : shape of the input tensor, used for graph tuning and for
        the random input fed to the timing run.
    target : compilation target string.

    Returns
    -------
    (graph, lib, params) as returned by ``relay.build_module.build``.
    """
    # extract workloads from relay program
    print("Extract tasks...")
    tasks = autotvm.task.extract_from_program(mod["main"], target=target,
                                              params=params,
                                              ops=(relay.op.get("nn.conv2d"),))

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)  # tune kernels

    # NOTE: tune_graph currently fails for SINet with:
    #   File "/home/qqai-cv/yexing/my_python/lib/python3.6/site-packages/tvm/autotvm/graph_tuner/base_graph_tuner.py",
    #    line 269, in _iterate_layout_transform
    #       i_topi_op = in_node_entry["topi_op"][0]
    #
    #   KeyError: 'topi_op'
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file,
               use_DP=False)  # tune graph

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with relay.build_config(opt_level=4):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

        # upload parameters to device
        ctx = tvm.cpu(0)
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.create(graph, lib, ctx)
        module.set_input(input_name, data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))

        return graph, lib, params

#######################################################################


# load test image
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None.
    raise FileNotFoundError("Could not read image: %s" % img_path)
# Named input_data (not `input`) to avoid shadowing the builtin.
input_data = my_utils.preprocess(img, w, h)
print("input shape:{}".format(input_data.shape))
input_array = tvm.nd.array(input_data.astype(dtype))

# load onnx model
onnx_model = onnx.load(model_file)

# compile model by relay
shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype)
# Dump the textual Relay IR next to the script for inspection.
meta_file = "./Dnc_SINet.meta"
with open(meta_file, 'w') as mf:
    print(mod.astext(show_meta_data=False), file=mf)

print("#######################################################################")

# Auto-tune
graph, lib, params = tune_and_evaluate(tuning_option, mod, params, input_shape,
                                       target=target)

# save the graph, lib and params into separate files
temp = util.tempdir("./model_x86")
path_lib = temp.relpath("DncSINet_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("DncSINet_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("DncSINet_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

# load the module back.
with open(temp.relpath("DncSINet_graph.json")) as fi:
    loaded_graph = fi.read()
loaded_lib = tvm.runtime.load_module(path_lib)
with open(temp.relpath("DncSINet_param.params"), "rb") as fi:
    loaded_params = bytearray(fi.read())


module = runtime.create(loaded_graph, loaded_lib, ctx)
module.load_params(loaded_params)
module.set_input(input_name, input_array)
module.run()
tvm_output = module.get_output(0)
print(tvm_output)

# onnxruntime for evaluation
import onnxruntime as ort
from onnxruntime.capi import _pybind_state as C
so = ort.SessionOptions()
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
so.execution_mode = ort.ExecutionMode.ORT_PARALLEL
so.intra_op_num_threads = 2
so.inter_op_num_threads = 2
# Reuse model_file / input_name so the ONNX run matches the TVM run above.
ort_sess = ort.InferenceSession(model_file, sess_options=so)
print(C.get_available_providers())
ort_sess.set_providers(["CPUExecutionProvider"])

onnx_out = ort_sess.run(None, {input_name: input_data})
print("ONNX_output")
print(onnx_out)
print()

```



Further, the tune_kernels step seems to run without error and outputs the log 
Dnc_SINet.log:
```
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 54, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 2]], ["tile_ow", 
"sp", [-1, 2]], ["unroll_kw", "ot", false]]}, "result": 
[[0.00013595731521333498], 0, 1.8156261444091797, 1590487913.6498568], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 10, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 2]], ["tile_ow", 
"sp", [-1, 2]], ["unroll_kw", "ot", true]]}, "result": 
[[0.00013540219174497443], 0, 1.3245575428009033, 1590487914.9348269], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 70, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 2]], ["tile_ow", 
"sp", [-1, 10]], ["unroll_kw", "ot", false]]}, "result": 
[[0.00015527120916877555], 0, 3.4014461040496826, 1590487918.2519333], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 6, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 2]], ["tile_ow", 
"sp", [-1, 1]], ["unroll_kw", "ot", true]]}, "result": 
[[0.0001641825172311788], 0, 1.314802885055542, 1590487919.5376344], "version": 
0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 36, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 32]], ["unroll_kw", "ot", true]]}, "result": 
[[0.00014663836520854528], 0, 3.4310293197631836, 1590487922.846251], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 9, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 2]], ["unroll_kw", "ot", true]]}, "result": 
[[0.00018114181677483774], 0, 1.3032824993133545, 1590487924.0816596], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 68, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 10]], ["unroll_kw", "ot", false]]}, "result": 
[[0.00011425483953435289], 0, 1.730971097946167, 1590487925.7408793], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 81, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 32]], ["unroll_kw", "ot", false]]}, "result": 
[[0.00012996511115645148], 0, 3.3890671730041504, 1590487929.0349448], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 37, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 2]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 32]], ["unroll_kw", "ot", true]]}, "result": 
[[0.00011664882574600265], 0, 1.3587851524353027, 1590487930.3270526], 
"version": 0.2, "tvm_version": "0.7.dev1"}
{"input": ["llvm", "conv2d_NCHWc.x86", [["TENSOR", [1, 2, 256, 320], 
"float32"], ["TENSOR", [2, 2, 3, 3], "float32"], [1, 1], [1, 1, 1, 1], [1, 1], 
"NCHW", "NCHW", "float32"], {}], "config": {"index": 40, "code_hash": null, 
"entity": [["tile_ic", "sp", [-1, 1]], ["tile_oc", "sp", [-1, 1]], ["tile_ow", 
"sp", [-1, 40]], ["unroll_kw", "ot", true]]}, "result": 
[[0.00014529477851287214], 0, 1.8773386478424072, 1590487932.0300732], 
"version": 0.2, "tvm_version": "0.7.dev1"}
...
```
I checked the convolution layouts in my IR, and they are all NCHW 
(same as ONNX and PyTorch), so the cause of this KeyError must be something else.
I have also checked the log file: every record has error code 0, none are 2/3/4.





---
[Visit 
Topic](https://discuss.tvm.ai/t/autotvm-tuning-fails-for-an-onnx-network-on-x86-cpu-in-tune-graph-keyerror-topi-op/6813/2)
 to respond.

You are receiving this because you enabled mailing list mode.

To unsubscribe from these emails, [click 
here](https://discuss.tvm.ai/email/unsubscribe/9bf618be8eb3dfbdcd262e004fe242caf6a1448d3af62f114c651aaa1de4819b).

[TVM Discuss] [Questions] [AutoTVM]Tuning fails for an ONNX network on x86 CPU in tune_graph KeyError: ‘topi_op’

Reply via email to