hcms1994 commented on issue #14717: URL: https://github.com/apache/tvm/issues/14717#issuecomment-1521412690
Thank you for your reply. Due to the large size of the model file, I uploaded it to Baidu Netdisk. Baidu Netdisk link: https://pan.baidu.com/s/1Td-WjIY8wvjOeO2HoDTsAQ password: ekql

**The full repro script is as follows:**

```python
import os
import numpy as np
import tvm
from tvm import relay, autotvm
import tvm.relay.testing
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
import tvm.contrib.graph_executor as runtime
import onnx
from tvm.relay.op.contrib.tensorrt import partition_for_tensorrt


def get_tvm_cloud_model_network():
    input_name0 = 'x2paddle_input0'
    input_shape0 = (1, 1, 16, 80)
    input_name1 = 'x2paddle_input1'
    input_shape1 = (1, 6, 192, 192)
    onnx_model = onnx.load_model('/home/jiyingyu/jyy_work/tvm_test/tvm_cloud_models/shu_zi_ren_v1_v2/v1_projects/shuziren_v1.onnx')
    # shape_dict = [{input_name0 : input_shape0}, {input_name1 : input_shape1}]
    shape_dict = {}
    shape_dict[input_name0] = input_shape0
    shape_dict[input_name1] = input_shape1
    mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)
    return mod, params, [input_shape0, input_shape1], {}


#### DEVICE CONFIG ####
target = tvm.target.cuda(arch="sm_52")
dtype = "float32"


def tune_and_evaluate():
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, out_shape = get_tvm_cloud_model_network()
    print("get network success...")
    mod, config = partition_for_tensorrt(mod, params)

    # compile kernels with history best records
    for i in range(1):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)
        lib.export_library("shuziren_v1_default_agx_cuda_tensorrt_3.so")

    # load parameters
    print("load parameters...")
    dev = tvm.device(str(target), 0)
    print(dev)
    print("device is right")
    module = runtime.GraphModule(lib["default"](dev))
    print("module is right")
    data_tvm0 = tvm.nd.array((np.random.uniform(size=input_shape[0])).astype('float32'))
    module.set_input("x2paddle_input0", data_tvm0)
    data_tvm1 = tvm.nd.array((np.random.uniform(size=input_shape[1])).astype('float32'))
    module.set_input("x2paddle_input1", data_tvm1)

    # evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, number=1, repeat=600)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print(
        "Mean inference time (std dev): %.2f ms (%.2f ms)"
        % (np.mean(prof_res), np.std(prof_res))
    )


tune_and_evaluate()
```

-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
