[GitHub] [tvm] islavutin commented on issue #9242: [Bug] Fine tuned OpenCL gives incorrect outputs

GitBox Tue, 30 Nov 2021 05:57:57 -0800


islavutin commented on issue #9242:
URL: https://github.com/apache/tvm/issues/9242#issuecomment-982660399



   I slightly modified your script and am getting very close outputs when
running it on the CPU and on an Adreno GPU:
   ```
   maximum element difference: 5.4836273193359375e-06, l2 diff: 4.9437097914051265e-05
   +-------------+--------------------------------+----------+-------------+
   | TVM Results |             Label              | Class ID | Probability |
   +-------------+--------------------------------+----------+-------------+
   |     Top1    |           tiger cat            |   282    |    835.40   |
   |     Top2    |          Egyptian cat          |   285    |    738.12   |
   |     Top3    |        tabby, tabby cat        |   281    |    726.89   |
   |     Top4    |    kit fox, Vulpes macrotis    |   278    |    601.60   |
   |     Top5    | Pembroke, Pembroke Welsh corgi |   263    |    598.17   |
   +-------------+--------------------------------+----------+-------------+
   
   +-------------------+--------------------------------+----------+-------------+
   | TVM Results (ref) |             Label              | Class ID | Probability |
   +-------------------+--------------------------------+----------+-------------+
   |        Top1       |           tiger cat            |   282    |    835.40   |
   |        Top2       |          Egyptian cat          |   285    |    738.12   |
   |        Top3       |        tabby, tabby cat        |   281    |    726.89   |
   |        Top4       |    kit fox, Vulpes macrotis    |   278    |    601.60   |
   |        Top5       | Pembroke, Pembroke Welsh corgi |   263    |    598.17   |
   +-------------------+--------------------------------+----------+-------------+
   ```
   
   
   Here is the modified code:
   
   
   ```
   import os
   
   import cv2
   import numpy as np
   import tvm
   from mxnet.gluon.model_zoo.vision import get_model
   from prettytable import PrettyTable
   from tvm import auto_scheduler, rpc
   from tvm import relay
   from tvm.contrib import utils, ndk, graph_executor
   from tvm.contrib.download import download_testdata
   
   def preprocess(img_data):
       """Scale a channel-first image by 1/255 and normalize each channel.

       Args:
           img_data: 3-D array indexed as ``[channel, row, col]``. Only the
               first three channels have a mean/stddev entry, so more than
               three channels raises ``IndexError``.

       Returns:
           A new ``float32`` array with the same shape as ``img_data`` where
           channel ``i`` equals ``(img_data[i] / 255.0 - mean[i]) / stddev[i]``.
       """
       mean_vec = np.array([0.485, 0.456, 0.406])
       stddev_vec = np.array([0.229, 0.224, 0.225])
       norm_img_data = np.zeros(img_data.shape).astype('float32')

       for i in range(img_data.shape[0]):
           # Divide by 255 to bring the values into [0, 1] (assuming 8-bit
           # pixel data), then normalize with this channel's mean and stddev.
           norm_img_data[i, :, :] = (img_data[i, :, :] / 255.0 - mean_vec[i]) / stddev_vec[i]
       return norm_img_data
   
   
   def get_img(batch=1):
       """Download the sample cat image and return it as a normalized batch.

       The image is read with OpenCV, converted from BGR to RGB, resized to
       224x224, transposed from HWC to CHW, normalized with ``preprocess`` and
       finally broadcast along a new leading batch axis.

       Args:
           batch: Number of copies of the image along the leading axis.

       Returns:
           A ``float32`` array of shape ``(batch, channels, 224, 224)``. It is
           produced by ``np.broadcast_to``, so it is a read-only view.

       Raises:
           ValueError: If OpenCV cannot read the downloaded file.
       """
       img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
       img_path = download_testdata(img_url, "cat.png", module="data")

       img = cv2.imread(img_path)
       if img is None:
           # cv2.imread signals failure by returning None instead of raising.
           raise ValueError(f"OpenCV could not read image: {img_path}")
       img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
       img = cv2.resize(img, (224, 224))
       in_data = np.asarray(img)

       # hwc to chw
       in_data = in_data.transpose((2, 0, 1))
       shape = in_data.shape
       in_data = preprocess(in_data)
       in_data = np.broadcast_to(in_data.astype("float32"), shape=(batch, *shape))
       return in_data
   
   
   # Download class labels (from a separate file)
   def load_class_labels():
       """Download the ImageNet class-id-to-label table and return it.

       Returns:
           The object parsed from ``imagenet1000_clsid_to_human.txt``. The file
           is expected to hold a Python dict literal mapping class ids to
           human-readable labels (NOTE(review): confirm against the gist).

       Raises:
           ValueError, SyntaxError: If the downloaded text is not a plain
               Python literal.
       """
       # Local import keeps this function self-contained.
       import ast

       synset_url = "".join(
           [
               "https://gist.githubusercontent.com/zhreshold/",
               "4d0b62f3d01426887599d4f7ede23ee5/raw/",
               "596b27d23537e5a1b5751d2b0481ef172f58b539/",
               "imagenet1000_clsid_to_human.txt",
           ]
       )
       synset_name = "imagenet1000_clsid_to_human.txt"
       synset_path = download_testdata(synset_url, synset_name, module="data")
       with open(synset_path) as f:
           # The content comes from the network: parse it as a literal only,
           # rather than executing it with eval().
           synset = ast.literal_eval(f.read())

       return synset
   
   
   if __name__ == "__main__":
       # Import the pretrained MobileNet from the MXNet model zoo into Relay.
       block = get_model("mobilenet1.0", pretrained=True)
       shape_dict = {"data": (1, 3, 224, 224)}
       mod, params = relay.frontend.from_mxnet(block, shape_dict)

       # RPC tracker location and the key the remote device registered under.
       tracker_host = "0.0.0.0"
       tracker_port = 9191
       key = 'Android'
       # Cross-compiler used by the NDK build/export steps below.
       os.environ["TVM_NDK_CC"] = '/home/iliya/Android/Sdk/ndk/22.0.6917172/android-toolchain-arm64/bin/aarch64-linux-android-g++'
       output_file = 'auto_scheduled_model_mxnet.json'

       target = 'opencl -device=mali'
       target_host = 'llvm -mtriple=arm64-linux-android'

       # Extract the tuning tasks and tune them on the remote device, logging
       # every measurement to output_file.
       tasks, task_weights = tvm.auto_scheduler.extract_tasks(mod['main'], params, target, target_host)

       tuner = tvm.auto_scheduler.TaskScheduler(tasks, task_weights)

       builder = tvm.auto_scheduler.LocalBuilder(build_func='ndk')
       runner = tvm.auto_scheduler.RPCRunner(
           key=key, host=tracker_host, port=tracker_port, priority=0, number=3, repeat=1
       )
       tune_options = tvm.auto_scheduler.TuningOptions(
           num_measure_trials=35,
           builder=builder,
           runner=runner,
           measure_callbacks=[tvm.auto_scheduler.RecordToFile(output_file)],
       )
       tuner.tune(tune_options)

       # Build the model for the remote target using the records in output_file.
       with tvm.auto_scheduler.ApplyHistoryBest(output_file):
           with tvm.transform.PassContext(opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
               lib = relay.build(mod, target=target, target_host=target_host, params=params)

       # Export the library, upload it through the tracker and load it remotely.
       tracker = rpc.connect_tracker(tracker_host, tracker_port)
       remote = tracker.request(key, priority=0, session_timeout=0)
       device = remote.cl(0)
       tmp = utils.tempdir()
       lib_fname = tmp.relpath("net_gpu.so")
       fcompile = ndk.create_shared
       lib.export_library(lib_fname, fcompile)
       remote.upload(lib_fname)
       exported_lib = remote.load_module("net_gpu.so")
       module = graph_executor.GraphModule(exported_lib["default"](device))

       # Reference build: same model compiled for the local CPU without the
       # auto-scheduler records.
       local_device_ref = tvm.device(str(tvm.target.Target('llvm')), 0)
       with tvm.transform.PassContext(opt_level=3):
           ref_lib = relay.build(mod, target=tvm.target.Target('llvm -mcpu=core-avx2'), params=params)

       unoptimized_module = graph_executor.GraphModule(ref_lib["default"](local_device_ref))

       dummy_input = {'data': get_img().astype("float32")}
       # Distinct loop names avoid shadowing the builtin `input` and the
       # tracker `key` defined above.
       tvm_dummy = {name: tvm.nd.array(value) for name, value in dummy_input.items()}

       module.set_input(**tvm_dummy)
       module.run()
       module_output = module.get_output(0).asnumpy()[0]

       unoptimized_module.set_input(**tvm_dummy)
       unoptimized_module.run()
       unoptimized_module_output = unoptimized_module.get_output(0).asnumpy()[0]
       diff = np.abs(module_output - unoptimized_module_output)

       print(f'maximum element difference: {np.amax(diff)}, l2 diff: {np.linalg.norm(diff)}')

       # cut last 5 values after argsort and reverse
       ref_res5 = unoptimized_module_output.argsort()[-5:][::-1]
       res5 = module_output.argsort()[-5:][::-1]

       class_labels = load_class_labels()

       t = PrettyTable(['TVM Results', 'Label', 'Class ID', 'Probability'], float_format=".2")
       for i, idx in enumerate(res5):
           t.add_row([f"Top{i + 1}", class_labels[idx], idx, module_output[idx] * 100])
       print(t)

       t = PrettyTable(['TVM Results (ref)', 'Label', 'Class ID', 'Probability'], float_format=".2")
       for i, idx in enumerate(ref_res5):
           t.add_row([f"Top{i + 1}", class_labels[idx], idx, unoptimized_module_output[idx] * 100])
       print(t)
   
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] islavutin commented on issue #9242: [Bug] Fine tuned OpenCL gives incorrect outputs

Reply via email to