crawlingcub opened a new issue #9818:
URL: https://github.com/apache/tvm/issues/9818


   Hi,
   
   I generated a model from `MobileNet-V3` by mutating some layers. When run 
with TVM, the model produces lower accuracy than with PyTorch. However, I 
expected the results to be exactly the same.
   
   To reproduce the problem, download the model and data from this 
[link](https://drive.google.com/drive/folders/1_AOA-9hA1I92vBUeQMbZ6F-2r69_aIrF?usp=sharing)
 and run the script below using:
   
   `python runscript.py model.pt data.pt`
   
   I suspect the bug may be due to the `BatchNorm2d` layer. In one of the 
mutations, I changed the BatchNorm2d config from `{"momentum": 0.01, "affine": 
true, "eps": 0.001}` to `{"momentum": 0.19067323835610506, "affine": true, 
"eps": -1.0}`.
   
   Can the difference be due to this layer?
   
   Please let me know if there is any other info that you need.
   
   **Environment**:
   ```
   TVM installed from source
   Pytorch 1.9.1+cu111
   Python 3.7
   OS: Ubuntu 18.04
   Cuda 11.1
   GPUs: 8 NVidia GA100
   ```
   
   **Code to Reproduce**:
   ```python
   import torch
   from torch.utils.data import DataLoader
   import sys
   import numpy as np
   
   
   def accuracy(output, target, topk=(1,)):
       """Return the top-k accuracy, in percent, for every k in ``topk``.

       ``output`` holds one row of class scores per sample and ``target`` the
       matching class indices. The result is a list with one single-element
       tensor per requested k, in the same order as ``topk``.
       """
       largest_k = max(topk)
       num_samples = target.size(0)

       with torch.no_grad():
           # Indices of the ``largest_k`` best-scoring classes per sample,
           # transposed so that row r holds every sample's rank-r prediction.
           top_indices = output.topk(largest_k, dim=1, largest=True, sorted=True)[1].t()
           hits = top_indices.eq(target.view(1, -1).expand_as(top_indices))

           # A sample counts for top-k when its target appears in the first k rows.
           return [
               hits[:k].reshape(-1).float().sum(0, keepdim=True).mul_(100.0 / num_samples)
               for k in topk
           ]
   
   
   def eval_model_tvm(model, dataset, device, batch_size):
       """Compile ``model`` with TVM and report its mean top-1/top-5 accuracy.

       The model is traced with TorchScript on the first batch, imported into
       Relay, built at ``opt_level=3`` and then run batch by batch through a
       graph executor.

       Args:
           model: PyTorch module; it is moved to the CPU and put in eval mode
               before tracing.
           dataset: Dataset wrapped in a non-shuffling ``DataLoader``.
           device: Device string. If it contains ``"cpu"`` (case-insensitive)
               the LLVM target is used, otherwise the CUDA target.
           batch_size: Batch size used for the ``DataLoader``.

       Returns:
           Dict with keys ``'acc1'`` and ``'acc5'`` holding the mean of the
           per-batch top-1 and top-5 accuracies.
       """
       import tvm
       from tvm import relay
       from tvm.contrib import graph_executor
       import logging
       # NOTE(review): presumably silences TVM's compile-time log output.
       logger = logging.getLogger('compile_engine')
       logger.setLevel(logging.ERROR)

       validation_dataloader = DataLoader(dataset, batch_size=batch_size,
                                          shuffle=False)
       if "cpu" in device.lower():
           target = tvm.target.Target("llvm", host="llvm")
       else:
           target = tvm.target.cuda()
       print("target", target)
       dev = tvm.device(str(target))
       model = model.to("cpu")
       model.eval()

       input_name = "input0"
       executor = None
       acc1s = []
       acc5s = []
       for images, targets in validation_dataloader:
           if executor is None:
               # Trace and compile once, using the first batch as the example
               # input. The input shape handed to Relay is that batch's shape.
               # NOTE(review): a smaller final batch would not match it --
               # confirm the dataset size is a multiple of batch_size.
               scripted_model = torch.jit.trace(model, images).eval()
               print("scripted")
               shape_list = [(input_name, tuple(images.shape))]
               mod, params = relay.frontend.from_pytorch(scripted_model,
                                                         shape_list)

               with tvm.transform.PassContext(opt_level=3):
                   lib = relay.build(mod, target=target, params=params)

               # Build the executor once; re-creating it for every batch is
               # loop-invariant work that does not change the results.
               executor = graph_executor.GraphModule(lib["default"](dev))

           executor.set_input(input_name, tvm.nd.array(images.numpy()))
           executor.run()
           output = torch.tensor(executor.get_output(0).asnumpy())
           acc1, acc5 = accuracy(output, targets, topk=(1, 5))

           acc1s.append(acc1.item())
           acc5s.append(acc5.item())

       return {'acc1': np.mean(acc1s), 'acc5': np.mean(acc5s)}
   
   
   def eval_model_vision(model, dataset, device, criterion, compute_metrics_fn,
                         batch_size):
       """Evaluate ``model`` with PyTorch and report mean top-1/top-5 accuracy.

       Args:
           model: PyTorch module; wrapped in ``torch.nn.DataParallel`` unless it
               already is one, then moved to ``device`` and put in eval mode.
           dataset: Either a ready ``DataLoader`` (used as-is) or a dataset
               that is wrapped in a non-shuffling ``DataLoader``.
           device: Device the model and each batch are moved to.
           criterion: Loss callable invoked as ``criterion(output, target)``.
           compute_metrics_fn: Called as
               ``compute_metrics_fn(output, target, topk=(1, 5))``; must return
               two values that support ``.item()``.
           batch_size: Batch size used only when ``dataset`` is not a
               ``DataLoader``.

       Returns:
           Dict with keys ``'acc1'`` and ``'acc5'`` holding the mean of the
           per-batch metric values.
       """
       print("Running validation...")
       from tqdm import tqdm

       if isinstance(dataset, DataLoader):
           loader = dataset
       else:
           loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                               num_workers=8, pin_memory=True)

       if not isinstance(model, torch.nn.DataParallel):
           model = torch.nn.DataParallel(model)
       model.to(device)
       model.eval()

       top1_scores = []
       top5_scores = []
       loss_values = []

       with torch.no_grad():
           for images, target in tqdm(loader, total=len(loader)):
               images = images.to(device)
               target = target.to(device)

               # Forward pass and loss for this batch.
               output = model(images)
               batch_loss = criterion(output, target)

               # Record per-batch metrics; the loss is collected but not returned.
               top1, top5 = compute_metrics_fn(output, target, topk=(1, 5))
               top1_scores.append(top1.item())
               top5_scores.append(top5.item())
               loss_values.append(batch_loss.item())

       return {'acc1': np.mean(top1_scores), 'acc5': np.mean(top5_scores)}
   
   # Usage: python runscript.py model.pt data.pt
   if len(sys.argv) < 3:
       sys.exit("usage: python runscript.py model.pt data.pt")

   DEVICE = 'cuda'
   # NOTE: torch.load unpickles arbitrary Python objects; only load files
   # from a source you trust.
   model = torch.load(sys.argv[1])
   data = torch.load(sys.argv[2])

   # Reference accuracy from running the model directly in PyTorch.
   res1 = eval_model_vision(model, data, device=DEVICE,
                            criterion=torch.nn.CrossEntropyLoss(),
                            compute_metrics_fn=accuracy,
                            batch_size=100)
   print(res1)

   # Accuracy of the same model after compiling it with TVM.
   res2 = eval_model_tvm(model, data, DEVICE, batch_size=100)
   print(res2)
   ```
   
   **Actual output**:
   ```
   {'acc1': 0.3, 'acc5': 0.8}
   {'acc1': 0.0, 'acc5': 0.8}
   ```
   
   
   **Expected output**:
   Both results should be the same.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to