crawlingcub opened a new issue, #12524: URL: https://github.com/apache/tvm/issues/12524
Hi, I used a variant of the pre-trained AlexNet model with a subset of the CIFAR-100 dataset. I am getting lower accuracy with TVM compared to PyTorch. I made two modifications to the original model. I changed a `ReLU` layer to `LogSigmoid` and added some noise to a `Conv2d` layer. I have more examples where using `LogSigmoid` sometimes leads to different accuracy. Is this a bug? Converting from an ONNX model also leads to the same result. Please find the code/model/dataset below. Let me know if you need more info. Thanks! ### Expected behavior Accuracy should be the same ### Actual behavior Accuracy with TVM is lower: (Acc1 and Acc5 numbers) ``` Running validation... Pytorch: 2.5 3.5 TVM: 0.1 1.9 ``` ### Environment torch 1.8.0+cu111 torchvision 0.9.0+cu111 TVM version: latest ecbe4ca0edadeca8fee4d0c2c9f7a9093043b5ee Python 3.7.12 ### Steps to reproduce Download the model and data files [here](https://drive.google.com/drive/folders/1_AOA-9hA1I92vBUeQMbZ6F-2r69_aIrF?usp=sharing) Python script to reproduce: ```python import sys import os import torch from torch.utils.data import DataLoader import sys from torchvision import datasets from torchvision.transforms import transforms import numpy as np import os import pickle import torch import metrics import sys def accuracy(output, target, topk=(1,)): """Computes the accuracy over the k top predictions for the specified values of k""" with torch.no_grad(): maxk = max(topk) batch_size = target.size(0) _, pred = output.topk(maxk, 1, True, True) pred = pred.t() correct = pred.eq(target.view(1, -1).expand_as(pred)) res = [] for k in topk: correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) res.append(correct_k.mul_(100.0 / batch_size)) return res def eval_model_tvm(model, dataset, device, batch_size): import tvm from tvm import relay from tvm.contrib.download import download_testdata from tvm.contrib import graph_executor import logging logger = logging.getLogger('compile_engine') logger.setLevel(logging.ERROR) 
validation_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False) if "cpu" in device.lower(): target = tvm.target.Target("llvm", host="llvm") else: target = tvm.target.cuda() print("target", target) dev = tvm.device(str(target)) model = model.to("cpu") model.eval() mod = None lib = None acc1s = [] acc5s = [] for i, (images, targets) in enumerate(validation_dataloader): input_name = "input0" if mod is None: scripted_model = torch.jit.trace(model, images).eval() print("scripted") input_data = np.array([images[i].data.numpy() for i in range(len(images))], dtype="float32") shape_list = [(input_name, input_data.shape)] mod, params = relay.frontend.from_pytorch(scripted_model, shape_list) with tvm.transform.PassContext(opt_level=3): lib = relay.build(mod, target=target, params=params) m = graph_executor.GraphModule(lib["default"](dev)) m.set_input(input_name, tvm.nd.array(images)) m.run() output = torch.tensor(m.get_output(0).asnumpy()) acc1, acc5 = accuracy(output, targets, topk=(1, 5)) acc1s.append(acc1.item()) acc5s.append(acc5.item()) return {'acc1': np.mean(acc1s), 'acc5': np.mean(acc5s)} def eval_model_vision(model, dataset, device, criterion, compute_metrics_fn, batch_size): print("Running validation...") from tqdm import tqdm if not isinstance(model, torch.nn.DataParallel): model = torch.nn.DataParallel(model) if not isinstance(dataset, DataLoader): validation_dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True) else: validation_dataloader = dataset acc1s = [] acc2s = [] losses = [] model.to(device) model.eval() with torch.no_grad(): for i, (images, target) in tqdm(enumerate(validation_dataloader), total=len(validation_dataloader)): # compute output images = images.to(device) target = target.to(device) output = model(images) loss = criterion(output, target) # measure accuracy and record loss acc1, acc5 = compute_metrics_fn(output, target, topk=(1, 5)) acc1s.append(acc1.item()) 
acc2s.append(acc5.item()) losses.append(loss.item()) #if i % 10 == 0: # print(i, loss) return {'acc1': np.mean(acc1s), 'acc5': np.mean(acc2s)} if __name__ == '__main__': DEVICE='cuda' model=torch.load(sys.argv[1]) data=torch.load(sys.argv[2]) criterion=torch.nn.CrossEntropyLoss() batch_size=10 results_torch = eval_model_vision(model, data, device=DEVICE, criterion=criterion, compute_metrics_fn=accuracy, batch_size=batch_size) print(results_torch["acc1"], results_torch["acc5"]) res2= eval_model_tvm(model, data, DEVICE, batch_size=batch_size) print(res2["acc1"], res2["acc5"]) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
