This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 1a01102f87 [Doc] Remove MxNet related tutorials (#16572)
1a01102f87 is described below

commit 1a01102f876adec6f66c4a210e688601728fcd5a
Author: Siyuan Feng <[email protected]>
AuthorDate: Thu Feb 15 21:07:20 2024 +0800

    [Doc] Remove MxNet related tutorials (#16572)
    
    * [Doc] Remove MxNet related tutorials
    
    As mxnet is retired, we remove related tutorials
    and scripts first.
    
    We will also remove mxnet frontend support in the future
---
 apps/benchmark/adreno/adreno_gpu_bench_clml.py     |  11 -
 apps/benchmark/adreno/adreno_gpu_bench_texture.py  |  11 -
 apps/benchmark/util.py                             |  11 -
 docs/conf.py                                       |   1 -
 gallery/how_to/compile_models/from_mxnet.py        | 153 ------
 .../how_to/deploy_models/deploy_model_on_nano.py   |  30 +-
 .../how_to/deploy_models/deploy_model_on_rasp.py   |  32 +-
 gallery/how_to/deploy_models/deploy_quantized.py   | 172 -------
 .../how_to/extend_tvm/bring_your_own_datatypes.py  | 415 ----------------
 .../tune_with_autoscheduler/tune_network_arm.py    |  23 +-
 .../tune_with_autoscheduler/tune_network_cuda.py   |  20 +-
 .../tune_with_autoscheduler/tune_network_mali.py   |  19 +-
 .../tune_with_autoscheduler/tune_network_x86.py    |  31 +-
 gallery/how_to/tune_with_autotvm/tune_relay_arm.py |  11 -
 .../how_to/tune_with_autotvm/tune_relay_cuda.py    |  11 -
 .../tune_with_autotvm/tune_relay_mobile_gpu.py     |  11 -
 gallery/how_to/tune_with_autotvm/tune_relay_x86.py |  11 -
 vta/scripts/tune_resnet.py                         | 377 ---------------
 vta/tutorials/autotvm/README.txt                   |   2 -
 vta/tutorials/autotvm/tune_alu_vta.py              | 345 -------------
 vta/tutorials/autotvm/tune_relay_vta.py            | 538 ---------------------
 vta/tutorials/frontend/deploy_classification.py    | 313 ------------
 22 files changed, 43 insertions(+), 2505 deletions(-)

diff --git a/apps/benchmark/adreno/adreno_gpu_bench_clml.py 
b/apps/benchmark/adreno/adreno_gpu_bench_clml.py
index e045f60c3a..a7e2e5e9c2 100755
--- a/apps/benchmark/adreno/adreno_gpu_bench_clml.py
+++ b/apps/benchmark/adreno/adreno_gpu_bench_clml.py
@@ -84,17 +84,6 @@ def get_network(name, batch_size, dtype="float32"):
         net, params = testing.squeezenet.get_workload(
             batch_size=batch_size, version=version, dtype=dtype
         )
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        net, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = net["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        net = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/apps/benchmark/adreno/adreno_gpu_bench_texture.py 
b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
index 7b10111186..5c4ee3bb6e 100755
--- a/apps/benchmark/adreno/adreno_gpu_bench_texture.py
+++ b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
@@ -83,17 +83,6 @@ def get_network(name, batch_size, dtype="float32"):
         net, params = testing.squeezenet.get_workload(
             batch_size=batch_size, version=version, dtype=dtype
         )
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        net, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = net["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        net = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/apps/benchmark/util.py b/apps/benchmark/util.py
index 01f0a11635..4e9bfa8d9e 100644
--- a/apps/benchmark/util.py
+++ b/apps/benchmark/util.py
@@ -72,17 +72,6 @@ def get_network(name, batch_size, dtype="float32"):
         net, params = testing.squeezenet.get_workload(
             batch_size=batch_size, version=version, dtype=dtype
         )
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        net, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = net["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        net = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/docs/conf.py b/docs/conf.py
index d599b4fdc0..553aaf8a92 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -441,7 +441,6 @@ subsection_order = ExplicitOrder(
     for p in [
         tvm_path / "vta" / "tutorials" / "frontend",
         tvm_path / "vta" / "tutorials" / "optimize",
-        tvm_path / "vta" / "tutorials" / "autotvm",
     ]
 )
 
diff --git a/gallery/how_to/compile_models/from_mxnet.py 
b/gallery/how_to/compile_models/from_mxnet.py
deleted file mode 100644
index 132f098d92..0000000000
--- a/gallery/how_to/compile_models/from_mxnet.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-.. _tutorial-from-mxnet:
-
-Compile MXNet Models
-====================
-**Author**: `Joshua Z. Zhang <https://zhreshold.github.io/>`_, \
-            `Kazutaka Morita <https://github.com/kazum>`_
-
-This article is an introductory tutorial to deploy mxnet models with Relay. To 
begin, we must install `mxnet`:
-
-.. code-block:: bash
-
-    %%shell
-    pip install mxnet
-
-or please refer to official installation guide.
-https://mxnet.apache.org/versions/master/install/index.html
-"""
-
-# some standard imports
-# sphinx_gallery_start_ignore
-# sphinx_gallery_requires_cuda = True
-# sphinx_gallery_end_ignore
-import sys
-import mxnet as mx
-import tvm
-import tvm.relay as relay
-import numpy as np
-
-######################################################################
-# Download Resnet18 model from Gluon Model Zoo
-# ---------------------------------------------
-# In this section, we download a pretrained imagenet model and classify an 
image.
-from tvm.contrib.download import download_testdata
-from mxnet.gluon.model_zoo.vision import get_model
-from PIL import Image
-from matplotlib import pyplot as plt
-
-try:
-    block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
-
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
-img_name = "cat.png"
-synset_url = "".join(
-    [
-        "https://gist.githubusercontent.com/zhreshold/",
-        "4d0b62f3d01426887599d4f7ede23ee5/raw/",
-        "596b27d23537e5a1b5751d2b0481ef172f58b539/",
-        "imagenet1000_clsid_to_human.txt",
-    ]
-)
-synset_name = "imagenet1000_clsid_to_human.txt"
-img_path = download_testdata(img_url, "cat.png", module="data")
-synset_path = download_testdata(synset_url, synset_name, module="data")
-with open(synset_path) as f:
-    synset = eval(f.read())
-image = Image.open(img_path).resize((224, 224))
-plt.imshow(image)
-plt.show()
-
-
-def transform_image(image):
-    image = np.array(image) - np.array([123.0, 117.0, 104.0])
-    image /= np.array([58.395, 57.12, 57.375])
-    image = image.transpose((2, 0, 1))
-    image = image[np.newaxis, :]
-    return image
-
-
-x = transform_image(image)
-print("x", x.shape)
-
-######################################################################
-# Compile the Graph
-# -----------------
-# Now we would like to port the Gluon model to a portable computational graph.
-# It's as easy as several lines.
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
-## we want a probability so add a softmax operator
-func = mod["main"]
-func = relay.Function(func.params, relay.nn.softmax(func.body), None, 
func.type_params, func.attrs)
-
-######################################################################
-# now compile the graph
-target = "cuda"
-with tvm.transform.PassContext(opt_level=3):
-    lib = relay.build(func, target, params=params)
-
-######################################################################
-# Execute the portable graph on TVM
-# ---------------------------------
-# Now, we would like to reproduce the same forward computation using TVM.
-from tvm.contrib import graph_executor
-
-dev = tvm.cuda(0)
-dtype = "float32"
-m = graph_executor.GraphModule(lib["default"](dev))
-# set inputs
-m.set_input("data", tvm.nd.array(x.astype(dtype)))
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0)
-top1 = np.argmax(tvm_output.numpy()[0])
-print("TVM prediction top-1:", top1, synset[top1])
-
-######################################################################
-# Use MXNet symbol with pretrained weights
-# ----------------------------------------
-# MXNet often use `arg_params` and `aux_params` to store network parameters
-# separately, here we show how to use these weights with existing API
-def block2symbol(block):
-    data = mx.sym.Variable("data")
-    sym = block(data)
-    args = {}
-    auxs = {}
-    for k, v in block.collect_params().items():
-        args[k] = mx.nd.array(v.data().asnumpy())
-    return sym, args, auxs
-
-
-mx_sym, args, auxs = block2symbol(block)
-# usually we would save/load it as checkpoint
-mx.model.save_checkpoint("resnet18_v1", 0, mx_sym, args, auxs)
-# there are 'resnet18_v1-0000.params' and 'resnet18_v1-symbol.json' on disk
-
-######################################################################
-# for a normal mxnet model, we start from here
-mx_sym, args, auxs = mx.model.load_checkpoint("resnet18_v1", 0)
-# now we use the same API to get Relay computation graph
-mod, relay_params = relay.frontend.from_mxnet(mx_sym, shape_dict, 
arg_params=args, aux_params=auxs)
-# repeat the same steps to run this model using TVM
diff --git a/gallery/how_to/deploy_models/deploy_model_on_nano.py 
b/gallery/how_to/deploy_models/deploy_model_on_nano.py
index 761187e2d7..a656159546 100644
--- a/gallery/how_to/deploy_models/deploy_model_on_nano.py
+++ b/gallery/how_to/deploy_models/deploy_model_on_nano.py
@@ -102,22 +102,22 @@ from tvm.contrib.download import download_testdata
 # -----------------------------
 # Back to the host machine, which should have a full TVM installed (with LLVM).
 #
-# We will use pre-trained model from
-# `MXNet Gluon model zoo 
<https://mxnet.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can found more details about this part at tutorial 
:ref:`tutorial-from-mxnet`.
+# We will use pre-trained model from torchvision
 
-import sys
-
-from mxnet.gluon.model_zoo.vision import get_model
+import torch
+import torchvision
 from PIL import Image
 import numpy as np
 
 # one line to get the model
-try:
-    block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# We grab the TorchScripted model via tracing
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
 
 ######################################################################
 # In order to test our model, here we download an image of cat and
@@ -158,9 +158,9 @@ with open(synset_path) as f:
 # Now we would like to port the Gluon model to a portable computational graph.
 # It's as easy as several lines.
 
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
+input_name = "input0"
+shape_list = [(input_name, x.shape)]
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
 # we want a probability so add a softmax operator
 func = mod["main"]
 func = relay.Function(func.params, relay.nn.softmax(func.body), None, 
func.type_params, func.attrs)
@@ -241,7 +241,7 @@ else:
 
 module = runtime.GraphModule(rlib["default"](dev))
 # set input data
-module.set_input("data", tvm.nd.array(x.astype("float32")))
+module.set_input(input_name, tvm.nd.array(x.astype("float32")))
 # run
 module.run()
 # get output
diff --git a/gallery/how_to/deploy_models/deploy_model_on_rasp.py 
b/gallery/how_to/deploy_models/deploy_model_on_rasp.py
index 5196ae9ce1..64f83dbbc0 100644
--- a/gallery/how_to/deploy_models/deploy_model_on_rasp.py
+++ b/gallery/how_to/deploy_models/deploy_model_on_rasp.py
@@ -95,22 +95,22 @@ from tvm.contrib.download import download_testdata
 # -----------------------------
 # Back to the host machine, which should have a full TVM installed (with LLVM).
 #
-# We will use pre-trained model from
-# `MXNet Gluon model zoo 
<https://mxnet.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can found more details about this part at tutorial 
:ref:`tutorial-from-mxnet`.
+# We will use pre-trained model from torchvision
 
-import sys
-
-from mxnet.gluon.model_zoo.vision import get_model
+import torch
+import torchvision
 from PIL import Image
 import numpy as np
 
 # one line to get the model
-try:
-    block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# We grab the TorchScripted model via tracing
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
 
 ######################################################################
 # In order to test our model, here we download an image of cat and
@@ -148,12 +148,12 @@ with open(synset_path) as f:
     synset = eval(f.read())
 
 ######################################################################
-# Now we would like to port the Gluon model to a portable computational graph.
+# Now we would like to port the PyTorch model to a portable computational 
graph.
 # It's as easy as several lines.
 
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
+input_name = "input0"
+shape_list = [(input_name, x.shape)]
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
 # we want a probability so add a softmax operator
 func = mod["main"]
 func = relay.Function(func.params, relay.nn.softmax(func.body), None, 
func.type_params, func.attrs)
@@ -226,7 +226,7 @@ rlib = remote.load_module("net.tar")
 dev = remote.cpu(0)
 module = runtime.GraphModule(rlib["default"](dev))
 # set input data
-module.set_input("data", tvm.nd.array(x.astype("float32")))
+module.set_input(input_name, tvm.nd.array(x.astype("float32")))
 # run
 module.run()
 # get output
diff --git a/gallery/how_to/deploy_models/deploy_quantized.py 
b/gallery/how_to/deploy_models/deploy_quantized.py
deleted file mode 100644
index 2cdb7da5f8..0000000000
--- a/gallery/how_to/deploy_models/deploy_quantized.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy a Quantized Model on Cuda
-================================
-**Author**: `Wuwei Lin <https://github.com/vinx13>`_
-
-This article is an introductory tutorial of automatic quantization with TVM.
-Automatic quantization is one of the quantization modes in TVM. More details on
-the quantization story in TVM can be found
-`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.
-In this tutorial, we will import a GluonCV pre-trained model on ImageNet to
-Relay, quantize the Relay model and then perform the inference.
-"""
-
-import logging
-import os
-import sys
-
-import tvm
-from tvm import te
-from tvm import relay
-import mxnet as mx
-from tvm.contrib.download import download_testdata
-from mxnet import gluon
-
-
-batch_size = 1
-model_name = "resnet18_v1"
-target = "cuda"
-dev = tvm.device(target)
-
-###############################################################################
-# Prepare the Dataset
-# -------------------
-# We will demonstrate how to prepare the calibration dataset for quantization.
-# We first download the validation set of ImageNet and pre-process the dataset.
-calibration_rec = download_testdata(
-    
"http://data.mxnet.io.s3-website-us-west-1.amazonaws.com/data/val_256_q90.rec",
-    "val_256_q90.rec",
-)
-
-
-def get_val_data(num_workers=4):
-    mean_rgb = [123.68, 116.779, 103.939]
-    std_rgb = [58.393, 57.12, 57.375]
-
-    def batch_fn(batch):
-        return batch.data[0].asnumpy(), batch.label[0].asnumpy()
-
-    img_size = 299 if model_name == "inceptionv3" else 224
-    val_data = mx.io.ImageRecordIter(
-        path_imgrec=calibration_rec,
-        preprocess_threads=num_workers,
-        shuffle=False,
-        batch_size=batch_size,
-        resize=256,
-        data_shape=(3, img_size, img_size),
-        mean_r=mean_rgb[0],
-        mean_g=mean_rgb[1],
-        mean_b=mean_rgb[2],
-        std_r=std_rgb[0],
-        std_g=std_rgb[1],
-        std_b=std_rgb[2],
-    )
-    return val_data, batch_fn
-
-
-###############################################################################
-# The calibration dataset should be an iterable object. We define the
-# calibration dataset as a generator object in Python. In this tutorial, we
-# only use a few samples for calibration.
-
-calibration_samples = 10
-
-
-def calibrate_dataset():
-    val_data, batch_fn = get_val_data()
-    val_data.reset()
-    for i, batch in enumerate(val_data):
-        if i * batch_size >= calibration_samples:
-            break
-        data, _ = batch_fn(batch)
-        yield {"data": data}
-
-
-###############################################################################
-# Import the model
-# ----------------
-# We use the Relay MxNet frontend to import a model from the Gluon model zoo.
-def get_model():
-    gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
-    img_size = 299 if model_name == "inceptionv3" else 224
-    data_shape = (batch_size, 3, img_size, img_size)
-    mod, params = relay.frontend.from_mxnet(gluon_model, {"data": data_shape})
-    return mod, params
-
-
-###############################################################################
-# Quantize the Model
-# ------------------
-# In quantization, we need to find the scale for each weight and intermediate
-# feature map tensor of each layer.
-#
-# For weights, the scales are directly calculated based on the value of the
-# weights. Two modes are supported: `power2` and `max`. Both modes find the
-# maximum value within the weight tensor first. In `power2` mode, the maximum
-# is rounded down to power of two. If the scales of both weights and
-# intermediate feature maps are power of two, we can leverage bit shifting for
-# multiplications. This make it computationally more efficient. In `max` mode,
-# the maximum is used as the scale. Without rounding, `max` mode might have
-# better accuracy in some cases. When the scales are not powers of two, fixed
-# point multiplications will be used.
-#
-# For intermediate feature maps, we can find the scales with data-aware
-# quantization. Data-aware quantization takes a calibration dataset as the
-# input argument. Scales are calculated by minimizing the KL divergence between
-# distribution of activation before and after quantization.
-# Alternatively, we can also use pre-defined global scales. This saves the time
-# for calibration. But the accuracy might be impacted.
-
-
-def quantize(mod, params, data_aware):
-    if data_aware:
-        with relay.quantize.qconfig(calibrate_mode="kl_divergence", 
weight_scale="max"):
-            mod = relay.quantize.quantize(mod, params, 
dataset=calibrate_dataset())
-    else:
-        with relay.quantize.qconfig(calibrate_mode="global_scale", 
global_scale=8.0):
-            mod = relay.quantize.quantize(mod, params)
-    return mod
-
-
-###############################################################################
-# Run Inference
-# -------------
-# We create a Relay VM to build and execute the model.
-def run_inference(mod):
-    model = relay.create_executor("vm", mod, dev, target).evaluate()
-    val_data, batch_fn = get_val_data()
-    for i, batch in enumerate(val_data):
-        data, label = batch_fn(batch)
-        prediction = model(data)
-        if i > 10:  # only run inference on a few samples in this tutorial
-            break
-
-
-def main():
-    try:
-        mod, params = get_model()
-    except RuntimeError:
-        print("Downloads from mxnet no longer supported", file=sys.stderr)
-        return
-    mod = quantize(mod, params, data_aware=True)
-    run_inference(mod)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/gallery/how_to/extend_tvm/bring_your_own_datatypes.py 
b/gallery/how_to/extend_tvm/bring_your_own_datatypes.py
deleted file mode 100644
index e502aff3e0..0000000000
--- a/gallery/how_to/extend_tvm/bring_your_own_datatypes.py
+++ /dev/null
@@ -1,415 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Bring Your Own Datatypes to TVM
-===============================
-**Authors**: `Gus Smith <https://github.com/gussmith23>`_, `Andrew Liu 
<https://github.com/hypercubestart>`_
-
-In this tutorial, we will show you how to utilize the Bring Your Own Datatypes 
framework to use your own custom datatypes in TVM.
-Note that the Bring Your Own Datatypes framework currently only handles 
**software emulated versions of datatypes**.
-The framework does not support compiling for custom accelerator datatypes 
out-of-the-box.
-
-Datatype Libraries
-------------------
-
-The Bring Your Own Datatypes allows users to register their own datatype 
implementations alongside TVM's native datatypes (such as ``float``).
-In the wild, these datatype implementations often appear as libraries.
-For example:
-
-- `libposit <https://github.com/cjdelisle/libposit>`_, a posit library
-- `Stillwater Universal <https://github.com/stillwater-sc/universal>`_, a 
library with posits, fixed-point numbers, and other types
-- `SoftFloat <https://github.com/ucb-bar/berkeley-softfloat-3>`_, Berkeley's 
software implementation of IEEE 754 floating-point
-
-The Bring Your Own Datatypes enables users to plug these datatype 
implementations into TVM!
-
-In this section, we will use an example library we have already implemented, 
located at ``3rdparty/byodt/myfloat.cc``.
-This datatype, which we dubbed "myfloat", is really just a IEE-754 float 
under-the-hood, but it serves a useful example
-to show that any datatype can be used in the BYODT framework.
-
-Setup
------
-
-Since we do not use any 3rdparty library, there is no setup needed.
-
-If you would like to try this with your own datatype library, first bring the 
library's functions into the process space with ``CDLL``:
-
-.. code-block:: python
-
-    ctypes.CDLL('my-datatype-lib.so', ctypes.RTLD_GLOBAL)
-"""
-
-
-######################
-# A Simple TVM Program
-# --------------------
-#
-# We'll begin by writing a simple program in TVM; afterwards, we will re-write 
it to use custom datatypes.
-import sys
-
-import tvm
-from tvm import relay
-
-# Our basic program: Z = X + Y
-x = relay.var("x", shape=(3,), dtype="float32")
-y = relay.var("y", shape=(3,), dtype="float32")
-z = x + y
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-
-######################################################################
-# Now, we create random inputs to feed into this program using numpy:
-
-import numpy as np
-
-np.random.seed(23)  # for reproducibility
-
-x_input = np.random.rand(3).astype("float32")
-y_input = np.random.rand(3).astype("float32")
-print("x: {}".format(x_input))
-print("y: {}".format(y_input))
-
-######################################################################
-# Finally, we're ready to run the program:
-
-z_output = relay.create_executor(mod=module).evaluate()(x_input, y_input)
-print("z: {}".format(z_output))
-
-######################################################################
-# Adding Custom Datatypes
-# -----------------------
-# Now, we will do the same, but we will use a custom datatype for our 
intermediate computation.
-#
-# We use the same input variables ``x`` and ``y`` as above, but before adding 
``x + y``, we first cast both ``x`` and ``y`` to a custom datatype via the 
``relay.cast(...)`` call.
-#
-# Note how we specify the custom datatype: we indicate it using the special 
``custom[...]`` syntax.
-# Additionally, note the "32" after the datatype: this is the bitwidth of the 
custom datatype. This tells TVM that each instance of ``myfloat`` is 32 bits 
wide.
-
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
-        y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
-        z_myfloat = x_myfloat + y_myfloat
-        z = relay.cast(z_myfloat, dtype="float32")
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# Trying to generate this program throws an error from TVM.
-# TVM does not know how to handle any custom datatype out of the box!
-# We first have to register the custom type with TVM, giving it a name and a 
type code:
-
-tvm.target.datatype.register("myfloat", 150)
-
-######################################################################
-# Note that the type code, 150, is currently chosen manually by the user.
-# See ``TVMTypeCode::kCustomBegin`` in `include/tvm/runtime/c_runtime_api.h 
<https://github.com/apache/tvm/blob/main/include/tvm/runtime/data_type.h>`_.
-# Now we can generate our program again:
-
-x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
-y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
-z_myfloat = x_myfloat + y_myfloat
-z = relay.cast(z_myfloat, dtype="float32")
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-module = relay.transform.InferType()(module)
-
-######################################################################
-# Now we have a Relay program that uses myfloat!
-print(program)
-
-######################################################################
-# Now that we can express our program without errors, let's try running it!
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        z_output_myfloat = relay.create_executor("graph", 
mod=module).evaluate()(x_input, y_input)
-        print("z: {}".format(y_myfloat))
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# Now, trying to compile this program throws an error.
-# Let's dissect this error.
-#
-# The error is occurring during the process of lowering the custom datatype 
code to code that TVM can compile and run.
-# TVM is telling us that it cannot find a *lowering function* for the ``Cast`` 
operation, when casting from source type 2 (``float``, in TVM), to destination 
type 150 (our custom datatype).
-# When lowering custom datatypes, if TVM encounters an operation over a custom 
datatype, it looks for a user-registered *lowering function*, which tells it 
how to lower the operation to an operation over datatypes it understands.
-# We have not told TVM how to lower ``Cast`` operations for our custom 
datatypes; thus, the source of this error.
-#
-# To fix this error, we simply need to specify a lowering function:
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func(
-        {
-            (32, 32): "FloatToCustom32",  # cast from float32 to myfloat32
-        }
-    ),
-    "Cast",
-    "llvm",
-    "float",
-    "myfloat",
-)
-
-######################################################################
-# The ``register_op(...)`` call takes a lowering function, and a number of 
parameters which specify exactly the operation which should be lowered with the 
provided lowering function.
-# In this case, the arguments we pass specify that this lowering function is 
for lowering a ``Cast`` from ``float`` to ``myfloat`` for target ``"llvm"``.
-#
-# The lowering function passed into this call is very general: it should take 
an operation of the specified type (in this case, `Cast`) and return another 
operation which only uses datatypes which TVM understands.
-#
-# In the general case, we expect users to implement operations over their 
custom datatypes using calls to an external library.
-# In our example, our ``myfloat`` library implements a ``Cast`` from ``float`` 
to 32-bit ``myfloat`` in the function ``FloatToCustom32``.
-# To provide for the general case, we have made a helper function, 
``create_lower_func(...)``,
-# which does just this: given a dictionary, it replaces the given operation 
with a ``Call`` to the appropriate function name provided based on the op and 
the bit widths.
-# It additionally removes usages of the custom datatype by storing the custom 
datatype in an opaque ``uint`` of the appropriate width; in our case, a 
``uint32_t``.
-# For more information, see `the source code 
<https://github.com/apache/tvm/blob/main/python/tvm/target/datatype.py>`_.
-
-# We can now re-try running the program:
-try:
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        z_output_myfloat = relay.create_executor("graph", 
mod=module).evaluate()(x_input, y_input)
-        print("z: {}".format(z_output_myfloat))
-except tvm.TVMError as e:
-    # Print last line of error
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# This new error tells us that the ``Add`` lowering function is not found, 
which is good news, as it's no longer complaining about the ``Cast``!
-# We know what to do from here: we just need to register the lowering 
functions for the other operations in our program.
-#
-# Note that for ``Add``, ``create_lower_func`` takes in a dict where the key 
is an integer.
-# For ``Cast`` operations, we require a 2-tuple to specify the 
``src_bit_length`` and the ``dest_bit_length``,
-# while for all other operations, the bit length is the same between the 
operands so we only require one integer to specify ``bit_length``.
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Add"}),
-    "Add",
-    "llvm",
-    "myfloat",
-)
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({(32, 32): "Custom32ToFloat"}),
-    "Cast",
-    "llvm",
-    "myfloat",
-    "float",
-)
-
-# Now, we can run our program without errors.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-    z_output_myfloat = relay.create_executor(mod=module).evaluate()(x_input, 
y_input)
-print("z: {}".format(z_output_myfloat))
-
-print("x:\t\t{}".format(x_input))
-print("y:\t\t{}".format(y_input))
-print("z (float32):\t{}".format(z_output))
-print("z (myfloat32):\t{}".format(z_output_myfloat))
-
-# Perhaps as expected, the ``myfloat32`` results and ``float32`` are exactly 
the same!
-
-######################################################################
-# Running Models With Custom Datatypes
-# ------------------------------------
-#
-# We will first choose the model which we would like to run with myfloat.
-# In this case we use `Mobilenet <https://arxiv.org/abs/1704.04861>`_.
-# We choose Mobilenet due to its small size.
-# In this alpha state of the Bring Your Own Datatypes framework, we have not 
implemented any software optimizations for running software emulations of 
custom datatypes; the result is poor performance due to many calls into our 
datatype emulation library.
-#
-# First let us define two helper functions to get the mobilenet model and a 
cat image.
-
-
-def get_mobilenet():
-    dshape = (1, 3, 224, 224)
-    from mxnet.gluon.model_zoo.vision import get_model
-
-    block = get_model("mobilenet0.25", pretrained=True)
-    shape_dict = {"data": dshape}
-    return relay.frontend.from_mxnet(block, shape_dict)
-
-
-def get_cat_image():
-    from tvm.contrib.download import download_testdata
-    from PIL import Image
-
-    url = 
"https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png";
-    dst = "cat.png"
-    real_dst = download_testdata(url, dst, module="data")
-    img = Image.open(real_dst).resize((224, 224))
-    # CoreML's standard model image format is BGR
-    img_bgr = np.array(img)[:, :, ::-1]
-    img = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]
-    return np.asarray(img, dtype="float32")
-
-
-try:
-    module, params = get_mobilenet()
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
-
-######################################################################
-# It's easy to execute MobileNet with native TVM:
-
-ex = tvm.relay.create_executor("graph", mod=module, params=params)
-input = get_cat_image()
-result = ex.evaluate()(input).numpy()
-# print first 10 elements
-print(result.flatten()[:10])
-
-######################################################################
-# Now, we would like to change the model to use myfloat internally. To do so, 
we need to convert the network. To do this, we first define a function which 
will help us convert tensors:
-
-
-def convert_ndarray(dst_dtype, array):
-    """Converts an NDArray into the specified datatype"""
-    x = relay.var("x", shape=array.shape, dtype=str(array.dtype))
-    cast = relay.Function([x], x.astype(dst_dtype))
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        return relay.create_executor("graph").evaluate(cast)(array)
-
-
-######################################################################
-# Now, to actually convert the entire network, we have written `a pass in 
Relay 
<https://github.com/gussmith23/tvm/blob/ea174c01c54a2529e19ca71e125f5884e728da6e/python/tvm/relay/frontend/change_datatype.py#L21>`_
 which simply converts all nodes within the model to use the new datatype.
-
-from tvm.relay.frontend.change_datatype import ChangeDatatype
-
-src_dtype = "float32"
-dst_dtype = "custom[myfloat]32"
-
-module = relay.transform.InferType()(module)
-
-# Currently, custom datatypes only work if you run simplify_inference 
beforehand
-module = tvm.relay.transform.SimplifyInference()(module)
-
-# Run type inference before changing datatype
-module = tvm.relay.transform.InferType()(module)
-
-# Change datatype from float to myfloat and re-infer types
-cdtype = ChangeDatatype(src_dtype, dst_dtype)
-expr = cdtype.visit(module["main"])
-module = tvm.relay.transform.InferType()(module)
-
-# We also convert the parameters:
-params = {k: convert_ndarray(dst_dtype, v) for k, v in params.items()}
-
-# We also need to convert our input:
-input = convert_ndarray(dst_dtype, input)
-
-# Finally, we can try to run the converted model:
-try:
-    # Vectorization is not implemented with custom datatypes.
-    with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-        result_myfloat = tvm.relay.create_executor("graph", 
mod=module).evaluate(expr)(
-            input, **params
-        )
-except tvm.TVMError as e:
-    print(str(e).split("\n")[-1])
-
-######################################################################
-# When we attempt to run the model, we get a familiar error telling us that 
more functions need to be registered for myfloat.
-#
-# Because this is a neural network, many more operations are required.
-# Here, we register all the needed functions:
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "FloatToCustom32"}),
-    "FloatImm",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.lower_ite, "Call", "llvm", "myfloat", 
intrinsic_name="tir.if_then_else"
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.lower_call_pure_extern,
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.call_pure_extern",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Mul"}),
-    "Mul",
-    "llvm",
-    "myfloat",
-)
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Div"}),
-    "Div",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Sqrt"}),
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.sqrt",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Sub"}),
-    "Sub",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Exp"}),
-    "Call",
-    "llvm",
-    "myfloat",
-    intrinsic_name="tir.exp",
-)
-
-tvm.target.datatype.register_op(
-    tvm.target.datatype.create_lower_func({32: "Custom32Max"}),
-    "Max",
-    "llvm",
-    "myfloat",
-)
-
-tvm.target.datatype.register_min_func(
-    tvm.target.datatype.create_min_lower_func({32: "MinCustom32"}, "myfloat"),
-    "myfloat",
-)
-
-######################################################################
-# Note we are making use of two new functions: ``register_min_func`` and 
``create_min_lower_func``.
-#
-# ``register_min_func`` takes in an integer ``num_bits`` for the bit length, 
and should return an operation
-# representing the minimum finite representable value for the custom data type 
with the specified bit length.
-#
-# Similar to ``register_op`` and ``create_lower_func``, the 
``create_min_lower_func`` handles the general case
-# where the minimum representable custom datatype value is implemented using 
calls to an external library.
-#
-# Now we can finally run the model:
-
-# Vectorization is not implemented with custom datatypes.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
-    result_myfloat = relay.create_executor(mod=module).evaluate(expr)(input, 
**params)
-    result_myfloat = convert_ndarray(src_dtype, result_myfloat).numpy()
-    # print first 10 elements
-    print(result_myfloat.flatten()[:10])
-
-# Again, note that the output using 32-bit myfloat exactly the same as 32-bit 
floats,
-# because myfloat is exactly a float!
-np.testing.assert_array_equal(result, result_myfloat)
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py 
b/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
index 0b59038f19..d795c3aba2 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
@@ -120,19 +120,6 @@ def get_network(name, batch_size, layout="NHWC", 
dtype="float32", use_sparse=Fal
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else 
(batch_size, 299, 299, 3)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        assert layout == "NCHW"
-
-        block = get_model("resnet50_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     elif name == "mlp":
         mod, params = relay.testing.mlp.get_workload(
             batch_size=batch_size, dtype=dtype, image_shape=image_shape, 
num_classes=1000
@@ -265,13 +252,9 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout, 
batch_size, target.kind.name)
 
 # Extract tasks from the network
 print("Get model...")
-try:
-    mod, params, input_shape, output_shape = get_network(
-        network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
-    )
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+mod, params, input_shape, output_shape = get_network(
+    network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
+)
 print("Extract tasks...")
 tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
 
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py 
b/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
index 41e7e8fb41..1f8c0cc13a 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
@@ -114,20 +114,6 @@ def get_network(name, batch_size, layout="NHWC", 
dtype="float32"):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else 
(batch_size, 299, 299, 3)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        assert layout == "NCHW"
-
-        block = get_model("resnet18_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
-
     return mod, params, input_shape, output_shape
 
 
@@ -152,11 +138,7 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout, 
batch_size, target.kind.name)
 
 # Extract tasks from the network
 print("Extract tasks...")
-try:
-    mod, params, input_shape, output_shape = get_network(network, batch_size, 
layout, dtype=dtype)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+mod, params, input_shape, output_shape = get_network(network, batch_size, 
layout, dtype=dtype)
 tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
 
 for idx, task in enumerate(tasks):
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py 
b/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
index 1c531a5303..15f3379013 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
@@ -117,19 +117,6 @@ def get_network(name, batch_size, layout="NHWC", 
dtype="float32"):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else 
(batch_size, 299, 299, 3)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        assert layout == "NCHW"
-
-        block = get_model("resnet50_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
 
     return mod, params, input_shape, output_shape
 
@@ -171,11 +158,7 @@ device_key = "rk3399"
 
 # Extract tasks from the network
 print("Extract tasks...")
-try:
-    mod, params, input_shape, output_shape = get_network(network, batch_size, 
layout, dtype=dtype)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+mod, params, input_shape, output_shape = get_network(network, batch_size, 
layout, dtype=dtype)
 tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
 
 for idx, task in enumerate(tasks):
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py 
b/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
index 96df3942ab..169567122f 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
@@ -117,19 +117,6 @@ def get_network(name, batch_size, layout="NHWC", 
dtype="float32", use_sparse=Fal
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else 
(batch_size, 299, 299, 3)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        assert layout == "NCHW"
-
-        block = get_model("resnet50_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     elif name == "mlp":
         mod, params = relay.testing.mlp.get_workload(
             batch_size=batch_size, dtype=dtype, image_shape=image_shape, 
num_classes=1000
@@ -169,17 +156,13 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout, 
batch_size, target.kind.name)
 
 # Extract tasks from the network
 print("Get model...")
-try:
-    mod, params, input_shape, output_shape = get_network(
-        network,
-        batch_size,
-        layout,
-        dtype=dtype,
-        use_sparse=use_sparse,
-    )
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
+mod, params, input_shape, output_shape = get_network(
+    network,
+    batch_size,
+    layout,
+    dtype=dtype,
+    use_sparse=use_sparse,
+)
 
 print("Extract tasks...")
 tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_arm.py 
b/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
index 4f2e952ce3..13674f5cda 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
@@ -105,17 +105,6 @@ def get_network(name, batch_size):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py 
b/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
index 47ea99884d..53d56c709d 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
@@ -104,17 +104,6 @@ def get_network(name, batch_size):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py 
b/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
index 3c2f173c23..d5b4b217ab 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
@@ -104,17 +104,6 @@ def get_network(name, batch_size):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={"data": 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_x86.py 
b/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
index a637f7222e..b56ec0ad0e 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
@@ -74,17 +74,6 @@ def get_network(name, batch_size):
     elif name == "inception_v3":
         input_shape = (batch_size, 3, 299, 299)
         mod, params = 
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
-    elif name == "mxnet":
-        # an example for mxnet model
-        from mxnet.gluon.model_zoo.vision import get_model
-
-        block = get_model("resnet18_v1", pretrained=True)
-        mod, params = relay.frontend.from_mxnet(block, shape={input_name: 
input_shape}, dtype=dtype)
-        net = mod["main"]
-        net = relay.Function(
-            net.params, relay.nn.softmax(net.body), None, net.type_params, 
net.attrs
-        )
-        mod = tvm.IRModule.from_expr(net)
     else:
         raise ValueError("Unsupported network: " + name)
 
diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
deleted file mode 100644
index 7fa6ec42ce..0000000000
--- a/vta/scripts/tune_resnet.py
+++ /dev/null
@@ -1,377 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Perform ResNet autoTVM tuning on VTA using Relay."""
-
-import argparse, os, sys, time
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib import graph_executor, utils, download
-from tvm.contrib.debugger import debug_executor
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-from tvm.autotvm.task import extract_from_program
-
-
-def parse_arguments():
-
-    parser = argparse.ArgumentParser(description="Train a model for image 
classification.")
-    parser.add_argument(
-        "--model",
-        type=str,
-        default="resnet18_v1",
-        choices=["resnet18_v1"],
-        help="Input model name.",
-    )
-    parser.add_argument(
-        "--start-name",
-        type=str,
-        default="nn.max_pool2d",
-        help="The name of the node where packing starts",
-    )
-    parser.add_argument(
-        "--stop-name",
-        type=str,
-        default="nn.global_avg_pool2d",
-        help="The name of the node where packing stops",
-    )
-    parser.add_argument(
-        "--debug-profile", action="store_true", help="Show layer-wise time 
cost profiling results"
-    )
-    parser.add_argument(
-        "--device", default="vta", choices=["vta", "arm_cpu"], help="Select 
device target"
-    )
-    parser.add_argument(
-        "--measurements", type=int, default=1, help="Number of measurements 
during AutoTVM search"
-    )
-    parser.add_argument("--tuner", type=str, default="random", help="AutoTVM 
search strategy")
-    parser.add_argument(
-        "--log-filename", type=str, default="resnet-18.log", help="AutoTVM log 
file name"
-    )
-
-    return parser.parse_args()
-
-
-def register_vta_tuning_tasks():
-    from tvm.autotvm.task.topi_integration import TaskExtractEnv, 
deserialize_args
-
-    @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
-    def my_clip(x, a_min, a_max):
-        """Unlike topi's current clip, put min and max into two stages."""
-        const_min = tvm.tir.const(a_min, x.dtype)
-        const_max = tvm.tir.const(a_max, x.dtype)
-        x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), 
name="clipA")
-        x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), 
name="clipB")
-        return x
-
-    # init autotvm env to register VTA operator
-    TaskExtractEnv()
-
-    @autotvm.task.register("topi_nn_conv2d", override=True)
-    def _topi_nn_conv2d(*args, **kwargs):
-        assert not kwargs, "Do not support kwargs in template function call"
-        args = deserialize_args(args)
-        A, W = args[:2]
-
-        with tvm.target.vta():
-            res = topi.nn.conv2d(*args, **kwargs)
-            res = topi.right_shift(res, 8)
-            res = my_clip(res, 0, 127)
-            res = topi.cast(res, "int8")
-
-        if tvm.target.Target.current().device_name == "vta":
-            s = topi.generic.schedule_conv2d_nchw([res])
-        else:
-            s = te.create_schedule([res.op])
-        return s, [A, W, res]
-
-    @autotvm.task.register("topi_nn_dense", override=True)
-    def _topi_nn_dense(*args, **kwargs):
-        assert not kwargs, "Do not support kwargs in template function call"
-        args = deserialize_args(args)
-        A, W = args[:2]
-
-        with tvm.target.vta():
-            res = topi.nn.dense(*args, **kwargs)
-            res = topi.right_shift(res, 8)
-            res = my_clip(res, 0, 127)
-            res = topi.cast(res, "int8")
-
-        if tvm.target.Target.current().device_name == "vta":
-            s = topi.generic.schedule_dense([res])
-        else:
-            s = te.create_schedule([res.op])
-
-        return s, [A, W, res]
-
-
-def compile_network(opt, env, target):
-
-    # Populate the shape and data type dictionary
-    dtype_dict = {"data": "float32"}
-    shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
-    # Get off the shelf gluon model, and convert to relay
-    gluon_model = vision.get_model(opt.model, pretrained=True)
-    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
-    # Update shape and type dictionary
-    shape_dict.update({k: v.shape for k, v in params.items()})
-    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
-    # Perform quantization in Relay
-    # Note: We set opt_level to 3 in order to fold batch norm
-    with tvm.transform.PassContext(opt_level=3):
-        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
-            relay_prog = relay.quantize.quantize(mod["main"], params=params)
-
-    # Perform graph packing and constant folding for VTA target
-    if target.device_name == "vta":
-        assert env.BLOCK_IN == env.BLOCK_OUT
-        relay_prog = graph_pack(
-            relay_prog,
-            env.BATCH,
-            env.BLOCK_OUT,
-            env.WGT_WIDTH,
-            start_name=opt.start_name,
-            stop_name=opt.stop_name,
-        )
-
-    return relay_prog, params
-
-
-def tune_tasks(
-    tasks,
-    measure_option,
-    tuner="xgb",
-    n_trial=1000,
-    early_stopping=None,
-    log_filename="tuning.log",
-    use_transfer_learning=True,
-    try_winograd=True,
-):
-
-    # create tmp log file
-    tmp_log_file = log_filename + ".tmp"
-    if os.path.exists(tmp_log_file):
-        os.remove(tmp_log_file)
-
-    for i, tsk in enumerate(reversed(tasks)):
-        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
-        # create tuner
-        if tuner == "xgb":
-            tuner_obj = XGBTuner(tsk, loss_type="reg")
-        elif tuner == "xgb_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
-        elif tuner == "xgb_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
-        elif tuner == "xgb_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
-        elif tuner == "xgb_rank":
-            tuner_obj = XGBTuner(tsk, loss_type="rank")
-        elif tuner == "xgb_rank_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
-        elif tuner == "xgb_rank_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
-        elif tuner == "xgb_rank_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
-        elif tuner == "xgb_rank_binary":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
-        elif tuner == "xgb_rank_binary_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="knob")
-        elif tuner == "xgb_rank_binary_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="itervar")
-        elif tuner == "xgb_rank_binary_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="curve")
-        elif tuner == "ga":
-            tuner_obj = GATuner(tsk, pop_size=50)
-        elif tuner == "random":
-            tuner_obj = RandomTuner(tsk)
-        elif tuner == "gridsearch":
-            tuner_obj = GridSearchTuner(tsk)
-        else:
-            raise ValueError("Invalid tuner: " + tuner)
-
-        if use_transfer_learning:
-            if os.path.isfile(tmp_log_file):
-                
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
-        # do tuning
-        n_trial_ = min(n_trial, len(tsk.config_space))
-        tuner_obj.tune(
-            n_trial_,
-            early_stopping=early_stopping,
-            measure_option=measure_option,
-            callbacks=[
-                autotvm.callback.progress_bar(n_trial_, prefix=prefix),
-                autotvm.callback.log_to_file(tmp_log_file),
-            ],
-        )
-
-    # pick best records to a cache file
-    autotvm.record.pick_best(tmp_log_file, log_filename)
-    os.remove(tmp_log_file)
-
-
-if __name__ == "__main__":
-
-    opt = parse_arguments()
-
-    # Make sure that TVM was compiled with RPC=1
-    assert tvm.runtime.enabled("rpc")
-
-    # Read in VTA environment
-    env = vta.get_env()
-
-    # Get remote from fleet node
-    tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
-    tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
-    if not tracker_host or not tracker_port:
-        print("Set your AutoTVM tracker node host and port variables to run 
the autotuner")
-        exit()
-
-    # Get remote
-    if env.TARGET != "sim":
-
-        # Measure build start time
-        reconfig_start = time.time()
-
-        # Get remote from fleet node
-        remote = autotvm.measure.request_remote(
-            env.TARGET, tracker_host, int(tracker_port), timeout=10000
-        )
-
-        # Reconfigure the JIT runtime and FPGA.
-        # You can program the FPGA with your own custom bitstream
-        # by passing the path to the bitstream file instead of None.
-        vta.reconfig_runtime(remote)
-        vta.program_fpga(remote, bitstream=None)
-
-        # Report on reconfiguration time
-        reconfig_time = time.time() - reconfig_start
-        print("Reconfigured FPGA and RPC runtime in 
{0:.2f}s!".format(reconfig_time))
-
-    # In simulation mode, host the RPC server locally.
-    else:
-        remote = rpc.LocalSession()
-
-    # VTA target and execution context
-    target = env.target if opt.device == "vta" else env.target_vta_cpu
-    ctx = remote.ext_dev(0) if opt.device == "vta" else remote.cpu(0)
-
-    # Compile Relay program
-    print("Initial compile...")
-    try:
-        relay_prog, params = compile_network(opt, env, target)
-    except RuntimeError:
-        print("Downloads from mxnet no longer supported", file=sys.stderr)
-        sys.exit(0)
-
-    # Register VTA tuning tasks
-    register_vta_tuning_tasks()
-
-    # Perform task extraction on Relay program
-    print("Extracting tasks...")
-    tasks = extract_from_program(
-        func=relay_prog,
-        params=params,
-        ops=(relay.op.get("nn.conv2d"),),
-        target=tvm.target.Target(target, host=env.target_host),
-    )
-
-    # Perform Autotuning
-    print("Tuning...")
-    tuning_opt = {
-        "log_filename": opt.log_filename,
-        "tuner": opt.tuner,
-        "n_trial": 1e9,
-        "early_stopping": None,
-        "measure_option": autotvm.measure_option(
-            
builder=autotvm.LocalBuilder(build_func=vta.vta_autotvm_build_func),
-            runner=autotvm.RPCRunner(
-                env.TARGET,
-                tracker_host,
-                tracker_port,
-                number=4,
-                min_repeat_ms=150,
-                repeat=opt.measurements,
-                timeout=60,
-                # check_correctness=True, # TODO: re-enable when 
check_correctness works again.
-            ),
-        ),
-    }
-    tune_tasks(tasks, **tuning_opt)
-
-    # Compile kernels with history best records
-    with autotvm.tophub.context(target, extra_files=[opt.log_filename]):
-
-        # Compile network
-        print("Compiling network with best tuning parameters...")
-        if target.device_name != "vta":
-            with tvm.transform.PassContext(opt_level=3, 
disabled_pass={"AlterOpLayout"}):
-                graph, lib, params = relay.build(
-                    relay_prog,
-                    target=tvm.target.Target(target, host=env.target_host),
-                    params=params,
-                )
-        else:
-            with vta.build_config(opt_level=3, 
disabled_pass={"AlterOpLayout"}):
-                graph, lib, params = relay.build(
-                    relay_prog,
-                    target=tvm.target.Target(target, host=env.target_host),
-                    params=params,
-                )
-
-        # Export library
-        temp = utils.tempdir()
-        lib.save(temp.relpath("graphlib.o"))
-        remote.upload(temp.relpath("graphlib.o"))
-        lib = remote.load_module("graphlib.o")
-
-        # If detailed runtime info is needed build with debug runtime
-        if opt.debug_profile:
-            m = debug_executor.create(graph, lib, ctx)
-        else:
-            m = graph_executor.create(graph, lib, ctx)
-
-        # Set the network parameters and synthetic input
-        image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 
224))).astype("float32"))
-        m.set_input(**params)
-        m.set_input("data", image)
-
-        # Perform inference
-        timer = m.module.time_evaluator("run", ctx, number=4, 
repeat=opt.measurements)
-        tcost = timer()
-        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
-        print(
-            "Mean inference time (std dev): %.2f ms (%.2f ms)"
-            % (np.mean(prof_res), np.std(prof_res))
-        )
-
-        # Display profile information
-        if opt.debug_profile:
-            m.run()
diff --git a/vta/tutorials/autotvm/README.txt b/vta/tutorials/autotvm/README.txt
deleted file mode 100644
index a282a740ec..0000000000
--- a/vta/tutorials/autotvm/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Auto tuning
--------------
diff --git a/vta/tutorials/autotvm/tune_alu_vta.py 
b/vta/tutorials/autotvm/tune_alu_vta.py
deleted file mode 100644
index 8ee58fe990..0000000000
--- a/vta/tutorials/autotvm/tune_alu_vta.py
+++ /dev/null
@@ -1,345 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a ALU fused op on VTA
----------------------------------
-"""
-
-import os
-import sys
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import download
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.autotvm import record
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-import copy
-
-
-#################################################################
-# Compile network
-# ---------------
-# Perform vta-specific compilation with Relay from a Gluon model
-def compile_network(env, target, model, start_pack, stop_pack):
-
-    # Populate the shape and data type dictionary
-    dtype_dict = {"data": "float32"}
-    shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
-    # Get off the shelf gluon model, and convert to relay
-    gluon_model = vision.get_model(model, pretrained=True)
-    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
-    # Update shape and type dictionary
-    shape_dict.update({k: v.shape for k, v in params.items()})
-    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
-    # Perform quantization in Relay
-    # Note: We set opt_level to 3 in order to fold batch norm
-    with relay.build_config(opt_level=3):
-        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
-            mod = relay.quantize.quantize(mod, params=params)
-
-    # Perform graph packing and constant folding for VTA target
-    if target.device_name == "vta":
-        assert env.BLOCK_IN == env.BLOCK_OUT
-        relay_prog = graph_pack(
-            mod["main"],
-            env.BATCH,
-            env.BLOCK_OUT,
-            env.WGT_WIDTH,
-            start_name=start_pack,
-            stop_name=stop_pack,
-        )
-
-    return relay_prog, params
-
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations.
-# Here we use an Pynq-Z1 board as an example.
-
-# Tracker host and port can be set by your environment
-tracker_host = os.environ.get("TVM_TRACKER_HOST", "0.0.0.0")
-tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
-
-# Load VTA parameters from the vta/config/vta_config.json file
-env = vta.get_env()
-
-# This target is used for cross compilation. You can query it by :code:`gcc 
-v` on your device.
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-network = "resnet50_v2"
-start_pack = "nn.max_pool2d"
-stop_pack = "nn.global_avg_pool2d"
-
-# Tuning option
-log_file = "%s.alu.%s.log" % (device, network)
-tuning_option = {
-    "log_filename": log_file,
-    "tuner": "random",
-    "n_trial": 1000,
-    "early_stopping": None,
-    "measure_option": autotvm.measure_option(
-        builder=autotvm.LocalBuilder(n_parallel=1),
-        runner=autotvm.RPCRunner(
-            env.TARGET,
-            host=tracker_host,
-            port=tracker_port,
-            number=5,
-            timeout=60,
-            # check_correctness=True, # TODO: re-enable when check_correctness 
works again.
-        ),
-    ),
-}
-
-
-def log_to_file(file_out, protocol="json"):
-    """Log the tuning records into file.
-    The rows of the log are stored in the format of autotvm.record.encode.
-    for lhs == rhs, we add an extra rhs = [] record
-
-    Parameters
-    ----------
-    file_out : str
-        The file to log to.
-    protocol: str, optional
-        The log protocol. Can be 'json' or 'pickle'
-
-    Returns
-    -------
-    callback : callable
-        Callback function to do the logging.
-    """
-
-    def _callback(_, inputs, results):
-        with open(file_out, "a") as f:
-            for inp, result in zip(inputs, results):
-                f.write(record.encode(inp, result, protocol) + "\n")
-
-                # we only consider task with same lhs and rhs
-                if inp.task.args[0] == inp.task.args[1]:
-                    args = list(inp.task.args)
-                    args[1] = (args[0][0], (), args[0][2])
-                    inp_copy = copy.deepcopy(inp)
-                    inp_copy.task.args = tuple(args)
-                    f.write(record.encode(inp_copy, result, protocol) + "\n")
-
-    return _callback
-
-
-def tune_tasks(
-    tasks,
-    measure_option,
-    tuner="xgb",
-    n_trial=10,
-    early_stopping=None,
-    log_filename="tuning.log",
-    use_transfer_learning=True,
-):
-
-    # create tmp log file
-    tmp_log_file = log_filename + ".tmp"
-    if os.path.exists(tmp_log_file):
-        os.remove(tmp_log_file)
-
-    for i, tsk in enumerate(reversed(tasks)):
-        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
-        # create tuner
-        if tuner == "xgb":
-            tuner_obj = XGBTuner(tsk, loss_type="reg")
-        elif tuner == "xgb_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
-        elif tuner == "xgb_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
-        elif tuner == "xgb_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
-        elif tuner == "xgb_rank":
-            tuner_obj = XGBTuner(tsk, loss_type="rank")
-        elif tuner == "xgb_rank_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
-        elif tuner == "xgb_rank_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
-        elif tuner == "xgb_rank_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
-        elif tuner == "xgb_rank_binary":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
-        elif tuner == "xgb_rank_binary_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="knob")
-        elif tuner == "xgb_rank_binary_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="itervar")
-        elif tuner == "xgb_rank_binary_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="curve")
-        elif tuner == "ga":
-            tuner_obj = GATuner(tsk, pop_size=50)
-        elif tuner == "random":
-            tuner_obj = RandomTuner(tsk)
-        elif tuner == "gridsearch":
-            tuner_obj = GridSearchTuner(tsk)
-        else:
-            raise ValueError("Invalid tuner: " + tuner)
-
-        if use_transfer_learning:
-            if os.path.isfile(tmp_log_file):
-                
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
-        # do tuning
-        tsk_trial = min(n_trial, len(tsk.config_space))
-        tuner_obj.tune(
-            n_trial=tsk_trial,
-            early_stopping=early_stopping,
-            measure_option=measure_option,
-            callbacks=[
-                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
-                log_to_file(tmp_log_file),
-            ],
-        )
-
-    # pick best records to a cache file
-    autotvm.record.pick_best(tmp_log_file, log_filename)
-    os.remove(tmp_log_file)
-
-
-########################################################################
-# Register VTA-specific tuning tasks
-def register_vta_tuning_tasks():
-    from tvm.autotvm.task import TaskExtractEnv
-
-    @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
-    def my_clip(x, a_min, a_max):
-        """Unlike topi's current clip, put min and max into two stages."""
-        const_min = tvm.tir.const(a_min, x.dtype)
-        const_max = tvm.tir.const(a_max, x.dtype)
-        x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), 
name="clipA")
-        x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), 
name="clipB")
-        return x
-
-    # init autotvm env to register VTA operator
-    TaskExtractEnv()
-
-    @autotvm.template("add.vta")
-    def _topi_add(*args, **kwargs):
-        assert not kwargs, "Do not support kwargs in template function call"
-        A, B = args[:2]
-
-        with tvm.target.vta():
-            res = vta.top.op.add_packed(*args, **kwargs)
-            res = my_clip(res, 0, 127)
-            res = topi.cast(res, "int8")
-
-        if tvm.target.Target.current().device_name == "vta":
-            s = vta.top.op.schedule_add_packed([res])
-        else:
-            s = te.create_schedule([res.op])
-        return s, [A, B, res]
-
-    @autotvm.template("multiply.vta")
-    def _topi_multiply(*args, **kwargs):
-        assert not kwargs, "Do not support kwargs in template function call"
-        A, B = args[:2]
-
-        with tvm.target.vta():
-            res = vta.top.op.multiply_packed(*args, **kwargs)
-            res = my_clip(res, 0, 127)
-            res = topi.cast(res, "int8")
-
-        if tvm.target.Target.current().device_name == "vta":
-            s = vta.top.op.schedule_multiply_packed([res])
-        else:
-            s = te.create_schedule([res.op])
-        return s, [A, B, res]
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-def tune_and_evaluate(tuning_opt):
-
-    if env.TARGET != "intelfocl":
-        print("ALU only op only available for intelfocl target")
-        return
-
-    # Register VTA tuning tasks
-    register_vta_tuning_tasks()
-
-    # Perform task extraction on Relay program
-    print("Extract tasks...")
-    relay_prog, params = compile_network(env, target, network, start_pack, 
stop_pack)
-    mod = tvm.IRModule.from_expr(relay_prog)
-    tasks = autotvm.task.extract_from_program(
-        mod,
-        params=params,
-        ops=(
-            relay.op.get("add"),
-            relay.op.get("multiply"),
-        ),
-        target=tvm.target.Target(target, host=env.target_host),
-    )
-
-    # filter out non-packed alu task
-    tasks = list(filter(lambda t: len(t.args[0][1]) > 4, tasks))
-    # filter out float alu task
-    tasks = list(filter(lambda t: t.args[0][2] != "float32", tasks))
-
-    # We should have extracted 10 convolution tasks
-    tasks_set = {}
-    print("Extracted {} alu tasks:".format(len(tasks)))
-    for tsk in tasks:
-        print("tsk = ", tsk)
-
-        if len(tsk.args[1][1]) == 0:
-            args = list(tsk.args)
-            args[1] = args[0]
-            tsk.args = tuple(args)
-
-        if (tsk.name, tsk.args) in tasks_set:
-            print("task {} already exists".format(tsk))
-        tasks_set[(tsk.name, tsk.args)] = tsk
-
-    tasks = list(tasks_set.values())
-    print("After merged, final #tasks={}, tasks = {}".format(len(tasks), 
tasks))
-
-    # run tuning tasks
-    print("Tuning...")
-    tune_tasks(tasks, **tuning_opt)
-
-
-# Run the tuning and evaluate the results
-try:
-    tune_and_evaluate(tuning_option)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py 
b/vta/tutorials/autotvm/tune_relay_vta.py
deleted file mode 100644
index b5de247883..0000000000
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ /dev/null
@@ -1,538 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network on VTA
-==========================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Thierry Moreau 
<https://homes.cs.washington.edu/~moreau/>`_
-
-Auto-tuning for a specific accelerator design is critical for getting the best
-performance for any given operator. This is a tutorial showcases how to tune a
-whole convolutional network on VTA.
-
-The operator implementation for VTA in TVM is written in template form.
-The template has many tunable knobs (tile factor, virtual threads, etc).
-We will tune all convolution operators in the neural network. After tuning,
-we produce a log file which stores the best schedule parameters for all tuned
-operators. When the TVM compiler compiles these operators, it will query this
-log file to get the best knob parameters.
-
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra 
dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-#   pip3 install --user psutil xgboost tornado mxnet requests "Pillow<7" 
cloudpickle
-#
-# To make TVM run faster during tuning, it is recommended to use cython
-# as FFI of TVM. In the root directory of TVM, execute
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-#   pip3 install --user cython
-#   sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import os
-import sys
-
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_executor, utils, download
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-
-#################################################################
-# Compile network
-# ---------------
-# Perform vta-specific compilation with Relay from a Gluon model
-
-
-def compile_network(env, target, model, start_pack, stop_pack):
-
-    # Populate the shape and data type dictionary
-    dtype_dict = {"data": "float32"}
-    shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
-    # Get off the shelf gluon model, and convert to relay
-    gluon_model = vision.get_model(model, pretrained=True)
-    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
-    # Update shape and type dictionary
-    shape_dict.update({k: v.shape for k, v in params.items()})
-    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
-    # Perform quantization in Relay
-    # Note: We set opt_level to 3 in order to fold batch norm
-    with tvm.transform.PassContext(opt_level=3):
-        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
-            mod = relay.quantize.quantize(mod, params=params)
-
-    # Perform graph packing and constant folding for VTA target
-    if target.device_name == "vta":
-        assert env.BLOCK_IN == env.BLOCK_OUT
-        relay_prog = graph_pack(
-            mod["main"],
-            env.BATCH,
-            env.BLOCK_OUT,
-            env.WGT_WIDTH,
-            start_name=start_pack,
-            stop_name=stop_pack,
-        )
-
-    return relay_prog, params
-
-
-#################################################################
-# Start RPC Tracker
-# -----------------
-# TVM uses an RPC session to communicate with Pynq boards.
-# During tuning, the tuner will send the generated code to the board and
-# measure the speed of code on the board.
-#
-# To scale up tuning, TVM uses an RPC Tracker to manage multiple devices.
-# The RPC Tracker is a centralized controller node. We can register all 
devices to
-# the tracker. For example, if we have 10 Pynq boards, we can register all of 
them
-# to the tracker, and run 10 measurements in parallel, accelerating the tuning 
process.
-#
-# To start an RPC tracker, run this command on the host machine. The tracker is
-# required during the whole tuning process, so we need to open a new terminal 
for
-# this command:
-#
-# .. code-block:: bash
-#
-#   python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
-#
-# The expected output is:
-#
-# .. code-block:: bash
-#
-#   INFO:RPCTracker:bind to 0.0.0.0:9190
-
-#################################################################
-# Register devices to RPC Tracker
-# -----------------------------------
-# Now we can register our devices to the tracker. The first step is to
-# build the TVM runtime for the Pynq devices.
-#
-# Follow :ref:`vta-index`
-# to build the TVM runtime on the device. Then register the device to the 
tracker with:
-#
-# .. code-block:: bash
-#
-#   python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=pynq
-#
-# (replace :code:`[HOST_IP]` with the IP address of your host machine)
-#
-# After registering devices, we can confirm it by querying the rpc_tracker:
-#
-# .. code-block:: bash
-#
-#   python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
-#
-# For example, if we have 6 Pynq boards and 11 Raspberry Pi 3B,
-# the output can be
-#
-# .. code-block:: bash
-#
-#    Queue Status
-#    ----------------------------------
-#    key          total  free  pending
-#    ----------------------------------
-#    pynq         6      6     0
-#    rpi3b        11     11    0
-#    ----------------------------------
-#
-# You can register multiple devices to the tracker to accelerate tuning.
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations.
-# Here we use an Pynq-Z1 board as an example.
-
-# Tracker host and port can be set by your environment
-tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
-tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
-
-# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
-env = vta.get_env()
-
-# This target is used for cross compilation. You can query it by :code:`gcc 
-v` on your device.
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-network = "resnet18_v1"
-start_pack = "nn.max_pool2d"
-stop_pack = "nn.global_avg_pool2d"
-
-# Tuning option
-log_file = "%s.%s.log" % (device, network)
-tuning_option = {
-    "log_filename": log_file,
-    "tuner": "random",
-    "n_trial": 1000,
-    "early_stopping": None,
-    "measure_option": autotvm.measure_option(
-        builder=autotvm.LocalBuilder(),
-        runner=autotvm.RPCRunner(
-            env.TARGET,
-            host=tracker_host,
-            port=tracker_port,
-            number=5,
-            timeout=60,
-            module_loader=vta.module_loader(),
-            # check_correctness=True, # TODO: re-enable when check_correctness 
works again.
-        ),
-    ),
-}
-
-####################################################################
-#
-# .. note:: How to set tuning options
-#
-#   In general, the default values provided here work well.
-#   If you have enough time budget, you can set :code:`n_trial`, 
:code:`early_stopping`
-#   to larger values, makes the tuning run for longer.
-#   If your device is under-powered or your conv2d operators are large, 
consider
-#   setting a longer timeout.
-#
-
-###################################################################
-# Begin Tuning
-# ------------
-# Now we can extract tuning tasks from the network and begin tuning.
-# Here, we provide a simple utility function to tune a list of tasks.
-# This function is just an initial implementation which tunes them in 
sequential order.
-# We will introduce a more sophisticated tuning scheduler in the future.
-#
-# Given that the tuning will be done on Pynq FPGA boards, make sure that
-# the ```TARGET`` entry in the ``vta_config.json`` file is set to ``pynq``.
-
-
-# You can skip the implementation of this function for this tutorial.
-def tune_tasks(
-    tasks,
-    measure_option,
-    tuner="xgb",
-    n_trial=1000,
-    early_stopping=None,
-    log_filename="tuning.log",
-    use_transfer_learning=True,
-):
-
-    # create tmp log file
-    tmp_log_file = log_filename + ".tmp"
-    if os.path.exists(tmp_log_file):
-        os.remove(tmp_log_file)
-
-    for i, tsk in enumerate(reversed(tasks)):
-        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
-        # create tuner
-        if tuner == "xgb":
-            tuner_obj = XGBTuner(tsk, loss_type="reg")
-        elif tuner == "xgb_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
-        elif tuner == "xgb_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
-        elif tuner == "xgb_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
-        elif tuner == "xgb_rank":
-            tuner_obj = XGBTuner(tsk, loss_type="rank")
-        elif tuner == "xgb_rank_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
-        elif tuner == "xgb_rank_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
-        elif tuner == "xgb_rank_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
-        elif tuner == "xgb_rank_binary":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
-        elif tuner == "xgb_rank_binary_knob":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="knob")
-        elif tuner == "xgb_rank_binary_itervar":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="itervar")
-        elif tuner == "xgb_rank_binary_curve":
-            tuner_obj = XGBTuner(tsk, loss_type="rank-binary", 
feature_type="curve")
-        elif tuner == "ga":
-            tuner_obj = GATuner(tsk, pop_size=50)
-        elif tuner == "random":
-            tuner_obj = RandomTuner(tsk)
-        elif tuner == "gridsearch":
-            tuner_obj = GridSearchTuner(tsk)
-        else:
-            raise ValueError("Invalid tuner: " + tuner)
-
-        if use_transfer_learning:
-            if os.path.isfile(tmp_log_file):
-                
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
-        # do tuning
-        tsk_trial = min(n_trial, len(tsk.config_space))
-        tuner_obj.tune(
-            n_trial=tsk_trial,
-            early_stopping=early_stopping,
-            measure_option=measure_option,
-            callbacks=[
-                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
-                autotvm.callback.log_to_file(tmp_log_file),
-            ],
-        )
-
-    # pick best records to a cache file
-    autotvm.record.pick_best(tmp_log_file, log_filename)
-    os.remove(tmp_log_file)
-
-
-########################################################################
-# Register VTA-specific tuning tasks
-
-
-def register_vta_tuning_tasks():
-    from tvm.autotvm.task import TaskExtractEnv
-
-    @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
-    def my_clip(x, a_min, a_max):
-        """Unlike topi's current clip, put min and max into two stages."""
-        const_min = tvm.tir.const(a_min, x.dtype)
-        const_max = tvm.tir.const(a_max, x.dtype)
-        x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), 
name="clipA")
-        x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), 
name="clipB")
-        return x
-
-    # init autotvm env to register VTA operator
-    TaskExtractEnv()
-
-    @autotvm.template("conv2d_packed.vta")
-    def _topi_nn_conv2d(*args, **kwargs):
-        assert not kwargs, "Do not support kwargs in template function call"
-        A, W = args[:2]
-
-        with tvm.target.vta():
-            res = vta.top.conv2d_packed(*args, **kwargs)
-            res = topi.right_shift(res, 8)
-            res = my_clip(res, 0, 127)
-            res = topi.cast(res, "int8")
-
-        if tvm.target.Target.current().device_name == "vta":
-            s = vta.top.schedule_conv2d_packed([res])
-        else:
-            s = te.create_schedule([res.op])
-        return s, [A, W, res]
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-
-def tune_and_evaluate(tuning_opt):
-
-    # Register VTA tuning tasks
-    register_vta_tuning_tasks()
-
-    # Perform task extraction on Relay program
-    print("Extract tasks...")
-    relay_prog, params = compile_network(env, target, network, start_pack, 
stop_pack)
-    mod = tvm.IRModule.from_expr(relay_prog)
-    tasks = autotvm.task.extract_from_program(
-        mod,
-        params=params,
-        ops=(relay.op.get("nn.conv2d"),),
-        target=target,
-        target_host=env.target_host,
-    )
-
-    # filter out non-packed conv2d task
-    tasks = list(filter(lambda t: len(t.args[0][1]) > 4 and "conv" in t.name, 
tasks))
-
-    # We should have extracted 10 convolution tasks
-    assert len(tasks) == 10
-    print("Extracted {} conv2d tasks:".format(len(tasks)))
-    for tsk in tasks:
-        inp = tsk.args[0][1]
-        wgt = tsk.args[1][1]
-        batch = inp[0] * inp[4]
-        in_filter = inp[1] * inp[5]
-        out_filter = wgt[0] * wgt[4]
-        height, width = inp[2], inp[3]
-        hkernel, wkernel = wgt[2], wgt[3]
-        hstride, wstride = tsk.args[2][0], tsk.args[2][1]
-        hpad, wpad = tsk.args[3][0], tsk.args[3][1]
-        print(
-            "({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
-                batch,
-                height,
-                width,
-                in_filter,
-                out_filter,
-                hkernel,
-                wkernel,
-                hpad,
-                wpad,
-                hstride,
-                wstride,
-            )
-        )
-
-    # We do not run the tuning in our webpage server since it takes too long.
-    # Comment the following line to run it by yourself.
-    return
-
-    # run tuning tasks
-    print("Tuning...")
-    tune_tasks(tasks, **tuning_opt)
-
-    # evaluate with tuning history
-    if env.TARGET != "sim":
-        # Get remote from fleet node
-        remote = autotvm.measure.request_remote(
-            env.TARGET, tracker_host, tracker_port, timeout=10000
-        )
-        # Reconfigure the JIT runtime and FPGA.
-        vta.reconfig_runtime(remote)
-        vta.program_fpga(remote, bitstream=None)
-    else:
-        # In simulation mode, host the RPC server locally.
-        remote = rpc.LocalSession()
-
-    # compile kernels with history best records
-    with autotvm.tophub.context(target, extra_files=[log_file]):
-        # Compile network
-        print("Compile...")
-        if target.device_name != "vta":
-            with tvm.transform.PassContext(opt_level=3, 
disabled_pass={"AlterOpLayout"}):
-                lib = relay.build(
-                    relay_prog, target=target, params=params, 
target_host=env.target_host
-                )
-        else:
-            with vta.build_config(opt_level=3, 
disabled_pass={"AlterOpLayout"}):
-                lib = relay.build(
-                    relay_prog, target=target, params=params, 
target_host=env.target_host
-                )
-
-        # Export library
-        print("Upload...")
-        temp = utils.tempdir()
-        lib.export_library(temp.relpath("graphlib.tar"))
-        remote.upload(temp.relpath("graphlib.tar"))
-        lib = remote.load_module("graphlib.tar")
-
-        # Generate the graph executor
-        ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
-        m = graph_executor.GraphModule(lib["default"](ctx))
-
-        # upload parameters to device
-        image = tvm.nd.array((np.random.uniform(size=(1, 3, 224, 
224))).astype("float32"))
-        m.set_input("data", image)
-
-        # evaluate
-        print("Evaluate inference time cost...")
-        timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
-        tcost = timer()
-        prof_res = np.array(tcost.results) * 1000  # convert to millisecond
-        print(
-            "Mean inference time (std dev): %.2f ms (%.2f ms)"
-            % (np.mean(prof_res), np.std(prof_res))
-        )
-
-
-# Run the tuning and evaluate the results
-try:
-    tune_and_evaluate(tuning_option)
-except RuntimeError:
-    print("Downloads from mxnet no longer supported", file=sys.stderr)
-    sys.exit(0)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract feature from them.
-# So a high performance CPU is recommended.
-# One sample output is listed below.
-# It takes about 2 hours on a 16T CPU, and 6 Pynq boards.
-#
-# .. code-block:: bash
-#
-#    Extract tasks...
-#    [Warning] Invalid shape during AutoTVM task creation
-#    Extracted 10 conv2d tasks:
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1, 
16), 'int8'), ('TENSOR', (32, 16, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1, 
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1, 
16, 'int8'), (32, 16, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1), 
'NCHW1n16c', 'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1, 
16), 'int8'), ('TENSOR', (16, 8, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1, 
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16, 
'int8'), (16, 8, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1, 
16), 'int8'), ('TENSOR', (8, 4, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1, 1), 
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16, 
'int8'), (8, 4, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1, 
16), 'int8'), ('TENSOR', (4, 4, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16, 
'int8'), (4, 4, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1, 
16), 'int8'), ('TENSOR', (8, 8, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16, 
'int8'), (8, 8, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1, 
16), 'int8'), ('TENSOR', (8, 4, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16, 
'int8'), (8, 4, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1, 
16), 'int8'), ('TENSOR', (16, 16, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1, 
16, 'int8'), (16, 16, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1, 
16), 'int8'), ('TENSOR', (16, 8, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1, 
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16, 
'int8'), (16, 8, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 32, 7, 7, 1, 16), 
'int8'), ('TENSOR', (32, 32, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 32, 7, 7, 1, 16, 
'int8'), (32, 32, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c', 
'int32'))
-#        Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1, 
16), 'int8'), ('TENSOR', (32, 16, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1, 
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1, 
16, 'int8'), (32, 16, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1), 
'NCHW1n16c', 'int32'))
-#    Tuning...
-#    [Task  1/10]  Current/Best:    0.72/  23.24 GFLOPS | Progress: (480/1000) 
| 640.31 s Done.
-#    [Task  2/10]  Current/Best:    0.00/  27.69 GFLOPS | Progress: (576/1000) 
| 810.09 s Done.
-#    [Task  3/10]  Current/Best:    0.00/  22.97 GFLOPS | Progress: 
(1000/1000) | 1125.37 s Done.
-#    [Task  4/10]  Current/Best:    0.00/  31.26 GFLOPS | Progress: 
(1000/1000) | 1025.52 s Done.
-#    [Task  5/10]  Current/Best:    0.00/  15.15 GFLOPS | Progress: 
(1000/1000) | 1236.58 s Done.
-#    [Task  6/10]  Current/Best:    0.00/  22.74 GFLOPS | Progress: 
(1000/1000) | 906.60 s Done.
-#    [Task  7/10]  Current/Best:    0.00/  15.27 GFLOPS | Progress: 
(1000/1000) | 1056.25 s Done.
-#    [Task  8/10]  Current/Best:    0.00/   2.18 GFLOPS | Progress: 
(1000/1000) | 2275.29 s Done.
-#    [Task  9/10]  Current/Best:    2.23/   3.99 GFLOPS | Progress: 
(1000/1000) | 2527.25 s Done.
-#    [Task 10/10]  Current/Best:    1.56/   6.32 GFLOPS | Progress: (480/1000) 
| 1304.84 s Done.
-#    Compile...
-#    Upload...
-#    Evaluate inference time cost...
-#    Mean inference time (std dev): 621.79 ms (0.14 ms)
-
-######################################################################
-#
-# .. note:: **Experiencing Difficulties?**
-#
-#   The auto tuning module is error-prone. If you always see " 0.00/ 0.00 
GFLOPS",
-#   then there must be something wrong.
-#
-#   First, make sure you set the correct configuration of your device.
-#   Then, you can print debug information by adding these lines in the 
beginning
-#   of the script. It will print every measurement result, where you can find 
useful
-#   error messages.
-#
-#   .. code-block:: python
-#
-#      import logging
-#      logging.getLogger('autotvm').setLevel(logging.DEBUG)
-#
-#   Finally, always feel free to ask our community for help on 
https://discuss.tvm.apache.org
diff --git a/vta/tutorials/frontend/deploy_classification.py 
b/vta/tutorials/frontend/deploy_classification.py
deleted file mode 100644
index c741a1678f..0000000000
--- a/vta/tutorials/frontend/deploy_classification.py
+++ /dev/null
@@ -1,313 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy Pretrained Vision Model from MxNet on VTA
-================================================
-**Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_
-
-This tutorial provides an end-to-end demo, on how to run ImageNet 
classification
-inference onto the VTA accelerator design to perform ImageNet classification 
tasks.
-It showcases Relay as a front end compiler that can perform quantization (VTA
-only supports int8/32 inference) as well as graph packing (in order to enable
-tensorization in the core) to massage the compute graph for the hardware 
target.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra 
dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-#   pip3 install --user mxnet requests "Pillow<7"
-#
-# Now return to the python code. Import packages.
-
-from __future__ import absolute_import, print_function
-
-import argparse, json, os, requests, sys, time
-from io import BytesIO
-from os.path import join, isfile
-import sys
-from PIL import Image
-
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from matplotlib import pyplot as plt
-
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_executor, utils, download
-from tvm.contrib.debugger import debug_executor
-from tvm.relay import transform
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-
-
-# Make sure that TVM was compiled with RPC=1
-assert tvm.runtime.enabled("rpc")
-
-######################################################################
-# Define the platform and model targets
-# -------------------------------------
-# Execute on CPU vs. VTA, and define the model.
-
-# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
-env = vta.get_env()
-
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Dictionary lookup for when to start/end bit packing
-pack_dict = {
-    "resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-    "resnet34_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-    "resnet18_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-    "resnet34_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-    "resnet50_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-    "resnet101_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-}
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-model = "resnet18_v1"
-assert model in pack_dict
-
-######################################################################
-# Obtain an execution remote
-# --------------------------
-# When target is 'pynq', reconfigure FPGA and runtime.
-# Otherwise, if target is 'sim', execute locally.
-
-if env.TARGET not in ["sim", "tsim", "intelfocl"]:
-
-    # Get remote from tracker node if environment variable is set.
-    # To set up the tracker, you'll need to follow the "Auto-tuning
-    # a convolutional network for VTA" tutorial.
-    tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
-    tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
-    # Otherwise if you have a device you want to program directly from
-    # the host, make sure you've set the variables below to the IP of
-    # your board.
-    device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99")
-    device_port = os.environ.get("VTA_RPC_PORT", "9091")
-    if not tracker_host or not tracker_port:
-        remote = rpc.connect(device_host, int(device_port))
-    else:
-        remote = autotvm.measure.request_remote(
-            env.TARGET, tracker_host, int(tracker_port), timeout=10000
-        )
-
-    # Reconfigure the JIT runtime and FPGA.
-    # You can program the FPGA with your own custom bitstream
-    # by passing the path to the bitstream file instead of None.
-    reconfig_start = time.time()
-    vta.reconfig_runtime(remote)
-    vta.program_fpga(remote, bitstream=None)
-    reconfig_time = time.time() - reconfig_start
-    print("Reconfigured FPGA and RPC runtime in 
{0:.2f}s!".format(reconfig_time))
-
-# In simulation mode, host the RPC server locally.
-else:
-    remote = rpc.LocalSession()
-
-    if env.TARGET in ["intelfocl"]:
-        # program intelfocl aocx
-        vta.program_fpga(remote, bitstream="vta.bitstream")
-
-# Get execution context from remote
-ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
-
-######################################################################
-# Build the inference graph executor
-# ----------------------------------
-# Grab vision model from Gluon model zoo and compile with Relay.
-# The compilation steps are:
-#
-# 1. Front end translation from MxNet into Relay module.
-# 2. Apply 8-bit quantization: here we skip the first conv layer,
-#    and dense layer which will both be executed in fp32 on the CPU.
-# 3. Perform graph packing to alter the data layout for tensorization.
-# 4. Perform constant folding to reduce number of operators (e.g. eliminate 
batch norm multiply).
-# 5. Perform relay build to object file.
-# 6. Load the object file onto remote (FPGA device).
-# 7. Generate graph executor, `m`.
-#
-
-# Load pre-configured AutoTVM schedules
-with autotvm.tophub.context(target):
-
-    # Populate the shape and data type dictionary for ImageNet classifier input
-    dtype_dict = {"data": "float32"}
-    shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
-    # Get off the shelf gluon model, and convert to relay
-    try:
-        gluon_model = vision.get_model(model, pretrained=True)
-    except RuntimeError:
-        print("Downloads from mxnet no longer supported", file=sys.stderr)
-        sys.exit(0)
-
-    # Measure build start time
-    build_start = time.time()
-
-    # Start front end compilation
-    mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
-    # Update shape and type dictionary
-    shape_dict.update({k: v.shape for k, v in params.items()})
-    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
-    if target.device_name == "vta":
-        # Perform quantization in Relay
-        # Note: We set opt_level to 3 in order to fold batch norm
-        with tvm.transform.PassContext(opt_level=3):
-            with relay.quantize.qconfig(global_scale=8.0, 
skip_conv_layers=[0]):
-                mod = relay.quantize.quantize(mod, params=params)
-            # Perform graph packing and constant folding for VTA target
-            assert env.BLOCK_IN == env.BLOCK_OUT
-            # do device annotation if target is intelfocl or sim
-            relay_prog = graph_pack(
-                mod["main"],
-                env.BATCH,
-                env.BLOCK_OUT,
-                env.WGT_WIDTH,
-                start_name=pack_dict[model][0],
-                stop_name=pack_dict[model][1],
-                device_annot=(env.TARGET == "intelfocl"),
-            )
-    else:
-        relay_prog = mod["main"]
-
-    # Compile Relay program with AlterOpLayout disabled
-    if target.device_name != "vta":
-        with tvm.transform.PassContext(opt_level=3, 
disabled_pass={"AlterOpLayout"}):
-            graph, lib, params = relay.build(
-                relay_prog, target=tvm.target.Target(target, 
host=env.target_host), params=params
-            )
-    else:
-        if env.TARGET == "intelfocl":
-            # multiple targets to run both on cpu and vta
-            target = {"cpu": env.target_vta_cpu, "ext_dev": target}
-        with vta.build_config(
-            opt_level=3, disabled_pass={"AlterOpLayout", 
"tir.CommonSubexprElimTIR"}
-        ):
-            graph, lib, params = relay.build(
-                relay_prog, target=tvm.target.Target(target, 
host=env.target_host), params=params
-            )
-
-    # Measure Relay build time
-    build_time = time.time() - build_start
-    print(model + " inference graph built in {0:.2f}s!".format(build_time))
-
-    # Send the inference library over to the remote RPC server
-    temp = utils.tempdir()
-    lib.export_library(temp.relpath("graphlib.tar"))
-    remote.upload(temp.relpath("graphlib.tar"))
-    lib = remote.load_module("graphlib.tar")
-
-    if env.TARGET == "intelfocl":
-        ctxes = [remote.ext_dev(0), remote.cpu(0)]
-        m = graph_executor.create(graph, lib, ctxes)
-    else:
-        # Graph runtime
-        m = graph_executor.create(graph, lib, ctx)
-
-######################################################################
-# Perform image classification inference
-# --------------------------------------
-# We run classification on an image sample from ImageNet
-# We just need to download the categories files, `synset.txt`
-# and an input test image.
-
-# Download ImageNet categories
-categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/";
-categ_fn = "synset.txt"
-download.download(join(categ_url, categ_fn), categ_fn)
-synset = eval(open(categ_fn).read())
-
-# Download test image
-image_url = "https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg";
-image_fn = "cat.png"
-download.download(image_url, image_fn)
-
-# Prepare test image for inference
-image = Image.open(image_fn).resize((224, 224))
-plt.imshow(image)
-plt.show()
-image = np.array(image) - np.array([123.0, 117.0, 104.0])
-image /= np.array([58.395, 57.12, 57.375])
-image = image.transpose((2, 0, 1))
-image = image[np.newaxis, :]
-image = np.repeat(image, env.BATCH, axis=0)
-
-# Set the network parameters and inputs
-m.set_input(**params)
-m.set_input("data", image)
-
-# Perform inference and gather execution statistics
-# More on: :py:method:`tvm.runtime.Module.time_evaluator`
-num = 4  # number of times we run module for a single measurement
-rep = 3  # number of measurements (we derive std dev from this)
-timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)
-
-if env.TARGET in ["sim", "tsim"]:
-    simulator.clear_stats()
-    timer()
-    sim_stats = simulator.stats()
-    print("\nExecution statistics:")
-    for k, v in sim_stats.items():
-        # Since we execute the workload many times, we need to normalize stats
-        # Note that there is always one warm up run
-        # Therefore we divide the overall stats by (num * rep + 1)
-        print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
-else:
-    tcost = timer()
-    std = np.std(tcost.results) * 1000
-    mean = tcost.mean * 1000
-    print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" % 
(mean, std, env.BATCH))
-    print("Average per sample inference time: %.2fms" % (mean / env.BATCH))
-
-# Get classification results
-tvm_output = m.get_output(0, tvm.nd.empty((env.BATCH, 1000), "float32", 
remote.cpu(0)))
-for b in range(env.BATCH):
-    top_categories = np.argsort(tvm_output.numpy()[b])
-    # Report top-5 classification results
-    print("\n{} prediction for sample {}".format(model, b))
-    print("\t#1:", synset[top_categories[-1]])
-    print("\t#2:", synset[top_categories[-2]])
-    print("\t#3:", synset[top_categories[-3]])
-    print("\t#4:", synset[top_categories[-4]])
-    print("\t#5:", synset[top_categories[-5]])
-    # This just checks that one of the 5 top categories
-    # is one variety of cat; this is by no means an accurate
-    # assessment of how quantization affects classification
-    # accuracy but is meant to catch changes to the
-    # quantization pass that would accuracy in the CI.
-    cat_detected = False
-    for k in top_categories[-5:]:
-        if "cat" in synset[k]:
-            cat_detected = True
-    assert cat_detected

Reply via email to