This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 1a01102f87 [Doc] Remove MxNet related tutorials (#16572)
1a01102f87 is described below
commit 1a01102f876adec6f66c4a210e688601728fcd5a
Author: Siyuan Feng <[email protected]>
AuthorDate: Thu Feb 15 21:07:20 2024 +0800
[Doc] Remove MxNet related tutorials (#16572)
* [Doc] Remove MxNet related tutorials
As MXNet is retired, we remove the related tutorials
and scripts first.
We will also remove the MXNet frontend support in the future.
---
apps/benchmark/adreno/adreno_gpu_bench_clml.py | 11 -
apps/benchmark/adreno/adreno_gpu_bench_texture.py | 11 -
apps/benchmark/util.py | 11 -
docs/conf.py | 1 -
gallery/how_to/compile_models/from_mxnet.py | 153 ------
.../how_to/deploy_models/deploy_model_on_nano.py | 30 +-
.../how_to/deploy_models/deploy_model_on_rasp.py | 32 +-
gallery/how_to/deploy_models/deploy_quantized.py | 172 -------
.../how_to/extend_tvm/bring_your_own_datatypes.py | 415 ----------------
.../tune_with_autoscheduler/tune_network_arm.py | 23 +-
.../tune_with_autoscheduler/tune_network_cuda.py | 20 +-
.../tune_with_autoscheduler/tune_network_mali.py | 19 +-
.../tune_with_autoscheduler/tune_network_x86.py | 31 +-
gallery/how_to/tune_with_autotvm/tune_relay_arm.py | 11 -
.../how_to/tune_with_autotvm/tune_relay_cuda.py | 11 -
.../tune_with_autotvm/tune_relay_mobile_gpu.py | 11 -
gallery/how_to/tune_with_autotvm/tune_relay_x86.py | 11 -
vta/scripts/tune_resnet.py | 377 ---------------
vta/tutorials/autotvm/README.txt | 2 -
vta/tutorials/autotvm/tune_alu_vta.py | 345 -------------
vta/tutorials/autotvm/tune_relay_vta.py | 538 ---------------------
vta/tutorials/frontend/deploy_classification.py | 313 ------------
22 files changed, 43 insertions(+), 2505 deletions(-)
diff --git a/apps/benchmark/adreno/adreno_gpu_bench_clml.py
b/apps/benchmark/adreno/adreno_gpu_bench_clml.py
index e045f60c3a..a7e2e5e9c2 100755
--- a/apps/benchmark/adreno/adreno_gpu_bench_clml.py
+++ b/apps/benchmark/adreno/adreno_gpu_bench_clml.py
@@ -84,17 +84,6 @@ def get_network(name, batch_size, dtype="float32"):
net, params = testing.squeezenet.get_workload(
batch_size=batch_size, version=version, dtype=dtype
)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- net, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = net["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- net = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/apps/benchmark/adreno/adreno_gpu_bench_texture.py
b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
index 7b10111186..5c4ee3bb6e 100755
--- a/apps/benchmark/adreno/adreno_gpu_bench_texture.py
+++ b/apps/benchmark/adreno/adreno_gpu_bench_texture.py
@@ -83,17 +83,6 @@ def get_network(name, batch_size, dtype="float32"):
net, params = testing.squeezenet.get_workload(
batch_size=batch_size, version=version, dtype=dtype
)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- net, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = net["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- net = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/apps/benchmark/util.py b/apps/benchmark/util.py
index 01f0a11635..4e9bfa8d9e 100644
--- a/apps/benchmark/util.py
+++ b/apps/benchmark/util.py
@@ -72,17 +72,6 @@ def get_network(name, batch_size, dtype="float32"):
net, params = testing.squeezenet.get_workload(
batch_size=batch_size, version=version, dtype=dtype
)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- net, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = net["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- net = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/docs/conf.py b/docs/conf.py
index d599b4fdc0..553aaf8a92 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -441,7 +441,6 @@ subsection_order = ExplicitOrder(
for p in [
tvm_path / "vta" / "tutorials" / "frontend",
tvm_path / "vta" / "tutorials" / "optimize",
- tvm_path / "vta" / "tutorials" / "autotvm",
]
)
diff --git a/gallery/how_to/compile_models/from_mxnet.py
b/gallery/how_to/compile_models/from_mxnet.py
deleted file mode 100644
index 132f098d92..0000000000
--- a/gallery/how_to/compile_models/from_mxnet.py
+++ /dev/null
@@ -1,153 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-.. _tutorial-from-mxnet:
-
-Compile MXNet Models
-====================
-**Author**: `Joshua Z. Zhang <https://zhreshold.github.io/>`_, \
- `Kazutaka Morita <https://github.com/kazum>`_
-
-This article is an introductory tutorial to deploy mxnet models with Relay. To
begin, we must install `mxnet`:
-
-.. code-block:: bash
-
- %%shell
- pip install mxnet
-
-or please refer to official installation guide.
-https://mxnet.apache.org/versions/master/install/index.html
-"""
-
-# some standard imports
-# sphinx_gallery_start_ignore
-# sphinx_gallery_requires_cuda = True
-# sphinx_gallery_end_ignore
-import sys
-import mxnet as mx
-import tvm
-import tvm.relay as relay
-import numpy as np
-
-######################################################################
-# Download Resnet18 model from Gluon Model Zoo
-# ---------------------------------------------
-# In this section, we download a pretrained imagenet model and classify an
image.
-from tvm.contrib.download import download_testdata
-from mxnet.gluon.model_zoo.vision import get_model
-from PIL import Image
-from matplotlib import pyplot as plt
-
-try:
- block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
-
-img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
-img_name = "cat.png"
-synset_url = "".join(
- [
- "https://gist.githubusercontent.com/zhreshold/",
- "4d0b62f3d01426887599d4f7ede23ee5/raw/",
- "596b27d23537e5a1b5751d2b0481ef172f58b539/",
- "imagenet1000_clsid_to_human.txt",
- ]
-)
-synset_name = "imagenet1000_clsid_to_human.txt"
-img_path = download_testdata(img_url, "cat.png", module="data")
-synset_path = download_testdata(synset_url, synset_name, module="data")
-with open(synset_path) as f:
- synset = eval(f.read())
-image = Image.open(img_path).resize((224, 224))
-plt.imshow(image)
-plt.show()
-
-
-def transform_image(image):
- image = np.array(image) - np.array([123.0, 117.0, 104.0])
- image /= np.array([58.395, 57.12, 57.375])
- image = image.transpose((2, 0, 1))
- image = image[np.newaxis, :]
- return image
-
-
-x = transform_image(image)
-print("x", x.shape)
-
-######################################################################
-# Compile the Graph
-# -----------------
-# Now we would like to port the Gluon model to a portable computational graph.
-# It's as easy as several lines.
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
-## we want a probability so add a softmax operator
-func = mod["main"]
-func = relay.Function(func.params, relay.nn.softmax(func.body), None,
func.type_params, func.attrs)
-
-######################################################################
-# now compile the graph
-target = "cuda"
-with tvm.transform.PassContext(opt_level=3):
- lib = relay.build(func, target, params=params)
-
-######################################################################
-# Execute the portable graph on TVM
-# ---------------------------------
-# Now, we would like to reproduce the same forward computation using TVM.
-from tvm.contrib import graph_executor
-
-dev = tvm.cuda(0)
-dtype = "float32"
-m = graph_executor.GraphModule(lib["default"](dev))
-# set inputs
-m.set_input("data", tvm.nd.array(x.astype(dtype)))
-# execute
-m.run()
-# get outputs
-tvm_output = m.get_output(0)
-top1 = np.argmax(tvm_output.numpy()[0])
-print("TVM prediction top-1:", top1, synset[top1])
-
-######################################################################
-# Use MXNet symbol with pretrained weights
-# ----------------------------------------
-# MXNet often use `arg_params` and `aux_params` to store network parameters
-# separately, here we show how to use these weights with existing API
-def block2symbol(block):
- data = mx.sym.Variable("data")
- sym = block(data)
- args = {}
- auxs = {}
- for k, v in block.collect_params().items():
- args[k] = mx.nd.array(v.data().asnumpy())
- return sym, args, auxs
-
-
-mx_sym, args, auxs = block2symbol(block)
-# usually we would save/load it as checkpoint
-mx.model.save_checkpoint("resnet18_v1", 0, mx_sym, args, auxs)
-# there are 'resnet18_v1-0000.params' and 'resnet18_v1-symbol.json' on disk
-
-######################################################################
-# for a normal mxnet model, we start from here
-mx_sym, args, auxs = mx.model.load_checkpoint("resnet18_v1", 0)
-# now we use the same API to get Relay computation graph
-mod, relay_params = relay.frontend.from_mxnet(mx_sym, shape_dict,
arg_params=args, aux_params=auxs)
-# repeat the same steps to run this model using TVM
diff --git a/gallery/how_to/deploy_models/deploy_model_on_nano.py
b/gallery/how_to/deploy_models/deploy_model_on_nano.py
index 761187e2d7..a656159546 100644
--- a/gallery/how_to/deploy_models/deploy_model_on_nano.py
+++ b/gallery/how_to/deploy_models/deploy_model_on_nano.py
@@ -102,22 +102,22 @@ from tvm.contrib.download import download_testdata
# -----------------------------
# Back to the host machine, which should have a full TVM installed (with LLVM).
#
-# We will use pre-trained model from
-# `MXNet Gluon model zoo
<https://mxnet.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can found more details about this part at tutorial
:ref:`tutorial-from-mxnet`.
+# We will use pre-trained model from torchvision
-import sys
-
-from mxnet.gluon.model_zoo.vision import get_model
+import torch
+import torchvision
from PIL import Image
import numpy as np
# one line to get the model
-try:
- block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# We grab the TorchScripted model via tracing
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
######################################################################
# In order to test our model, here we download an image of cat and
@@ -158,9 +158,9 @@ with open(synset_path) as f:
# Now we would like to port the Gluon model to a portable computational graph.
# It's as easy as several lines.
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
+input_name = "input0"
+shape_list = [(input_name, x.shape)]
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
# we want a probability so add a softmax operator
func = mod["main"]
func = relay.Function(func.params, relay.nn.softmax(func.body), None,
func.type_params, func.attrs)
@@ -241,7 +241,7 @@ else:
module = runtime.GraphModule(rlib["default"](dev))
# set input data
-module.set_input("data", tvm.nd.array(x.astype("float32")))
+module.set_input(input_name, tvm.nd.array(x.astype("float32")))
# run
module.run()
# get output
diff --git a/gallery/how_to/deploy_models/deploy_model_on_rasp.py
b/gallery/how_to/deploy_models/deploy_model_on_rasp.py
index 5196ae9ce1..64f83dbbc0 100644
--- a/gallery/how_to/deploy_models/deploy_model_on_rasp.py
+++ b/gallery/how_to/deploy_models/deploy_model_on_rasp.py
@@ -95,22 +95,22 @@ from tvm.contrib.download import download_testdata
# -----------------------------
# Back to the host machine, which should have a full TVM installed (with LLVM).
#
-# We will use pre-trained model from
-# `MXNet Gluon model zoo
<https://mxnet.apache.org/api/python/gluon/model_zoo.html>`_.
-# You can found more details about this part at tutorial
:ref:`tutorial-from-mxnet`.
+# We will use pre-trained model from torchvision
-import sys
-
-from mxnet.gluon.model_zoo.vision import get_model
+import torch
+import torchvision
from PIL import Image
import numpy as np
# one line to get the model
-try:
- block = get_model("resnet18_v1", pretrained=True)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+model_name = "resnet18"
+model = getattr(torchvision.models, model_name)(pretrained=True)
+model = model.eval()
+
+# We grab the TorchScripted model via tracing
+input_shape = [1, 3, 224, 224]
+input_data = torch.randn(input_shape)
+scripted_model = torch.jit.trace(model, input_data).eval()
######################################################################
# In order to test our model, here we download an image of cat and
@@ -148,12 +148,12 @@ with open(synset_path) as f:
synset = eval(f.read())
######################################################################
-# Now we would like to port the Gluon model to a portable computational graph.
+# Now we would like to port the PyTorch model to a portable computational
graph.
# It's as easy as several lines.
-# We support MXNet static graph(symbol) and HybridBlock in mxnet.gluon
-shape_dict = {"data": x.shape}
-mod, params = relay.frontend.from_mxnet(block, shape_dict)
+input_name = "input0"
+shape_list = [(input_name, x.shape)]
+mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
# we want a probability so add a softmax operator
func = mod["main"]
func = relay.Function(func.params, relay.nn.softmax(func.body), None,
func.type_params, func.attrs)
@@ -226,7 +226,7 @@ rlib = remote.load_module("net.tar")
dev = remote.cpu(0)
module = runtime.GraphModule(rlib["default"](dev))
# set input data
-module.set_input("data", tvm.nd.array(x.astype("float32")))
+module.set_input(input_name, tvm.nd.array(x.astype("float32")))
# run
module.run()
# get output
diff --git a/gallery/how_to/deploy_models/deploy_quantized.py
b/gallery/how_to/deploy_models/deploy_quantized.py
deleted file mode 100644
index 2cdb7da5f8..0000000000
--- a/gallery/how_to/deploy_models/deploy_quantized.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy a Quantized Model on Cuda
-================================
-**Author**: `Wuwei Lin <https://github.com/vinx13>`_
-
-This article is an introductory tutorial of automatic quantization with TVM.
-Automatic quantization is one of the quantization modes in TVM. More details on
-the quantization story in TVM can be found
-`here <https://discuss.tvm.apache.org/t/quantization-story/3920>`_.
-In this tutorial, we will import a GluonCV pre-trained model on ImageNet to
-Relay, quantize the Relay model and then perform the inference.
-"""
-
-import logging
-import os
-import sys
-
-import tvm
-from tvm import te
-from tvm import relay
-import mxnet as mx
-from tvm.contrib.download import download_testdata
-from mxnet import gluon
-
-
-batch_size = 1
-model_name = "resnet18_v1"
-target = "cuda"
-dev = tvm.device(target)
-
-###############################################################################
-# Prepare the Dataset
-# -------------------
-# We will demonstrate how to prepare the calibration dataset for quantization.
-# We first download the validation set of ImageNet and pre-process the dataset.
-calibration_rec = download_testdata(
-
"http://data.mxnet.io.s3-website-us-west-1.amazonaws.com/data/val_256_q90.rec",
- "val_256_q90.rec",
-)
-
-
-def get_val_data(num_workers=4):
- mean_rgb = [123.68, 116.779, 103.939]
- std_rgb = [58.393, 57.12, 57.375]
-
- def batch_fn(batch):
- return batch.data[0].asnumpy(), batch.label[0].asnumpy()
-
- img_size = 299 if model_name == "inceptionv3" else 224
- val_data = mx.io.ImageRecordIter(
- path_imgrec=calibration_rec,
- preprocess_threads=num_workers,
- shuffle=False,
- batch_size=batch_size,
- resize=256,
- data_shape=(3, img_size, img_size),
- mean_r=mean_rgb[0],
- mean_g=mean_rgb[1],
- mean_b=mean_rgb[2],
- std_r=std_rgb[0],
- std_g=std_rgb[1],
- std_b=std_rgb[2],
- )
- return val_data, batch_fn
-
-
-###############################################################################
-# The calibration dataset should be an iterable object. We define the
-# calibration dataset as a generator object in Python. In this tutorial, we
-# only use a few samples for calibration.
-
-calibration_samples = 10
-
-
-def calibrate_dataset():
- val_data, batch_fn = get_val_data()
- val_data.reset()
- for i, batch in enumerate(val_data):
- if i * batch_size >= calibration_samples:
- break
- data, _ = batch_fn(batch)
- yield {"data": data}
-
-
-###############################################################################
-# Import the model
-# ----------------
-# We use the Relay MxNet frontend to import a model from the Gluon model zoo.
-def get_model():
- gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
- img_size = 299 if model_name == "inceptionv3" else 224
- data_shape = (batch_size, 3, img_size, img_size)
- mod, params = relay.frontend.from_mxnet(gluon_model, {"data": data_shape})
- return mod, params
-
-
-###############################################################################
-# Quantize the Model
-# ------------------
-# In quantization, we need to find the scale for each weight and intermediate
-# feature map tensor of each layer.
-#
-# For weights, the scales are directly calculated based on the value of the
-# weights. Two modes are supported: `power2` and `max`. Both modes find the
-# maximum value within the weight tensor first. In `power2` mode, the maximum
-# is rounded down to power of two. If the scales of both weights and
-# intermediate feature maps are power of two, we can leverage bit shifting for
-# multiplications. This make it computationally more efficient. In `max` mode,
-# the maximum is used as the scale. Without rounding, `max` mode might have
-# better accuracy in some cases. When the scales are not powers of two, fixed
-# point multiplications will be used.
-#
-# For intermediate feature maps, we can find the scales with data-aware
-# quantization. Data-aware quantization takes a calibration dataset as the
-# input argument. Scales are calculated by minimizing the KL divergence between
-# distribution of activation before and after quantization.
-# Alternatively, we can also use pre-defined global scales. This saves the time
-# for calibration. But the accuracy might be impacted.
-
-
-def quantize(mod, params, data_aware):
- if data_aware:
- with relay.quantize.qconfig(calibrate_mode="kl_divergence",
weight_scale="max"):
- mod = relay.quantize.quantize(mod, params,
dataset=calibrate_dataset())
- else:
- with relay.quantize.qconfig(calibrate_mode="global_scale",
global_scale=8.0):
- mod = relay.quantize.quantize(mod, params)
- return mod
-
-
-###############################################################################
-# Run Inference
-# -------------
-# We create a Relay VM to build and execute the model.
-def run_inference(mod):
- model = relay.create_executor("vm", mod, dev, target).evaluate()
- val_data, batch_fn = get_val_data()
- for i, batch in enumerate(val_data):
- data, label = batch_fn(batch)
- prediction = model(data)
- if i > 10: # only run inference on a few samples in this tutorial
- break
-
-
-def main():
- try:
- mod, params = get_model()
- except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- return
- mod = quantize(mod, params, data_aware=True)
- run_inference(mod)
-
-
-if __name__ == "__main__":
- main()
diff --git a/gallery/how_to/extend_tvm/bring_your_own_datatypes.py
b/gallery/how_to/extend_tvm/bring_your_own_datatypes.py
deleted file mode 100644
index e502aff3e0..0000000000
--- a/gallery/how_to/extend_tvm/bring_your_own_datatypes.py
+++ /dev/null
@@ -1,415 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Bring Your Own Datatypes to TVM
-===============================
-**Authors**: `Gus Smith <https://github.com/gussmith23>`_, `Andrew Liu
<https://github.com/hypercubestart>`_
-
-In this tutorial, we will show you how to utilize the Bring Your Own Datatypes
framework to use your own custom datatypes in TVM.
-Note that the Bring Your Own Datatypes framework currently only handles
**software emulated versions of datatypes**.
-The framework does not support compiling for custom accelerator datatypes
out-of-the-box.
-
-Datatype Libraries
-------------------
-
-The Bring Your Own Datatypes allows users to register their own datatype
implementations alongside TVM's native datatypes (such as ``float``).
-In the wild, these datatype implementations often appear as libraries.
-For example:
-
-- `libposit <https://github.com/cjdelisle/libposit>`_, a posit library
-- `Stillwater Universal <https://github.com/stillwater-sc/universal>`_, a
library with posits, fixed-point numbers, and other types
-- `SoftFloat <https://github.com/ucb-bar/berkeley-softfloat-3>`_, Berkeley's
software implementation of IEEE 754 floating-point
-
-The Bring Your Own Datatypes enables users to plug these datatype
implementations into TVM!
-
-In this section, we will use an example library we have already implemented,
located at ``3rdparty/byodt/myfloat.cc``.
-This datatype, which we dubbed "myfloat", is really just a IEE-754 float
under-the-hood, but it serves a useful example
-to show that any datatype can be used in the BYODT framework.
-
-Setup
------
-
-Since we do not use any 3rdparty library, there is no setup needed.
-
-If you would like to try this with your own datatype library, first bring the
library's functions into the process space with ``CDLL``:
-
-.. code-block:: python
-
- ctypes.CDLL('my-datatype-lib.so', ctypes.RTLD_GLOBAL)
-"""
-
-
-######################
-# A Simple TVM Program
-# --------------------
-#
-# We'll begin by writing a simple program in TVM; afterwards, we will re-write
it to use custom datatypes.
-import sys
-
-import tvm
-from tvm import relay
-
-# Our basic program: Z = X + Y
-x = relay.var("x", shape=(3,), dtype="float32")
-y = relay.var("y", shape=(3,), dtype="float32")
-z = x + y
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-
-######################################################################
-# Now, we create random inputs to feed into this program using numpy:
-
-import numpy as np
-
-np.random.seed(23) # for reproducibility
-
-x_input = np.random.rand(3).astype("float32")
-y_input = np.random.rand(3).astype("float32")
-print("x: {}".format(x_input))
-print("y: {}".format(y_input))
-
-######################################################################
-# Finally, we're ready to run the program:
-
-z_output = relay.create_executor(mod=module).evaluate()(x_input, y_input)
-print("z: {}".format(z_output))
-
-######################################################################
-# Adding Custom Datatypes
-# -----------------------
-# Now, we will do the same, but we will use a custom datatype for our
intermediate computation.
-#
-# We use the same input variables ``x`` and ``y`` as above, but before adding
``x + y``, we first cast both ``x`` and ``y`` to a custom datatype via the
``relay.cast(...)`` call.
-#
-# Note how we specify the custom datatype: we indicate it using the special
``custom[...]`` syntax.
-# Additionally, note the "32" after the datatype: this is the bitwidth of the
custom datatype. This tells TVM that each instance of ``myfloat`` is 32 bits
wide.
-
-try:
- with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
- y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
- z_myfloat = x_myfloat + y_myfloat
- z = relay.cast(z_myfloat, dtype="float32")
-except tvm.TVMError as e:
- # Print last line of error
- print(str(e).split("\n")[-1])
-
-######################################################################
-# Trying to generate this program throws an error from TVM.
-# TVM does not know how to handle any custom datatype out of the box!
-# We first have to register the custom type with TVM, giving it a name and a
type code:
-
-tvm.target.datatype.register("myfloat", 150)
-
-######################################################################
-# Note that the type code, 150, is currently chosen manually by the user.
-# See ``TVMTypeCode::kCustomBegin`` in `include/tvm/runtime/c_runtime_api.h
<https://github.com/apache/tvm/blob/main/include/tvm/runtime/data_type.h>`_.
-# Now we can generate our program again:
-
-x_myfloat = relay.cast(x, dtype="custom[myfloat]32")
-y_myfloat = relay.cast(y, dtype="custom[myfloat]32")
-z_myfloat = x_myfloat + y_myfloat
-z = relay.cast(z_myfloat, dtype="float32")
-program = relay.Function([x, y], z)
-module = tvm.IRModule.from_expr(program)
-module = relay.transform.InferType()(module)
-
-######################################################################
-# Now we have a Relay program that uses myfloat!
-print(program)
-
-######################################################################
-# Now that we can express our program without errors, let's try running it!
-try:
- with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- z_output_myfloat = relay.create_executor("graph",
mod=module).evaluate()(x_input, y_input)
- print("z: {}".format(y_myfloat))
-except tvm.TVMError as e:
- # Print last line of error
- print(str(e).split("\n")[-1])
-
-######################################################################
-# Now, trying to compile this program throws an error.
-# Let's dissect this error.
-#
-# The error is occurring during the process of lowering the custom datatype
code to code that TVM can compile and run.
-# TVM is telling us that it cannot find a *lowering function* for the ``Cast``
operation, when casting from source type 2 (``float``, in TVM), to destination
type 150 (our custom datatype).
-# When lowering custom datatypes, if TVM encounters an operation over a custom
datatype, it looks for a user-registered *lowering function*, which tells it
how to lower the operation to an operation over datatypes it understands.
-# We have not told TVM how to lower ``Cast`` operations for our custom
datatypes; thus, the source of this error.
-#
-# To fix this error, we simply need to specify a lowering function:
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func(
- {
- (32, 32): "FloatToCustom32", # cast from float32 to myfloat32
- }
- ),
- "Cast",
- "llvm",
- "float",
- "myfloat",
-)
-
-######################################################################
-# The ``register_op(...)`` call takes a lowering function, and a number of
parameters which specify exactly the operation which should be lowered with the
provided lowering function.
-# In this case, the arguments we pass specify that this lowering function is
for lowering a ``Cast`` from ``float`` to ``myfloat`` for target ``"llvm"``.
-#
-# The lowering function passed into this call is very general: it should take
an operation of the specified type (in this case, `Cast`) and return another
operation which only uses datatypes which TVM understands.
-#
-# In the general case, we expect users to implement operations over their
custom datatypes using calls to an external library.
-# In our example, our ``myfloat`` library implements a ``Cast`` from ``float``
to 32-bit ``myfloat`` in the function ``FloatToCustom32``.
-# To provide for the general case, we have made a helper function,
``create_lower_func(...)``,
-# which does just this: given a dictionary, it replaces the given operation
with a ``Call`` to the appropriate function name provided based on the op and
the bit widths.
-# It additionally removes usages of the custom datatype by storing the custom
datatype in an opaque ``uint`` of the appropriate width; in our case, a
``uint32_t``.
-# For more information, see `the source code
<https://github.com/apache/tvm/blob/main/python/tvm/target/datatype.py>`_.
-
-# We can now re-try running the program:
-try:
- with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- z_output_myfloat = relay.create_executor("graph",
mod=module).evaluate()(x_input, y_input)
- print("z: {}".format(z_output_myfloat))
-except tvm.TVMError as e:
- # Print last line of error
- print(str(e).split("\n")[-1])
-
-######################################################################
-# This new error tells us that the ``Add`` lowering function is not found,
which is good news, as it's no longer complaining about the ``Cast``!
-# We know what to do from here: we just need to register the lowering
functions for the other operations in our program.
-#
-# Note that for ``Add``, ``create_lower_func`` takes in a dict where the key
is an integer.
-# For ``Cast`` operations, we require a 2-tuple to specify the
``src_bit_length`` and the ``dest_bit_length``,
-# while for all other operations, the bit length is the same between the
operands so we only require one integer to specify ``bit_length``.
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Add"}),
- "Add",
- "llvm",
- "myfloat",
-)
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({(32, 32): "Custom32ToFloat"}),
- "Cast",
- "llvm",
- "myfloat",
- "float",
-)
-
-# Now, we can run our program without errors.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- z_output_myfloat = relay.create_executor(mod=module).evaluate()(x_input,
y_input)
-print("z: {}".format(z_output_myfloat))
-
-print("x:\t\t{}".format(x_input))
-print("y:\t\t{}".format(y_input))
-print("z (float32):\t{}".format(z_output))
-print("z (myfloat32):\t{}".format(z_output_myfloat))
-
-# Perhaps as expected, the ``myfloat32`` results and ``float32`` are exactly
the same!
-
-######################################################################
-# Running Models With Custom Datatypes
-# ------------------------------------
-#
-# We will first choose the model which we would like to run with myfloat.
-# In this case we use `Mobilenet <https://arxiv.org/abs/1704.04861>`_.
-# We choose Mobilenet due to its small size.
-# In this alpha state of the Bring Your Own Datatypes framework, we have not
implemented any software optimizations for running software emulations of
custom datatypes; the result is poor performance due to many calls into our
datatype emulation library.
-#
-# First let us define two helper functions to get the mobilenet model and a
cat image.
-
-
-def get_mobilenet():
- dshape = (1, 3, 224, 224)
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("mobilenet0.25", pretrained=True)
- shape_dict = {"data": dshape}
- return relay.frontend.from_mxnet(block, shape_dict)
-
-
-def get_cat_image():
- from tvm.contrib.download import download_testdata
- from PIL import Image
-
- url =
"https://gist.githubusercontent.com/zhreshold/bcda4716699ac97ea44f791c24310193/raw/fa7ef0e9c9a5daea686d6473a62aacd1a5885849/cat.png"
- dst = "cat.png"
- real_dst = download_testdata(url, dst, module="data")
- img = Image.open(real_dst).resize((224, 224))
- # CoreML's standard model image format is BGR
- img_bgr = np.array(img)[:, :, ::-1]
- img = np.transpose(img_bgr, (2, 0, 1))[np.newaxis, :]
- return np.asarray(img, dtype="float32")
-
-
-try:
- module, params = get_mobilenet()
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
-
-######################################################################
-# It's easy to execute MobileNet with native TVM:
-
-ex = tvm.relay.create_executor("graph", mod=module, params=params)
-input = get_cat_image()
-result = ex.evaluate()(input).numpy()
-# print first 10 elements
-print(result.flatten()[:10])
-
-######################################################################
-# Now, we would like to change the model to use myfloat internally. To do so,
we need to convert the network. To do this, we first define a function which
will help us convert tensors:
-
-
-def convert_ndarray(dst_dtype, array):
- """Converts an NDArray into the specified datatype"""
- x = relay.var("x", shape=array.shape, dtype=str(array.dtype))
- cast = relay.Function([x], x.astype(dst_dtype))
- with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- return relay.create_executor("graph").evaluate(cast)(array)
-
-
-######################################################################
-# Now, to actually convert the entire network, we have written `a pass in
Relay
<https://github.com/gussmith23/tvm/blob/ea174c01c54a2529e19ca71e125f5884e728da6e/python/tvm/relay/frontend/change_datatype.py#L21>`_
which simply converts all nodes within the model to use the new datatype.
-
-from tvm.relay.frontend.change_datatype import ChangeDatatype
-
-src_dtype = "float32"
-dst_dtype = "custom[myfloat]32"
-
-module = relay.transform.InferType()(module)
-
-# Currently, custom datatypes only work if you run simplify_inference
beforehand
-module = tvm.relay.transform.SimplifyInference()(module)
-
-# Run type inference before changing datatype
-module = tvm.relay.transform.InferType()(module)
-
-# Change datatype from float to myfloat and re-infer types
-cdtype = ChangeDatatype(src_dtype, dst_dtype)
-expr = cdtype.visit(module["main"])
-module = tvm.relay.transform.InferType()(module)
-
-# We also convert the parameters:
-params = {k: convert_ndarray(dst_dtype, v) for k, v in params.items()}
-
-# We also need to convert our input:
-input = convert_ndarray(dst_dtype, input)
-
-# Finally, we can try to run the converted model:
-try:
- # Vectorization is not implemented with custom datatypes.
- with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- result_myfloat = tvm.relay.create_executor("graph",
mod=module).evaluate(expr)(
- input, **params
- )
-except tvm.TVMError as e:
- print(str(e).split("\n")[-1])
-
-######################################################################
-# When we attempt to run the model, we get a familiar error telling us that
more functions need to be registered for myfloat.
-#
-# Because this is a neural network, many more operations are required.
-# Here, we register all the needed functions:
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "FloatToCustom32"}),
- "FloatImm",
- "llvm",
- "myfloat",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.lower_ite, "Call", "llvm", "myfloat",
intrinsic_name="tir.if_then_else"
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.lower_call_pure_extern,
- "Call",
- "llvm",
- "myfloat",
- intrinsic_name="tir.call_pure_extern",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Mul"}),
- "Mul",
- "llvm",
- "myfloat",
-)
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Div"}),
- "Div",
- "llvm",
- "myfloat",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Sqrt"}),
- "Call",
- "llvm",
- "myfloat",
- intrinsic_name="tir.sqrt",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Sub"}),
- "Sub",
- "llvm",
- "myfloat",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Exp"}),
- "Call",
- "llvm",
- "myfloat",
- intrinsic_name="tir.exp",
-)
-
-tvm.target.datatype.register_op(
- tvm.target.datatype.create_lower_func({32: "Custom32Max"}),
- "Max",
- "llvm",
- "myfloat",
-)
-
-tvm.target.datatype.register_min_func(
- tvm.target.datatype.create_min_lower_func({32: "MinCustom32"}, "myfloat"),
- "myfloat",
-)
-
-######################################################################
-# Note we are making use of two new functions: ``register_min_func`` and
``create_min_lower_func``.
-#
-# ``register_min_func`` takes in an integer ``num_bits`` for the bit length,
and should return an operation
-# representing the minimum finite representable value for the custom data type
with the specified bit length.
-#
-# Similar to ``register_op`` and ``create_lower_func``, the
``create_min_lower_func`` handles the general case
-# where the minimum representable custom datatype value is implemented using
calls to an external library.
-#
-# Now we can finally run the model:
-
-# Vectorization is not implemented with custom datatypes.
-with tvm.transform.PassContext(config={"tir.disable_vectorize": True}):
- result_myfloat = relay.create_executor(mod=module).evaluate(expr)(input,
**params)
- result_myfloat = convert_ndarray(src_dtype, result_myfloat).numpy()
- # print first 10 elements
- print(result_myfloat.flatten()[:10])
-
-# Again, note that the output using 32-bit myfloat exactly the same as 32-bit
floats,
-# because myfloat is exactly a float!
-np.testing.assert_array_equal(result, result_myfloat)
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
b/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
index 0b59038f19..d795c3aba2 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_arm.py
@@ -120,19 +120,6 @@ def get_network(name, batch_size, layout="NHWC",
dtype="float32", use_sparse=Fal
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else
(batch_size, 299, 299, 3)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- assert layout == "NCHW"
-
- block = get_model("resnet50_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
elif name == "mlp":
mod, params = relay.testing.mlp.get_workload(
batch_size=batch_size, dtype=dtype, image_shape=image_shape,
num_classes=1000
@@ -265,13 +252,9 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout,
batch_size, target.kind.name)
# Extract tasks from the network
print("Get model...")
-try:
- mod, params, input_shape, output_shape = get_network(
- network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
- )
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+mod, params, input_shape, output_shape = get_network(
+ network, batch_size, layout, dtype=dtype, use_sparse=use_sparse
+)
print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
b/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
index 41e7e8fb41..1f8c0cc13a 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_cuda.py
@@ -114,20 +114,6 @@ def get_network(name, batch_size, layout="NHWC",
dtype="float32"):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else
(batch_size, 299, 299, 3)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- assert layout == "NCHW"
-
- block = get_model("resnet18_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
-
return mod, params, input_shape, output_shape
@@ -152,11 +138,7 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout,
batch_size, target.kind.name)
# Extract tasks from the network
print("Extract tasks...")
-try:
- mod, params, input_shape, output_shape = get_network(network, batch_size,
layout, dtype=dtype)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+mod, params, input_shape, output_shape = get_network(network, batch_size,
layout, dtype=dtype)
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
b/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
index 1c531a5303..15f3379013 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_mali.py
@@ -117,19 +117,6 @@ def get_network(name, batch_size, layout="NHWC",
dtype="float32"):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else
(batch_size, 299, 299, 3)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- assert layout == "NCHW"
-
- block = get_model("resnet50_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
return mod, params, input_shape, output_shape
@@ -171,11 +158,7 @@ device_key = "rk3399"
# Extract tasks from the network
print("Extract tasks...")
-try:
- mod, params, input_shape, output_shape = get_network(network, batch_size,
layout, dtype=dtype)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+mod, params, input_shape, output_shape = get_network(network, batch_size,
layout, dtype=dtype)
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
for idx, task in enumerate(tasks):
diff --git a/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
b/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
index 96df3942ab..169567122f 100644
--- a/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
+++ b/gallery/how_to/tune_with_autoscheduler/tune_network_x86.py
@@ -117,19 +117,6 @@ def get_network(name, batch_size, layout="NHWC",
dtype="float32", use_sparse=Fal
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299) if layout == "NCHW" else
(batch_size, 299, 299, 3)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- assert layout == "NCHW"
-
- block = get_model("resnet50_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
elif name == "mlp":
mod, params = relay.testing.mlp.get_workload(
batch_size=batch_size, dtype=dtype, image_shape=image_shape,
num_classes=1000
@@ -169,17 +156,13 @@ log_file = "%s-%s-B%d-%s.json" % (network, layout,
batch_size, target.kind.name)
# Extract tasks from the network
print("Get model...")
-try:
- mod, params, input_shape, output_shape = get_network(
- network,
- batch_size,
- layout,
- dtype=dtype,
- use_sparse=use_sparse,
- )
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
+mod, params, input_shape, output_shape = get_network(
+ network,
+ batch_size,
+ layout,
+ dtype=dtype,
+ use_sparse=use_sparse,
+)
print("Extract tasks...")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
b/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
index 4f2e952ce3..13674f5cda 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_arm.py
@@ -105,17 +105,6 @@ def get_network(name, batch_size):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
b/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
index 47ea99884d..53d56c709d 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_cuda.py
@@ -104,17 +104,6 @@ def get_network(name, batch_size):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
b/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
index 3c2f173c23..d5b4b217ab 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_mobile_gpu.py
@@ -104,17 +104,6 @@ def get_network(name, batch_size):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={"data":
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
b/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
index a637f7222e..b56ec0ad0e 100644
--- a/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
+++ b/gallery/how_to/tune_with_autotvm/tune_relay_x86.py
@@ -74,17 +74,6 @@ def get_network(name, batch_size):
elif name == "inception_v3":
input_shape = (batch_size, 3, 299, 299)
mod, params =
relay.testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
- elif name == "mxnet":
- # an example for mxnet model
- from mxnet.gluon.model_zoo.vision import get_model
-
- block = get_model("resnet18_v1", pretrained=True)
- mod, params = relay.frontend.from_mxnet(block, shape={input_name:
input_shape}, dtype=dtype)
- net = mod["main"]
- net = relay.Function(
- net.params, relay.nn.softmax(net.body), None, net.type_params,
net.attrs
- )
- mod = tvm.IRModule.from_expr(net)
else:
raise ValueError("Unsupported network: " + name)
diff --git a/vta/scripts/tune_resnet.py b/vta/scripts/tune_resnet.py
deleted file mode 100644
index 7fa6ec42ce..0000000000
--- a/vta/scripts/tune_resnet.py
+++ /dev/null
@@ -1,377 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""Perform ResNet autoTVM tuning on VTA using Relay."""
-
-import argparse, os, sys, time
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.contrib import graph_executor, utils, download
-from tvm.contrib.debugger import debug_executor
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-from tvm.autotvm.task import extract_from_program
-
-
-def parse_arguments():
-
- parser = argparse.ArgumentParser(description="Train a model for image
classification.")
- parser.add_argument(
- "--model",
- type=str,
- default="resnet18_v1",
- choices=["resnet18_v1"],
- help="Input model name.",
- )
- parser.add_argument(
- "--start-name",
- type=str,
- default="nn.max_pool2d",
- help="The name of the node where packing starts",
- )
- parser.add_argument(
- "--stop-name",
- type=str,
- default="nn.global_avg_pool2d",
- help="The name of the node where packing stops",
- )
- parser.add_argument(
- "--debug-profile", action="store_true", help="Show layer-wise time
cost profiling results"
- )
- parser.add_argument(
- "--device", default="vta", choices=["vta", "arm_cpu"], help="Select
device target"
- )
- parser.add_argument(
- "--measurements", type=int, default=1, help="Number of measurements
during AutoTVM search"
- )
- parser.add_argument("--tuner", type=str, default="random", help="AutoTVM
search strategy")
- parser.add_argument(
- "--log-filename", type=str, default="resnet-18.log", help="AutoTVM log
file name"
- )
-
- return parser.parse_args()
-
-
-def register_vta_tuning_tasks():
- from tvm.autotvm.task.topi_integration import TaskExtractEnv,
deserialize_args
-
- @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
- def my_clip(x, a_min, a_max):
- """Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.tir.const(a_min, x.dtype)
- const_max = tvm.tir.const(a_max, x.dtype)
- x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max),
name="clipA")
- x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min),
name="clipB")
- return x
-
- # init autotvm env to register VTA operator
- TaskExtractEnv()
-
- @autotvm.task.register("topi_nn_conv2d", override=True)
- def _topi_nn_conv2d(*args, **kwargs):
- assert not kwargs, "Do not support kwargs in template function call"
- args = deserialize_args(args)
- A, W = args[:2]
-
- with tvm.target.vta():
- res = topi.nn.conv2d(*args, **kwargs)
- res = topi.right_shift(res, 8)
- res = my_clip(res, 0, 127)
- res = topi.cast(res, "int8")
-
- if tvm.target.Target.current().device_name == "vta":
- s = topi.generic.schedule_conv2d_nchw([res])
- else:
- s = te.create_schedule([res.op])
- return s, [A, W, res]
-
- @autotvm.task.register("topi_nn_dense", override=True)
- def _topi_nn_dense(*args, **kwargs):
- assert not kwargs, "Do not support kwargs in template function call"
- args = deserialize_args(args)
- A, W = args[:2]
-
- with tvm.target.vta():
- res = topi.nn.dense(*args, **kwargs)
- res = topi.right_shift(res, 8)
- res = my_clip(res, 0, 127)
- res = topi.cast(res, "int8")
-
- if tvm.target.Target.current().device_name == "vta":
- s = topi.generic.schedule_dense([res])
- else:
- s = te.create_schedule([res.op])
-
- return s, [A, W, res]
-
-
-def compile_network(opt, env, target):
-
- # Populate the shape and data type dictionary
- dtype_dict = {"data": "float32"}
- shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
- # Get off the shelf gluon model, and convert to relay
- gluon_model = vision.get_model(opt.model, pretrained=True)
- mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
- # Update shape and type dictionary
- shape_dict.update({k: v.shape for k, v in params.items()})
- dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
- # Perform quantization in Relay
- # Note: We set opt_level to 3 in order to fold batch norm
- with tvm.transform.PassContext(opt_level=3):
- with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
- relay_prog = relay.quantize.quantize(mod["main"], params=params)
-
- # Perform graph packing and constant folding for VTA target
- if target.device_name == "vta":
- assert env.BLOCK_IN == env.BLOCK_OUT
- relay_prog = graph_pack(
- relay_prog,
- env.BATCH,
- env.BLOCK_OUT,
- env.WGT_WIDTH,
- start_name=opt.start_name,
- stop_name=opt.stop_name,
- )
-
- return relay_prog, params
-
-
-def tune_tasks(
- tasks,
- measure_option,
- tuner="xgb",
- n_trial=1000,
- early_stopping=None,
- log_filename="tuning.log",
- use_transfer_learning=True,
- try_winograd=True,
-):
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
- # create tuner
- if tuner == "xgb":
- tuner_obj = XGBTuner(tsk, loss_type="reg")
- elif tuner == "xgb_knob":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
- elif tuner == "xgb_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
- elif tuner == "xgb_curve":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
- elif tuner == "xgb_rank":
- tuner_obj = XGBTuner(tsk, loss_type="rank")
- elif tuner == "xgb_rank_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
- elif tuner == "xgb_rank_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
- elif tuner == "xgb_rank_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
- elif tuner == "xgb_rank_binary":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
- elif tuner == "xgb_rank_binary_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="knob")
- elif tuner == "xgb_rank_binary_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="itervar")
- elif tuner == "xgb_rank_binary_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="curve")
- elif tuner == "ga":
- tuner_obj = GATuner(tsk, pop_size=50)
- elif tuner == "random":
- tuner_obj = RandomTuner(tsk)
- elif tuner == "gridsearch":
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
-
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- n_trial_ = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(
- n_trial_,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(n_trial_, prefix=prefix),
- autotvm.callback.log_to_file(tmp_log_file),
- ],
- )
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-if __name__ == "__main__":
-
- opt = parse_arguments()
-
- # Make sure that TVM was compiled with RPC=1
- assert tvm.runtime.enabled("rpc")
-
- # Read in VTA environment
- env = vta.get_env()
-
- # Get remote from fleet node
- tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
- tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
- if not tracker_host or not tracker_port:
- print("Set your AutoTVM tracker node host and port variables to run
the autotuner")
- exit()
-
- # Get remote
- if env.TARGET != "sim":
-
- # Measure build start time
- reconfig_start = time.time()
-
- # Get remote from fleet node
- remote = autotvm.measure.request_remote(
- env.TARGET, tracker_host, int(tracker_port), timeout=10000
- )
-
- # Reconfigure the JIT runtime and FPGA.
- # You can program the FPGA with your own custom bitstream
- # by passing the path to the bitstream file instead of None.
- vta.reconfig_runtime(remote)
- vta.program_fpga(remote, bitstream=None)
-
- # Report on reconfiguration time
- reconfig_time = time.time() - reconfig_start
- print("Reconfigured FPGA and RPC runtime in
{0:.2f}s!".format(reconfig_time))
-
- # In simulation mode, host the RPC server locally.
- else:
- remote = rpc.LocalSession()
-
- # VTA target and execution context
- target = env.target if opt.device == "vta" else env.target_vta_cpu
- ctx = remote.ext_dev(0) if opt.device == "vta" else remote.cpu(0)
-
- # Compile Relay program
- print("Initial compile...")
- try:
- relay_prog, params = compile_network(opt, env, target)
- except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
-
- # Register VTA tuning tasks
- register_vta_tuning_tasks()
-
- # Perform task extraction on Relay program
- print("Extracting tasks...")
- tasks = extract_from_program(
- func=relay_prog,
- params=params,
- ops=(relay.op.get("nn.conv2d"),),
- target=tvm.target.Target(target, host=env.target_host),
- )
-
- # Perform Autotuning
- print("Tuning...")
- tuning_opt = {
- "log_filename": opt.log_filename,
- "tuner": opt.tuner,
- "n_trial": 1e9,
- "early_stopping": None,
- "measure_option": autotvm.measure_option(
-
builder=autotvm.LocalBuilder(build_func=vta.vta_autotvm_build_func),
- runner=autotvm.RPCRunner(
- env.TARGET,
- tracker_host,
- tracker_port,
- number=4,
- min_repeat_ms=150,
- repeat=opt.measurements,
- timeout=60,
- # check_correctness=True, # TODO: re-enable when
check_correctness works again.
- ),
- ),
- }
- tune_tasks(tasks, **tuning_opt)
-
- # Compile kernels with history best records
- with autotvm.tophub.context(target, extra_files=[opt.log_filename]):
-
- # Compile network
- print("Compiling network with best tuning parameters...")
- if target.device_name != "vta":
- with tvm.transform.PassContext(opt_level=3,
disabled_pass={"AlterOpLayout"}):
- graph, lib, params = relay.build(
- relay_prog,
- target=tvm.target.Target(target, host=env.target_host),
- params=params,
- )
- else:
- with vta.build_config(opt_level=3,
disabled_pass={"AlterOpLayout"}):
- graph, lib, params = relay.build(
- relay_prog,
- target=tvm.target.Target(target, host=env.target_host),
- params=params,
- )
-
- # Export library
- temp = utils.tempdir()
- lib.save(temp.relpath("graphlib.o"))
- remote.upload(temp.relpath("graphlib.o"))
- lib = remote.load_module("graphlib.o")
-
- # If detailed runtime info is needed build with debug runtime
- if opt.debug_profile:
- m = debug_executor.create(graph, lib, ctx)
- else:
- m = graph_executor.create(graph, lib, ctx)
-
- # Set the network parameters and synthetic input
- image = tvm.nd.array((np.random.uniform(size=(1, 3, 224,
224))).astype("float32"))
- m.set_input(**params)
- m.set_input("data", image)
-
- # Perform inference
- timer = m.module.time_evaluator("run", ctx, number=4,
repeat=opt.measurements)
- tcost = timer()
- prof_res = np.array(tcost.results) * 1000 # convert to millisecond
- print(
- "Mean inference time (std dev): %.2f ms (%.2f ms)"
- % (np.mean(prof_res), np.std(prof_res))
- )
-
- # Display profile information
- if opt.debug_profile:
- m.run()
diff --git a/vta/tutorials/autotvm/README.txt b/vta/tutorials/autotvm/README.txt
deleted file mode 100644
index a282a740ec..0000000000
--- a/vta/tutorials/autotvm/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Auto tuning
--------------
diff --git a/vta/tutorials/autotvm/tune_alu_vta.py
b/vta/tutorials/autotvm/tune_alu_vta.py
deleted file mode 100644
index 8ee58fe990..0000000000
--- a/vta/tutorials/autotvm/tune_alu_vta.py
+++ /dev/null
@@ -1,345 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a ALU fused op on VTA
----------------------------------
-"""
-
-import os
-import sys
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import download
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-from tvm.autotvm import record
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-import copy
-
-
-#################################################################
-# Compile network
-# ---------------
-# Perform vta-specific compilation with Relay from a Gluon model
-def compile_network(env, target, model, start_pack, stop_pack):
-
- # Populate the shape and data type dictionary
- dtype_dict = {"data": "float32"}
- shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
- # Get off the shelf gluon model, and convert to relay
- gluon_model = vision.get_model(model, pretrained=True)
- mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
- # Update shape and type dictionary
- shape_dict.update({k: v.shape for k, v in params.items()})
- dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
- # Perform quantization in Relay
- # Note: We set opt_level to 3 in order to fold batch norm
- with relay.build_config(opt_level=3):
- with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
- mod = relay.quantize.quantize(mod, params=params)
-
- # Perform graph packing and constant folding for VTA target
- if target.device_name == "vta":
- assert env.BLOCK_IN == env.BLOCK_OUT
- relay_prog = graph_pack(
- mod["main"],
- env.BATCH,
- env.BLOCK_OUT,
- env.WGT_WIDTH,
- start_name=start_pack,
- stop_name=stop_pack,
- )
-
- return relay_prog, params
-
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations.
-# Here we use an Pynq-Z1 board as an example.
-
-# Tracker host and port can be set by your environment
-tracker_host = os.environ.get("TVM_TRACKER_HOST", "0.0.0.0")
-tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
-
-# Load VTA parameters from the vta/config/vta_config.json file
-env = vta.get_env()
-
-# This target is used for cross compilation. You can query it by :code:`gcc
-v` on your device.
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-network = "resnet50_v2"
-start_pack = "nn.max_pool2d"
-stop_pack = "nn.global_avg_pool2d"
-
-# Tuning option
-log_file = "%s.alu.%s.log" % (device, network)
-tuning_option = {
- "log_filename": log_file,
- "tuner": "random",
- "n_trial": 1000,
- "early_stopping": None,
- "measure_option": autotvm.measure_option(
- builder=autotvm.LocalBuilder(n_parallel=1),
- runner=autotvm.RPCRunner(
- env.TARGET,
- host=tracker_host,
- port=tracker_port,
- number=5,
- timeout=60,
- # check_correctness=True, # TODO: re-enable when check_correctness
works again.
- ),
- ),
-}
-
-
-def log_to_file(file_out, protocol="json"):
- """Log the tuning records into file.
- The rows of the log are stored in the format of autotvm.record.encode.
- for lhs == rhs, we add an extra rhs = [] record
-
- Parameters
- ----------
- file_out : str
- The file to log to.
- protocol: str, optional
- The log protocol. Can be 'json' or 'pickle'
-
- Returns
- -------
- callback : callable
- Callback function to do the logging.
- """
-
- def _callback(_, inputs, results):
- with open(file_out, "a") as f:
- for inp, result in zip(inputs, results):
- f.write(record.encode(inp, result, protocol) + "\n")
-
- # we only consider task with same lhs and rhs
- if inp.task.args[0] == inp.task.args[1]:
- args = list(inp.task.args)
- args[1] = (args[0][0], (), args[0][2])
- inp_copy = copy.deepcopy(inp)
- inp_copy.task.args = tuple(args)
- f.write(record.encode(inp_copy, result, protocol) + "\n")
-
- return _callback
-
-
-def tune_tasks(
- tasks,
- measure_option,
- tuner="xgb",
- n_trial=10,
- early_stopping=None,
- log_filename="tuning.log",
- use_transfer_learning=True,
-):
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
- # create tuner
- if tuner == "xgb":
- tuner_obj = XGBTuner(tsk, loss_type="reg")
- elif tuner == "xgb_knob":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
- elif tuner == "xgb_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
- elif tuner == "xgb_curve":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
- elif tuner == "xgb_rank":
- tuner_obj = XGBTuner(tsk, loss_type="rank")
- elif tuner == "xgb_rank_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
- elif tuner == "xgb_rank_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
- elif tuner == "xgb_rank_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
- elif tuner == "xgb_rank_binary":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
- elif tuner == "xgb_rank_binary_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="knob")
- elif tuner == "xgb_rank_binary_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="itervar")
- elif tuner == "xgb_rank_binary_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="curve")
- elif tuner == "ga":
- tuner_obj = GATuner(tsk, pop_size=50)
- elif tuner == "random":
- tuner_obj = RandomTuner(tsk)
- elif tuner == "gridsearch":
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
-
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- tsk_trial = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(
- n_trial=tsk_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
- log_to_file(tmp_log_file),
- ],
- )
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-########################################################################
-# Register VTA-specific tuning tasks
-def register_vta_tuning_tasks():
- from tvm.autotvm.task import TaskExtractEnv
-
- @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
- def my_clip(x, a_min, a_max):
- """Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.tir.const(a_min, x.dtype)
- const_max = tvm.tir.const(a_max, x.dtype)
- x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max),
name="clipA")
- x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min),
name="clipB")
- return x
-
- # init autotvm env to register VTA operator
- TaskExtractEnv()
-
- @autotvm.template("add.vta")
- def _topi_add(*args, **kwargs):
- assert not kwargs, "Do not support kwargs in template function call"
- A, B = args[:2]
-
- with tvm.target.vta():
- res = vta.top.op.add_packed(*args, **kwargs)
- res = my_clip(res, 0, 127)
- res = topi.cast(res, "int8")
-
- if tvm.target.Target.current().device_name == "vta":
- s = vta.top.op.schedule_add_packed([res])
- else:
- s = te.create_schedule([res.op])
- return s, [A, B, res]
-
- @autotvm.template("multiply.vta")
- def _topi_multiply(*args, **kwargs):
- assert not kwargs, "Do not support kwargs in template function call"
- A, B = args[:2]
-
- with tvm.target.vta():
- res = vta.top.op.multiply_packed(*args, **kwargs)
- res = my_clip(res, 0, 127)
- res = topi.cast(res, "int8")
-
- if tvm.target.Target.current().device_name == "vta":
- s = vta.top.op.schedule_multiply_packed([res])
- else:
- s = te.create_schedule([res.op])
- return s, [A, B, res]
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-def tune_and_evaluate(tuning_opt):
-
- if env.TARGET != "intelfocl":
- print("ALU only op only available for intelfocl target")
- return
-
- # Register VTA tuning tasks
- register_vta_tuning_tasks()
-
- # Perform task extraction on Relay program
- print("Extract tasks...")
- relay_prog, params = compile_network(env, target, network, start_pack,
stop_pack)
- mod = tvm.IRModule.from_expr(relay_prog)
- tasks = autotvm.task.extract_from_program(
- mod,
- params=params,
- ops=(
- relay.op.get("add"),
- relay.op.get("multiply"),
- ),
- target=tvm.target.Target(target, host=env.target_host),
- )
-
- # filter out non-packed alu task
- tasks = list(filter(lambda t: len(t.args[0][1]) > 4, tasks))
- # filter out float alu task
- tasks = list(filter(lambda t: t.args[0][2] != "float32", tasks))
-
- # We should have extracted 10 convolution tasks
- tasks_set = {}
- print("Extracted {} alu tasks:".format(len(tasks)))
- for tsk in tasks:
- print("tsk = ", tsk)
-
- if len(tsk.args[1][1]) == 0:
- args = list(tsk.args)
- args[1] = args[0]
- tsk.args = tuple(args)
-
- if (tsk.name, tsk.args) in tasks_set:
- print("task {} already exists".format(tsk))
- tasks_set[(tsk.name, tsk.args)] = tsk
-
- tasks = list(tasks_set.values())
- print("After merged, final #tasks={}, tasks = {}".format(len(tasks),
tasks))
-
- # run tuning tasks
- print("Tuning...")
- tune_tasks(tasks, **tuning_opt)
-
-
-# Run the tuning and evaluate the results
-try:
- tune_and_evaluate(tuning_option)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
diff --git a/vta/tutorials/autotvm/tune_relay_vta.py
b/vta/tutorials/autotvm/tune_relay_vta.py
deleted file mode 100644
index b5de247883..0000000000
--- a/vta/tutorials/autotvm/tune_relay_vta.py
+++ /dev/null
@@ -1,538 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Auto-tuning a convolutional network on VTA
-==========================================
-**Author**: `Lianmin Zheng <https://github.com/merrymercy>`_, `Thierry Moreau
<https://homes.cs.washington.edu/~moreau/>`_
-
-Auto-tuning for a specific accelerator design is critical for getting the best
-performance for any given operator. This is a tutorial showcases how to tune a
-whole convolutional network on VTA.
-
-The operator implementation for VTA in TVM is written in template form.
-The template has many tunable knobs (tile factor, virtual threads, etc).
-We will tune all convolution operators in the neural network. After tuning,
-we produce a log file which stores the best schedule parameters for all tuned
-operators. When the TVM compiler compiles these operators, it will query this
-log file to get the best knob parameters.
-
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra
dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user psutil xgboost tornado mxnet requests "Pillow<7"
cloudpickle
-#
-# To make TVM run faster during tuning, it is recommended to use cython
-# as FFI of TVM. In the root directory of TVM, execute
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user cython
-# sudo make cython3
-#
-# Now return to python code. Import packages.
-
-import os
-import sys
-
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from PIL import Image
-
-from tvm import topi
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_executor, utils, download
-from tvm.autotvm.measure.measure_methods import request_remote
-from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-
-#################################################################
-# Compile network
-# ---------------
-# Perform vta-specific compilation with Relay from a Gluon model
-
-
-def compile_network(env, target, model, start_pack, stop_pack):
-
- # Populate the shape and data type dictionary
- dtype_dict = {"data": "float32"}
- shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
- # Get off the shelf gluon model, and convert to relay
- gluon_model = vision.get_model(model, pretrained=True)
- mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
- # Update shape and type dictionary
- shape_dict.update({k: v.shape for k, v in params.items()})
- dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
- # Perform quantization in Relay
- # Note: We set opt_level to 3 in order to fold batch norm
- with tvm.transform.PassContext(opt_level=3):
- with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
- mod = relay.quantize.quantize(mod, params=params)
-
- # Perform graph packing and constant folding for VTA target
- if target.device_name == "vta":
- assert env.BLOCK_IN == env.BLOCK_OUT
- relay_prog = graph_pack(
- mod["main"],
- env.BATCH,
- env.BLOCK_OUT,
- env.WGT_WIDTH,
- start_name=start_pack,
- stop_name=stop_pack,
- )
-
- return relay_prog, params
-
-
-#################################################################
-# Start RPC Tracker
-# -----------------
-# TVM uses an RPC session to communicate with Pynq boards.
-# During tuning, the tuner will send the generated code to the board and
-# measure the speed of code on the board.
-#
-# To scale up tuning, TVM uses an RPC Tracker to manage multiple devices.
-# The RPC Tracker is a centralized controller node. We can register all
devices to
-# the tracker. For example, if we have 10 Pynq boards, we can register all of
them
-# to the tracker, and run 10 measurements in parallel, accelerating the tuning
process.
-#
-# To start an RPC tracker, run this command on the host machine. The tracker is
-# required during the whole tuning process, so we need to open a new terminal
for
-# this command:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190
-#
-# The expected output is:
-#
-# .. code-block:: bash
-#
-# INFO:RPCTracker:bind to 0.0.0.0:9190
-
-#################################################################
-# Register devices to RPC Tracker
-# -----------------------------------
-# Now we can register our devices to the tracker. The first step is to
-# build the TVM runtime for the Pynq devices.
-#
-# Follow :ref:`vta-index`
-# to build the TVM runtime on the device. Then register the device to the
tracker with:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.rpc_server --tracker=[HOST_IP]:9190 --key=pynq
-#
-# (replace :code:`[HOST_IP]` with the IP address of your host machine)
-#
-# After registering devices, we can confirm it by querying the rpc_tracker:
-#
-# .. code-block:: bash
-#
-# python -m tvm.exec.query_rpc_tracker --host=0.0.0.0 --port=9190
-#
-# For example, if we have 6 Pynq boards and 11 Raspberry Pi 3B,
-# the output can be
-#
-# .. code-block:: bash
-#
-# Queue Status
-# ----------------------------------
-# key total free pending
-# ----------------------------------
-# pynq 6 6 0
-# rpi3b 11 11 0
-# ----------------------------------
-#
-# You can register multiple devices to the tracker to accelerate tuning.
-
-###########################################
-# Set Tuning Options
-# ------------------
-# Before tuning, we should apply some configurations.
-# Here we use an Pynq-Z1 board as an example.
-
-# Tracker host and port can be set by your environment
-tracker_host = os.environ.get("TVM_TRACKER_HOST", "127.0.0.1")
-tracker_port = int(os.environ.get("TVM_TRACKER_PORT", 9190))
-
-# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
-env = vta.get_env()
-
-# This target is used for cross compilation. You can query it by :code:`gcc
-v` on your device.
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-network = "resnet18_v1"
-start_pack = "nn.max_pool2d"
-stop_pack = "nn.global_avg_pool2d"
-
-# Tuning option
-log_file = "%s.%s.log" % (device, network)
-tuning_option = {
- "log_filename": log_file,
- "tuner": "random",
- "n_trial": 1000,
- "early_stopping": None,
- "measure_option": autotvm.measure_option(
- builder=autotvm.LocalBuilder(),
- runner=autotvm.RPCRunner(
- env.TARGET,
- host=tracker_host,
- port=tracker_port,
- number=5,
- timeout=60,
- module_loader=vta.module_loader(),
- # check_correctness=True, # TODO: re-enable when check_correctness
works again.
- ),
- ),
-}
-
-####################################################################
-#
-# .. note:: How to set tuning options
-#
-# In general, the default values provided here work well.
-# If you have enough time budget, you can set :code:`n_trial`,
:code:`early_stopping`
-# to larger values, makes the tuning run for longer.
-# If your device is under-powered or your conv2d operators are large,
consider
-# setting a longer timeout.
-#
-
-###################################################################
-# Begin Tuning
-# ------------
-# Now we can extract tuning tasks from the network and begin tuning.
-# Here, we provide a simple utility function to tune a list of tasks.
-# This function is just an initial implementation which tunes them in
sequential order.
-# We will introduce a more sophisticated tuning scheduler in the future.
-#
-# Given that the tuning will be done on Pynq FPGA boards, make sure that
-# the ```TARGET`` entry in the ``vta_config.json`` file is set to ``pynq``.
-
-
-# You can skip the implementation of this function for this tutorial.
-def tune_tasks(
- tasks,
- measure_option,
- tuner="xgb",
- n_trial=1000,
- early_stopping=None,
- log_filename="tuning.log",
- use_transfer_learning=True,
-):
-
- # create tmp log file
- tmp_log_file = log_filename + ".tmp"
- if os.path.exists(tmp_log_file):
- os.remove(tmp_log_file)
-
- for i, tsk in enumerate(reversed(tasks)):
- prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
-
- # create tuner
- if tuner == "xgb":
- tuner_obj = XGBTuner(tsk, loss_type="reg")
- elif tuner == "xgb_knob":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="knob")
- elif tuner == "xgb_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="itervar")
- elif tuner == "xgb_curve":
- tuner_obj = XGBTuner(tsk, loss_type="reg", feature_type="curve")
- elif tuner == "xgb_rank":
- tuner_obj = XGBTuner(tsk, loss_type="rank")
- elif tuner == "xgb_rank_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
- elif tuner == "xgb_rank_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
- elif tuner == "xgb_rank_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
- elif tuner == "xgb_rank_binary":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary")
- elif tuner == "xgb_rank_binary_knob":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="knob")
- elif tuner == "xgb_rank_binary_itervar":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="itervar")
- elif tuner == "xgb_rank_binary_curve":
- tuner_obj = XGBTuner(tsk, loss_type="rank-binary",
feature_type="curve")
- elif tuner == "ga":
- tuner_obj = GATuner(tsk, pop_size=50)
- elif tuner == "random":
- tuner_obj = RandomTuner(tsk)
- elif tuner == "gridsearch":
- tuner_obj = GridSearchTuner(tsk)
- else:
- raise ValueError("Invalid tuner: " + tuner)
-
- if use_transfer_learning:
- if os.path.isfile(tmp_log_file):
-
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))
-
- # do tuning
- tsk_trial = min(n_trial, len(tsk.config_space))
- tuner_obj.tune(
- n_trial=tsk_trial,
- early_stopping=early_stopping,
- measure_option=measure_option,
- callbacks=[
- autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
- autotvm.callback.log_to_file(tmp_log_file),
- ],
- )
-
- # pick best records to a cache file
- autotvm.record.pick_best(tmp_log_file, log_filename)
- os.remove(tmp_log_file)
-
-
-########################################################################
-# Register VTA-specific tuning tasks
-
-
-def register_vta_tuning_tasks():
- from tvm.autotvm.task import TaskExtractEnv
-
- @tvm.te.tag_scope(tag=topi.tag.ELEMWISE)
- def my_clip(x, a_min, a_max):
- """Unlike topi's current clip, put min and max into two stages."""
- const_min = tvm.tir.const(a_min, x.dtype)
- const_max = tvm.tir.const(a_max, x.dtype)
- x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max),
name="clipA")
- x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min),
name="clipB")
- return x
-
- # init autotvm env to register VTA operator
- TaskExtractEnv()
-
- @autotvm.template("conv2d_packed.vta")
- def _topi_nn_conv2d(*args, **kwargs):
- assert not kwargs, "Do not support kwargs in template function call"
- A, W = args[:2]
-
- with tvm.target.vta():
- res = vta.top.conv2d_packed(*args, **kwargs)
- res = topi.right_shift(res, 8)
- res = my_clip(res, 0, 127)
- res = topi.cast(res, "int8")
-
- if tvm.target.Target.current().device_name == "vta":
- s = vta.top.schedule_conv2d_packed([res])
- else:
- s = te.create_schedule([res.op])
- return s, [A, W, res]
-
-
-########################################################################
-# Finally, we launch tuning jobs and evaluate the end-to-end performance.
-
-
-def tune_and_evaluate(tuning_opt):
-
- # Register VTA tuning tasks
- register_vta_tuning_tasks()
-
- # Perform task extraction on Relay program
- print("Extract tasks...")
- relay_prog, params = compile_network(env, target, network, start_pack,
stop_pack)
- mod = tvm.IRModule.from_expr(relay_prog)
- tasks = autotvm.task.extract_from_program(
- mod,
- params=params,
- ops=(relay.op.get("nn.conv2d"),),
- target=target,
- target_host=env.target_host,
- )
-
- # filter out non-packed conv2d task
- tasks = list(filter(lambda t: len(t.args[0][1]) > 4 and "conv" in t.name,
tasks))
-
- # We should have extracted 10 convolution tasks
- assert len(tasks) == 10
- print("Extracted {} conv2d tasks:".format(len(tasks)))
- for tsk in tasks:
- inp = tsk.args[0][1]
- wgt = tsk.args[1][1]
- batch = inp[0] * inp[4]
- in_filter = inp[1] * inp[5]
- out_filter = wgt[0] * wgt[4]
- height, width = inp[2], inp[3]
- hkernel, wkernel = wgt[2], wgt[3]
- hstride, wstride = tsk.args[2][0], tsk.args[2][1]
- hpad, wpad = tsk.args[3][0], tsk.args[3][1]
- print(
- "({}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {})".format(
- batch,
- height,
- width,
- in_filter,
- out_filter,
- hkernel,
- wkernel,
- hpad,
- wpad,
- hstride,
- wstride,
- )
- )
-
- # We do not run the tuning in our webpage server since it takes too long.
- # Comment the following line to run it by yourself.
- return
-
- # run tuning tasks
- print("Tuning...")
- tune_tasks(tasks, **tuning_opt)
-
- # evaluate with tuning history
- if env.TARGET != "sim":
- # Get remote from fleet node
- remote = autotvm.measure.request_remote(
- env.TARGET, tracker_host, tracker_port, timeout=10000
- )
- # Reconfigure the JIT runtime and FPGA.
- vta.reconfig_runtime(remote)
- vta.program_fpga(remote, bitstream=None)
- else:
- # In simulation mode, host the RPC server locally.
- remote = rpc.LocalSession()
-
- # compile kernels with history best records
- with autotvm.tophub.context(target, extra_files=[log_file]):
- # Compile network
- print("Compile...")
- if target.device_name != "vta":
- with tvm.transform.PassContext(opt_level=3,
disabled_pass={"AlterOpLayout"}):
- lib = relay.build(
- relay_prog, target=target, params=params,
target_host=env.target_host
- )
- else:
- with vta.build_config(opt_level=3,
disabled_pass={"AlterOpLayout"}):
- lib = relay.build(
- relay_prog, target=target, params=params,
target_host=env.target_host
- )
-
- # Export library
- print("Upload...")
- temp = utils.tempdir()
- lib.export_library(temp.relpath("graphlib.tar"))
- remote.upload(temp.relpath("graphlib.tar"))
- lib = remote.load_module("graphlib.tar")
-
- # Generate the graph executor
- ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
- m = graph_executor.GraphModule(lib["default"](ctx))
-
- # upload parameters to device
- image = tvm.nd.array((np.random.uniform(size=(1, 3, 224,
224))).astype("float32"))
- m.set_input("data", image)
-
- # evaluate
- print("Evaluate inference time cost...")
- timer = m.module.time_evaluator("run", ctx, number=1, repeat=10)
- tcost = timer()
- prof_res = np.array(tcost.results) * 1000 # convert to millisecond
- print(
- "Mean inference time (std dev): %.2f ms (%.2f ms)"
- % (np.mean(prof_res), np.std(prof_res))
- )
-
-
-# Run the tuning and evaluate the results
-try:
- tune_and_evaluate(tuning_option)
-except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
-
-######################################################################
-# Sample Output
-# -------------
-# The tuning needs to compile many programs and extract feature from them.
-# So a high performance CPU is recommended.
-# One sample output is listed below.
-# It takes about 2 hours on a 16T CPU, and 6 Pynq boards.
-#
-# .. code-block:: bash
-#
-# Extract tasks...
-# [Warning] Invalid shape during AutoTVM task creation
-# Extracted 10 conv2d tasks:
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1,
16), 'int8'), ('TENSOR', (32, 16, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1,
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1,
16, 'int8'), (32, 16, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1),
'NCHW1n16c', 'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1,
16), 'int8'), ('TENSOR', (16, 8, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1,
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16,
'int8'), (16, 8, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1,
16), 'int8'), ('TENSOR', (8, 4, 1, 1, 16, 16), 'int8'), (2, 2), (0, 0), (1, 1),
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16,
'int8'), (8, 4, 1, 1, 16, 16, 'int8'), (2, 2), (0, 0), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1,
16), 'int8'), ('TENSOR', (4, 4, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1),
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16,
'int8'), (4, 4, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1,
16), 'int8'), ('TENSOR', (8, 8, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1),
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16,
'int8'), (8, 8, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 4, 56, 56, 1,
16), 'int8'), ('TENSOR', (8, 4, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1, 1),
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 4, 56, 56, 1, 16,
'int8'), (8, 4, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1,
16), 'int8'), ('TENSOR', (16, 16, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1,
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1,
16, 'int8'), (16, 16, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1),
'NCHW1n16c', 'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 8, 28, 28, 1,
16), 'int8'), ('TENSOR', (16, 8, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1,
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 8, 28, 28, 1, 16,
'int8'), (16, 8, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 32, 7, 7, 1, 16),
'int8'), ('TENSOR', (32, 32, 3, 3, 16, 16), 'int8'), (1, 1), (1, 1), (1, 1),
'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 32, 7, 7, 1, 16,
'int8'), (32, 32, 3, 3, 16, 16, 'int8'), (1, 1), (1, 1), (1, 1), 'NCHW1n16c',
'int32'))
-# Task(func_name=topi_nn_conv2d, args=(('TENSOR', (1, 16, 14, 14, 1,
16), 'int8'), ('TENSOR', (32, 16, 3, 3, 16, 16), 'int8'), (2, 2), (1, 1), (1,
1), 'NCHW1n16c', 'int32'), kwargs={}, workload=('conv2d', (1, 16, 14, 14, 1,
16, 'int8'), (32, 16, 3, 3, 16, 16, 'int8'), (2, 2), (1, 1), (1, 1),
'NCHW1n16c', 'int32'))
-# Tuning...
-# [Task 1/10] Current/Best: 0.72/ 23.24 GFLOPS | Progress: (480/1000)
| 640.31 s Done.
-# [Task 2/10] Current/Best: 0.00/ 27.69 GFLOPS | Progress: (576/1000)
| 810.09 s Done.
-# [Task 3/10] Current/Best: 0.00/ 22.97 GFLOPS | Progress:
(1000/1000) | 1125.37 s Done.
-# [Task 4/10] Current/Best: 0.00/ 31.26 GFLOPS | Progress:
(1000/1000) | 1025.52 s Done.
-# [Task 5/10] Current/Best: 0.00/ 15.15 GFLOPS | Progress:
(1000/1000) | 1236.58 s Done.
-# [Task 6/10] Current/Best: 0.00/ 22.74 GFLOPS | Progress:
(1000/1000) | 906.60 s Done.
-# [Task 7/10] Current/Best: 0.00/ 15.27 GFLOPS | Progress:
(1000/1000) | 1056.25 s Done.
-# [Task 8/10] Current/Best: 0.00/ 2.18 GFLOPS | Progress:
(1000/1000) | 2275.29 s Done.
-# [Task 9/10] Current/Best: 2.23/ 3.99 GFLOPS | Progress:
(1000/1000) | 2527.25 s Done.
-# [Task 10/10] Current/Best: 1.56/ 6.32 GFLOPS | Progress: (480/1000)
| 1304.84 s Done.
-# Compile...
-# Upload...
-# Evaluate inference time cost...
-# Mean inference time (std dev): 621.79 ms (0.14 ms)
-
-######################################################################
-#
-# .. note:: **Experiencing Difficulties?**
-#
-# The auto tuning module is error-prone. If you always see " 0.00/ 0.00
GFLOPS",
-# then there must be something wrong.
-#
-# First, make sure you set the correct configuration of your device.
-# Then, you can print debug information by adding these lines in the
beginning
-# of the script. It will print every measurement result, where you can find
useful
-# error messages.
-#
-# .. code-block:: python
-#
-# import logging
-# logging.getLogger('autotvm').setLevel(logging.DEBUG)
-#
-# Finally, always feel free to ask our community for help on
https://discuss.tvm.apache.org
diff --git a/vta/tutorials/frontend/deploy_classification.py
b/vta/tutorials/frontend/deploy_classification.py
deleted file mode 100644
index c741a1678f..0000000000
--- a/vta/tutorials/frontend/deploy_classification.py
+++ /dev/null
@@ -1,313 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy Pretrained Vision Model from MxNet on VTA
-================================================
-**Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_
-
-This tutorial provides an end-to-end demo on how to run ImageNet
classification
-inference on the VTA accelerator design to perform ImageNet classification
tasks.
-It showcases Relay as a front end compiler that can perform quantization (VTA
-only supports int8/32 inference) as well as graph packing (in order to enable
-tensorization in the core) to massage the compute graph for the hardware
target.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To use the autotvm package in tvm, we need to install some extra
dependencies.
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user mxnet requests "Pillow<7"
-#
-# Now return to the python code. Import packages.
-
-from __future__ import absolute_import, print_function
-
-import argparse, json, os, requests, sys, time
-from io import BytesIO
-from os.path import join, isfile
-import sys
-from PIL import Image
-
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from matplotlib import pyplot as plt
-
-import tvm
-from tvm import te
-from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_executor, utils, download
-from tvm.contrib.debugger import debug_executor
-from tvm.relay import transform
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-
-
-# Make sure that TVM was compiled with RPC=1
-assert tvm.runtime.enabled("rpc")
-
-######################################################################
-# Define the platform and model targets
-# -------------------------------------
-# Execute on CPU vs. VTA, and define the model.
-
-# Load VTA parameters from the 3rdparty/vta-hw/config/vta_config.json file
-env = vta.get_env()
-
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Dictionary lookup for when to start/end bit packing
-pack_dict = {
- "resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet34_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet18_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet34_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet50_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet101_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
-}
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-model = "resnet18_v1"
-assert model in pack_dict
-
-######################################################################
-# Obtain an execution remote
-# --------------------------
-# When target is 'pynq', reconfigure FPGA and runtime.
-# Otherwise, if target is 'sim', execute locally.
-
-if env.TARGET not in ["sim", "tsim", "intelfocl"]:
-
- # Get remote from tracker node if environment variable is set.
- # To set up the tracker, you'll need to follow the "Auto-tuning
- # a convolutional network for VTA" tutorial.
- tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
- tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
- # Otherwise if you have a device you want to program directly from
- # the host, make sure you've set the variables below to the IP of
- # your board.
- device_host = os.environ.get("VTA_RPC_HOST", "192.168.2.99")
- device_port = os.environ.get("VTA_RPC_PORT", "9091")
- if not tracker_host or not tracker_port:
- remote = rpc.connect(device_host, int(device_port))
- else:
- remote = autotvm.measure.request_remote(
- env.TARGET, tracker_host, int(tracker_port), timeout=10000
- )
-
- # Reconfigure the JIT runtime and FPGA.
- # You can program the FPGA with your own custom bitstream
- # by passing the path to the bitstream file instead of None.
- reconfig_start = time.time()
- vta.reconfig_runtime(remote)
- vta.program_fpga(remote, bitstream=None)
- reconfig_time = time.time() - reconfig_start
- print("Reconfigured FPGA and RPC runtime in
{0:.2f}s!".format(reconfig_time))
-
-# In simulation mode, host the RPC server locally.
-else:
- remote = rpc.LocalSession()
-
- if env.TARGET in ["intelfocl"]:
- # program intelfocl aocx
- vta.program_fpga(remote, bitstream="vta.bitstream")
-
-# Get execution context from remote
-ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
-
-######################################################################
-# Build the inference graph executor
-# ----------------------------------
-# Grab vision model from Gluon model zoo and compile with Relay.
-# The compilation steps are:
-#
-# 1. Front end translation from MxNet into Relay module.
-# 2. Apply 8-bit quantization: here we skip the first conv layer,
-# and dense layer which will both be executed in fp32 on the CPU.
-# 3. Perform graph packing to alter the data layout for tensorization.
-# 4. Perform constant folding to reduce number of operators (e.g. eliminate
batch norm multiply).
-# 5. Perform relay build to object file.
-# 6. Load the object file onto remote (FPGA device).
-# 7. Generate graph executor, `m`.
-#
-
-# Load pre-configured AutoTVM schedules
-with autotvm.tophub.context(target):
-
- # Populate the shape and data type dictionary for ImageNet classifier input
- dtype_dict = {"data": "float32"}
- shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
- # Get off the shelf gluon model, and convert to relay
- try:
- gluon_model = vision.get_model(model, pretrained=True)
- except RuntimeError:
- print("Downloads from mxnet no longer supported", file=sys.stderr)
- sys.exit(0)
-
- # Measure build start time
- build_start = time.time()
-
- # Start front end compilation
- mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
- # Update shape and type dictionary
- shape_dict.update({k: v.shape for k, v in params.items()})
- dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
- if target.device_name == "vta":
- # Perform quantization in Relay
- # Note: We set opt_level to 3 in order to fold batch norm
- with tvm.transform.PassContext(opt_level=3):
- with relay.quantize.qconfig(global_scale=8.0,
skip_conv_layers=[0]):
- mod = relay.quantize.quantize(mod, params=params)
- # Perform graph packing and constant folding for VTA target
- assert env.BLOCK_IN == env.BLOCK_OUT
- # do device annotation if target is intelfocl or sim
- relay_prog = graph_pack(
- mod["main"],
- env.BATCH,
- env.BLOCK_OUT,
- env.WGT_WIDTH,
- start_name=pack_dict[model][0],
- stop_name=pack_dict[model][1],
- device_annot=(env.TARGET == "intelfocl"),
- )
- else:
- relay_prog = mod["main"]
-
- # Compile Relay program with AlterOpLayout disabled
- if target.device_name != "vta":
- with tvm.transform.PassContext(opt_level=3,
disabled_pass={"AlterOpLayout"}):
- graph, lib, params = relay.build(
- relay_prog, target=tvm.target.Target(target,
host=env.target_host), params=params
- )
- else:
- if env.TARGET == "intelfocl":
- # multiple targets to run both on cpu and vta
- target = {"cpu": env.target_vta_cpu, "ext_dev": target}
- with vta.build_config(
- opt_level=3, disabled_pass={"AlterOpLayout",
"tir.CommonSubexprElimTIR"}
- ):
- graph, lib, params = relay.build(
- relay_prog, target=tvm.target.Target(target,
host=env.target_host), params=params
- )
-
- # Measure Relay build time
- build_time = time.time() - build_start
- print(model + " inference graph built in {0:.2f}s!".format(build_time))
-
- # Send the inference library over to the remote RPC server
- temp = utils.tempdir()
- lib.export_library(temp.relpath("graphlib.tar"))
- remote.upload(temp.relpath("graphlib.tar"))
- lib = remote.load_module("graphlib.tar")
-
- if env.TARGET == "intelfocl":
- ctxes = [remote.ext_dev(0), remote.cpu(0)]
- m = graph_executor.create(graph, lib, ctxes)
- else:
- # Graph runtime
- m = graph_executor.create(graph, lib, ctx)
-
-######################################################################
-# Perform image classification inference
-# --------------------------------------
-# We run classification on an image sample from ImageNet
-# We just need to download the categories files, `synset.txt`
-# and an input test image.
-
-# Download ImageNet categories
-categ_url = "https://github.com/uwsampl/web-data/raw/main/vta/models/"
-categ_fn = "synset.txt"
-download.download(join(categ_url, categ_fn), categ_fn)
-synset = eval(open(categ_fn).read())
-
-# Download test image
-image_url = "https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg"
-image_fn = "cat.png"
-download.download(image_url, image_fn)
-
-# Prepare test image for inference
-image = Image.open(image_fn).resize((224, 224))
-plt.imshow(image)
-plt.show()
-image = np.array(image) - np.array([123.0, 117.0, 104.0])
-image /= np.array([58.395, 57.12, 57.375])
-image = image.transpose((2, 0, 1))
-image = image[np.newaxis, :]
-image = np.repeat(image, env.BATCH, axis=0)
-
-# Set the network parameters and inputs
-m.set_input(**params)
-m.set_input("data", image)
-
-# Perform inference and gather execution statistics
-# More on: :py:method:`tvm.runtime.Module.time_evaluator`
-num = 4 # number of times we run module for a single measurement
-rep = 3 # number of measurements (we derive std dev from this)
-timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)
-
-if env.TARGET in ["sim", "tsim"]:
- simulator.clear_stats()
- timer()
- sim_stats = simulator.stats()
- print("\nExecution statistics:")
- for k, v in sim_stats.items():
- # Since we execute the workload many times, we need to normalize stats
- # Note that there is always one warm up run
- # Therefore we divide the overall stats by (num * rep + 1)
- print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
-else:
- tcost = timer()
- std = np.std(tcost.results) * 1000
- mean = tcost.mean * 1000
- print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" %
(mean, std, env.BATCH))
- print("Average per sample inference time: %.2fms" % (mean / env.BATCH))
-
-# Get classification results
-tvm_output = m.get_output(0, tvm.nd.empty((env.BATCH, 1000), "float32",
remote.cpu(0)))
-for b in range(env.BATCH):
- top_categories = np.argsort(tvm_output.numpy()[b])
- # Report top-5 classification results
- print("\n{} prediction for sample {}".format(model, b))
- print("\t#1:", synset[top_categories[-1]])
- print("\t#2:", synset[top_categories[-2]])
- print("\t#3:", synset[top_categories[-3]])
- print("\t#4:", synset[top_categories[-4]])
- print("\t#5:", synset[top_categories[-5]])
- # This just checks that one of the 5 top categories
- # is one variety of cat; this is by no means an accurate
- # assessment of how quantization affects classification
- # accuracy but is meant to catch changes to the
- # quantization pass that would affect accuracy in the CI.
- cat_detected = False
- for k in top_categories[-5:]:
- if "cat" in synset[k]:
- cat_detected = True
- assert cat_detected