masahi commented on code in PR #12318:
URL: https://github.com/apache/tvm/pull/12318#discussion_r969252514


##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,
+# serving for a high-level abstraction of TVM intermediate representative,

Review Comment:
   which serves as the high-level frontend for TVM's Tensor IR.



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,
+# serving for a high-level abstraction of TVM intermediate representative,
+# which is easy to impose transformations and optimizations and deploy on 
various hardware backends.
+
+# As a concrete example, we can write such a TVMscript for 1-d depthwise conv 
code as below.
+# The computation procedure of `tvm_depthwise` is corresponding to the code 
snippet of `vanilla_depthwise`.

Review Comment:
   Replace the two sentences with:
   
   "The depthwise 1D convolution code above can be translated to TVMScript as 
follows."



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
+# We show that the results from original SimpleModel,
+# optimized model and loaded model are the same.
+
+ret3 = simple_model.forward(example_input)
+testing.assert_allclose(ret1.detach().numpy(), ret2.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+testing.assert_allclose(ret1.detach().numpy(), ret3.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+
+######################################################################
+# Resnet18 optimized by TVM MetaSchedule
+# ------------------------------
+# In the following, we will show that our approach is able to
+# accelerate common and large models, such as Resnet18.
+
+# We will tune our model on the GPU.
+target_cuda = "nvidia/geforce-rtx-3070"
+
+# For PyTorch users, the nn.Module could be written as usual, except for
+# applying "optimize_torch" function on the resnet18 model.
+
+
+class MyResNet18(torch.nn.Module):
+    def __init__(self, target):
+        super(MyResNet18, self).__init__()
+        # Here we impose the `optimize_torch` function
+        # The default setting is adapted automatically by the number of 
operations of the optimized model.
+        self.resnet = optimize_torch(resnet18(), [torch.rand(1, 3, 224, 224)], 
target=target)
+
+    def forward(self, input):
+        return self.resnet(input)
+
+
+tvm_module_resnet18 = MyResNet18(target_cuda)
+
+
+######################################################################
+# Resnet18 optimized by TorchScript
+# ------------------------------
+# Let us write down a resnet18 model in a standard way.
+
+
+class JitModule(torch.nn.Module):
+    def __init__(self):
+        super(JitModule, self).__init__()
+        # Here we impose the `optimize_for_inference` function
+        # TorchScript also provides a built-in "optimize_for_inference" 
function to accelerate the inference.
+        self.resnet = 
torch.jit.optimize_for_inference(torch.jit.script(resnet18().cuda().eval()))
+
+    def forward(self, input):
+        return self.resnet(input)
+
+
+jit_module_resnet18 = JitModule()
+
+
+######################################################################
+# Compare the performance between two approaches.
+# ------------------------------
+# Using PyTorch's benchmark Compare class, we can have a direct comparison 
result between two inference models.

Review Comment:
   Drop this sentence.



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module

Review Comment:
   Replace all `TVMscript` in this file with `TVMScript`



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,
+# serving for a high-level abstraction of TVM intermediate representative,
+# which is easy to impose transformations and optimizations and deploy on 
various hardware backends.

Review Comment:
   This sentence can be dropped



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_

Review Comment:
   No need to add me as an author.



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,

Review Comment:
   in Python



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,
+# serving for a high-level abstraction of TVM intermediate representative,
+# which is easy to impose transformations and optimizations and deploy on 
various hardware backends.
+
+# As a concrete example, we can write such a TVMscript for 1-d depthwise conv 
code as below.
+# The computation procedure of `tvm_depthwise` is corresponding to the code 
snippet of `vanilla_depthwise`.
+
+# In our `tvm_depthwise` function, both inputs and outputs are set to be 
function parameters
+# that are held on the multi-dimension buffers. For each buffer, the shape and 
data type information are required.
+# In the function body, the syntactic sugar `T.grid`, `T.block` and 
`T.axis.remap`
+# are for writing multiple nested iterators, wrapping a basic unit of 
computation, and
+# creating spatial or reduce block iterators and bounding to the loop 
iterators, respectively.
+# Inside the computation statement, `C[vj, vi]` is initialized and sums up 
along the axis k.
+# Finally, we place 2 decorators `T.prim_func` and `as_torch` above the 
definition of the function,
+# which converts the Python AST to TVMscript and then converts to PyTorch's 
`nn.Module`.

Review Comment:
   I think you only need to briefly explain the `as_torch` decorator. Everything 
else in this paragraph can be dropped, since this is not a detailed tutorial 
for TVMScript.



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.

Review Comment:
   The PyTorch module will be tuned by TVM for the target hardware.



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping the TVMscript code with 
the PyTorch module.
+By the decorator `as_torch`, users can wrap a TVMscript code into a PyTorch 
nn.Module naturally.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# TVM community proposes an embedded Domain Specific Language on Python called 
TVMscript,
+# serving for a high-level abstraction of TVM intermediate representative,
+# which is easy to impose transformations and optimizations and deploy on 
various hardware backends.
+
+# As a concrete example, we can write such a TVMscript for 1-d depthwise conv 
code as below.
+# The computation procedure of `tvm_depthwise` is corresponding to the code 
snippet of `vanilla_depthwise`.
+
+# In our `tvm_depthwise` function, both inputs and outputs are set to be 
function parameters
+# that are held on the multi-dimension buffers. For each buffer, the shape and 
data type information are required.
+# In the function body, the syntactic sugar `T.grid`, `T.block` and 
`T.axis.remap`
+# are for writing multiple nested iterators, wrapping a basic unit of 
computation, and
+# creating spatial or reduce block iterators and bounding to the loop 
iterators, respectively.
+# Inside the computation statement, `C[vj, vi]` is initialized and sums up 
along the axis k.
+# Finally, we place 2 decorators `T.prim_func` and `as_torch` above the 
definition of the function,
+# which converts the Python AST to TVMscript and then converts to PyTorch's 
`nn.Module`.
+
+
@as_torch
@T.prim_func  # NOTE: restored — the decorator was garbled to "[email protected]" by email obfuscation
def tvm_depthwise(
    A: T.Buffer((70, 80), "float32"),  # input signal, (channels, width)
    B: T.Buffer((70, 20), "float32"),  # filters, (channels, kernel_size)
    C: T.Buffer((70, 61), "float32"),  # output, (channels, 61); 61 = 80 - 20 + 1
) -> None:
    # One iteration point per (output channel, output position, kernel tap).
    for j, i, k in T.grid(70, 61, 20):
        with T.block():
            # "SSR": j and i are spatial block iterators, k is the reduction axis.
            vi, vj, vk = T.axis.remap("SSR", [i, j, k])
            with T.init():
                # Zero the accumulator before summing over the kernel taps.
                C[vj, vi] = T.float32(0)
            C[vj, vi] += B[vj, vk] * A[vj, vi + vk]
+
+
# We can build the TVMscript code by calling the `tune` method in default setting.
# Without providing extra information, the model will be tuned for CPU.

tvm_depthwise.tune()

# We can print out the tuned TVMscript code to see how the program is transformed, as

print(tvm_depthwise.script())

# We can verify that the two outputs are the same:

# `tvm_depthwise` writes its result into a preallocated output buffer
# (destination-passing style), so the buffer is created up front.
ret_tvm = torch.zeros(out_channel, width - kernel_size + 1)
tvm_depthwise(inputs, filters, ret_tvm)

testing.assert_allclose(ret_torch.cpu().numpy(), ret_tvm.cpu().numpy(), atol=1e-5, rtol=1e-5)
+
+
+######################################################################
+# Benchmark
+# -------------------------------
+# We will compare two operators by using PyTorch's benchmark toolkit.
+
results = []
for i in range(5):
    # Fresh random data for every round so neither implementation can
    # benefit from results cached across repetitions.
    inputs = torch.randn(out_channel, width)
    filters = torch.randn(out_channel, kernel_size)
    res = torch.zeros(out_channel, width - kernel_size + 1)
    sub_label = f"[test {i}]"
    # Time the TVM-tuned kernel; note it takes the output buffer as a third argument.
    results.append(
        benchmark.Timer(
            stmt="tvm_depthwise(inputs, filters, res)",
            setup="from __main__ import tvm_depthwise",
            globals={"inputs": inputs, "filters": filters, "res": res},
            sub_label=sub_label,
            description="TVMscript",
        ).blocked_autorange()
    )
    # Time the plain PyTorch implementation on the same data.
    results.append(
        benchmark.Timer(
            stmt="torch_depthwise(inputs, filters)",
            setup="from __main__ import torch_depthwise",
            globals={
                "inputs": inputs,
                "filters": filters,
            },
            sub_label=sub_label,
            description="PyTorch",
        ).blocked_autorange()
    )
# Collate all timings into a single side-by-side comparison table.
compare = benchmark.Compare(results)
compare.print()
+
# In the author's environment, the average inference time of `tvm_depthwise` is 120.0 us,

Review Comment:
   0.9.0 is the released version, I don't think this is the one you are using 
for development. There is no need to mention the TVM version.



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
class SimpleModel(nn.Module):
    """A toy network: two 5x5 convolutions, each followed by a ReLU."""

    def __init__(self):
        super().__init__()
        # 1 -> 20 channels, then 20 -> 20 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        activations = F.relu(self.conv1(x))
        return F.relu(self.conv2(activations))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.

Review Comment:
    We provide the `optimize_torch` function, which has a similar usage as 
`torch.jit.trace`.



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_

Review Comment:
   Drop my name



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.

Review Comment:
   `nn.Module`



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule

Review Comment:
   Optimize



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
+# We show that the results from original SimpleModel,
+# optimized model and loaded model are the same.
+
+ret3 = simple_model.forward(example_input)
+testing.assert_allclose(ret1.detach().numpy(), ret2.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+testing.assert_allclose(ret1.detach().numpy(), ret3.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+
+######################################################################
+# Resnet18 optimized by TVM MetaSchedule
+# ------------------------------
+# In the following, we will show that our approach is able to
+# accelerate common and large models, such as Resnet18.

Review Comment:
   Drop "large". Resnet18 is not a large model.



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
**Author**:
`Yaoda Zhou <https://github.com/juda>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
######################################################################
# Optimize SimpleModel by TVM MetaSchedule
# ------------------------------
# We provide the `optimize_torch` function, which has a similar usage as `torch.jit.trace`.
# The PyTorch model to optimize, along with its example input, is provided by users.
# We can optimize the PyTorch model by calling `optimize_torch` with the default setting.
# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
# We can save and load our tuned module like the standard `nn.Module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
# We show that the results from the original PyTorch model and the model
# tuned by TVM are the same, as expected.
+
+ret3 = simple_model.forward(example_input)
+testing.assert_allclose(ret1.detach().numpy(), ret2.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+testing.assert_allclose(ret1.detach().numpy(), ret3.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+
######################################################################
# Optimize Resnet18 by MetaSchedule
# ------------------------------
# In the following, we will show that our approach is able to
# accelerate common models, such as Resnet18.
+
+# We will tune our model on the GPU.
+target_cuda = "nvidia/geforce-rtx-3070"
+
+# For PyTorch users, the nn.Module could be written as usual, except for
+# applying "optimize_torch" function on the resnet18 model.
+
+
class MyResNet18(torch.nn.Module):
    """A torchvision resnet18 whose forward pass is tuned by TVM
    MetaSchedule through `optimize_torch`."""

    def __init__(self, target):
        super().__init__()
        # Tune resnet18 for the given target; with no explicit config,
        # the tuning budget is chosen automatically from the number of
        # operations in the model.
        example_inputs = [torch.rand(1, 3, 224, 224)]
        self.resnet = optimize_torch(resnet18(), example_inputs, target=target)

    def forward(self, input):
        return self.resnet(input)


# Constructing the module triggers the (lengthy) tuning run for the GPU target.
tvm_module_resnet18 = MyResNet18(target_cuda)
+
+
+######################################################################
+# Resnet18 optimized by TorchScript
+# ------------------------------
+# Let us write down a resnet18 model in a standard way.
+
+
class JitModule(torch.nn.Module):
    """Baseline: resnet18 compiled with TorchScript's built-in
    `optimize_for_inference` pass."""

    def __init__(self):
        super().__init__()
        # Script the model first, then apply TorchScript's inference
        # optimizations (the model must be in eval mode).
        scripted = torch.jit.script(resnet18().cuda().eval())
        self.resnet = torch.jit.optimize_for_inference(scripted)

    def forward(self, input):
        return self.resnet(input)


jit_module_resnet18 = JitModule()
+
+
+######################################################################
+# Compare the performance between two approaches.
+# ------------------------------
+# Using PyTorch's benchmark Compare class, we can have a direct comparison 
result between two inference models.
+
results = []
for i in range(5):
    # A fresh random image on the GPU for every measurement round.
    test_input = torch.rand(1, 3, 224, 224).cuda()
    sub_label = f"[test {i}]"
    # Time the MetaSchedule-tuned model.
    results.append(
        benchmark.Timer(
            stmt="tvm_module_resnet18(test_input)",
            setup="from __main__ import tvm_module_resnet18",
            globals={"test_input": test_input},
            sub_label=sub_label,
            description="tuning by meta",
        ).blocked_autorange()
    )
    # Time the TorchScript-optimized baseline on the same input.
    results.append(
        benchmark.Timer(
            stmt="jit_module_resnet18(test_input)",
            setup="from __main__ import jit_module_resnet18",
            globals={"test_input": test_input},
            sub_label=sub_label,
            description="tuning by jit",
        ).blocked_autorange()
    )

# Print both sets of timings in one side-by-side table.
compare = benchmark.Compare(results)
compare.print()
+
# In the author's environment, the average inference time of `tvm_module_resnet18` is 620.0 us,

Review Comment:
   Drop the reference to TVM version (see the same comment to 
`using_as_torch.py`)



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
+# We show that the results from original SimpleModel,

Review Comment:
   "We show that the results from the original PyTorch model and the model 
tuned by TVM are the same, as expected."
   
   No need to verify the result of the "loaded model". Note that the term 
"loaded model" makes no sense. 



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide a `optimize_torch` function, which has the similar usage as 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, are provided by 
users.
+# We can optimized the PyTorch's module by calling the `optimized_torch` 
method in default setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
+# We show that the results from original SimpleModel,
+# optimized model and loaded model are the same.
+
+ret3 = simple_model.forward(example_input)
+testing.assert_allclose(ret1.detach().numpy(), ret2.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+testing.assert_allclose(ret1.detach().numpy(), ret3.detach().numpy(), 
atol=1e-5, rtol=1e-5)
+
+######################################################################
+# Resnet18 optimized by TVM MetaSchedule

Review Comment:
   Optimize Resnet18 by MetaSchedule



##########
gallery/how_to/work_with_pytorch/using_as_torch.py:
##########
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Wrap Your TVMscript with PyTorch Module
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial on wrapping TVMScript code with 
a PyTorch module.
+With the decorator `as_torch`, users can naturally wrap TVMScript code into a 
PyTorch nn.Module.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch, as well as necessary libraries
+import torch
+import torch.nn.functional as F
+import torch.utils.benchmark as benchmark
+
+import tvm
+from tvm.contrib.torch import as_torch
+from tvm.script import tir as T
+
+######################################################################
+# Write your own PyTorch operator by TVMscript
+# -------------------------------
+# PyTorch is a very popular machine learning framework which contains
+# optimized implementations of most commonly used operators.
+# Nevertheless, sometimes you might want to write your own operators in 
PyTorch.
+# In that case, the performance of such custom operators might not be 
satisfactory for your needs.
+#
+# For example, suppose that we are going to define a 1-d depthwise convolution 
operator.
+# Assume the number of in_channel and out_channel are both 70,
+# the width is 80 and the kernel size is 20,
+# then the 1-d depthwise conv could be written in PyTorch in one line:
+
+in_channel = 70
+out_channel = 70
+width = 80
+kernel_size = 20
+
+
+def torch_depthwise(inputs, filters):
+    return F.conv1d(inputs, filters.view(out_channel, 1, kernel_size), 
groups=out_channel)
+
+
+# We can run this function as:
+
+inputs = torch.randn(in_channel, width)
+filters = torch.randn(out_channel, kernel_size)
+ret_torch = torch_depthwise(inputs, filters)
+
+# The `torch_depthwise` function, in a plain Python code, could be written as:
+
+
+def vanilla_depthwise(input, weight):
+    ret = torch.zeros(out_channel, width - kernel_size + 1)
+    for j in range(out_channel):
+        for i in range(width - kernel_size + 1):
+            for k in range(kernel_size):
+                ret[j, i] += weight[j, k] * input[j, i + k]
+    return ret
+
+
+# Then, we plan to optimize the `depthwise` function by leveraging the power 
of TVM.
+# The TVM community proposes an embedded Domain-Specific Language in Python 
called TVMScript,
+# serving as a high-level abstraction of the TVM intermediate representation,
+# which makes it easy to apply transformations and optimizations and to deploy 
on various hardware backends.
+
+# As a concrete example, we can write such a TVMscript for 1-d depthwise conv 
code as below.
+# The computation procedure of `tvm_depthwise` corresponds to the code 
snippet of `vanilla_depthwise`.
+
+# In our `tvm_depthwise` function, both inputs and outputs are set to be 
function parameters
+# that are held on the multi-dimension buffers. For each buffer, the shape and 
data type information are required.
+# In the function body, the syntactic sugar `T.grid`, `T.block` and 
`T.axis.remap`
+# are for writing multiple nested iterators, wrapping a basic unit of 
computation, and
+# creating spatial or reduce block iterators and bounding to the loop 
iterators, respectively.
+# Inside the computation statement, `C[vj, vi]` is initialized and sums up 
along the axis k.
+# Finally, we place 2 decorators `T.prim_func` and `as_torch` above the 
definition of the function,
+# which converts the Python AST to TVMscript and then converts to PyTorch's 
`nn.Module`.
+
+
+@as_torch
[email protected]_func
+def tvm_depthwise(
+    A: T.Buffer((70, 80), "float32"),
+    B: T.Buffer((70, 20), "float32"),
+    C: T.Buffer((70, 61), "float32"),
+) -> None:
+    for j, i, k in T.grid(70, 61, 20):
+        with T.block():
+            vi, vj, vk = T.axis.remap("SSR", [i, j, k])
+            with T.init():
+                C[vj, vi] = T.float32(0)
+            C[vj, vi] += B[vj, vk] * A[vj, vi + vk]
+
+
+# We can build the TVMscript code by calling the `tune` method in default 
setting.
+# Without providing extra information, the model will be tuned for CPU.
+
+tvm_depthwise.tune()
+
+# We can print out the tuned TVMscript code to see how the program is 
transformed, as
+
+print(tvm_depthwise.script())
+
+# We can verify that the two outputs are the same:
+
+ret_tvm = torch.zeros(out_channel, width - kernel_size + 1)
+tvm_depthwise(inputs, filters, ret_tvm)
+
+testing.assert_allclose(ret_torch.cpu().numpy(), ret_tvm.cpu().numpy(), 
atol=1e-5, rtol=1e-5)
+
+
+######################################################################
+# Benchmark
+# -------------------------------
+# We will compare two operators by using PyTorch's benchmark toolkit.

Review Comment:
   Drop this sentence. It is not useful.



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,193 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: `Yaoda Zhou <https://github.com/juda/>`_
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be 
installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide an `optimize_torch` function, which has a similar usage to 
`torch.jit.trace`.
+# The function/model to optimize and an example input are required to be 
provided by the user.
+# If the third parameter `tuning_config` is not provided, a default 
configuration is loaded.
+# If the parameter `target` is empty, the model will be deployed on the CPU.
+
+
+example_input = torch.randn(20, 1, 10, 10)
+
+# We use the default configuration for the first example.
+model_optimized_by_meta = optimize_torch(SimpleModel(), example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module and see the result.
+ret1 = model_optimized_by_meta(example_input)
+
+torch.save(model_optimized_by_meta, "meta_model.pt")
+model_loaded = torch.load("meta_model.pt")
+
+# We load the module and run it again, and it will return the same result as 
above.
+ret2 = model_loaded(example_input)
+
+testing.assert_allclose(ret1.numpy(), ret2.numpy(), atol=1e-5, rtol=1e-5)
+
+######################################################################
+# Define the resnet18 optimized by MetaSchedule
+# ------------------------------
+# In another example, we compare the performance of the two optimizers on 
resnet18.
+# For learning how to define a resnet18 model via PyTorch's nn.Module,
+# you can refer to 
https://pytorch.org/docs/stable/jit.html#mixing-tracing-and-scripting
+
+# We will deploy our model on the GPU.
+# In the working machine, the GPU is nvidia/geforce-rtx-3070.
+target_cuda = "nvidia/geforce-rtx-3070"
+
+# The default setting is adapted automatically based on the number of 
operations of the model to optimize.
+# When needed, we can define the configuration by ourselves, like:
+tuning_config = TuneConfig(
+    strategy="evolutionary",
+    num_trials_per_iter=64,
+    max_trials_per_task=20000,
+    max_trials_global=20000,
+)
+
+# For PyTorch users, the nn.Module could be written as usual, except for
+# applying "optimize_torch" function on the resnet18 model.
+# In such a way, we obtain a new resnet18 model optimized by MetaSchedule.
+
+
+class MyResNet18(torch.nn.Module):
+    def __init__(self, config, target=None):
+        super(MyResNet18, self).__init__()
+        self.means = torch.nn.Parameter(
+            torch.tensor([103.939, 116.779, 123.68]).resize_(1, 3, 1, 1)
+        ).cuda()
+        # Here we impose the `optimize_torch` function
+        self.resnet = optimize_torch(resnet18(), [torch.rand(1, 3, 224, 224)], 
config, target)
+
+    def forward(self, input):
+        return self.resnet(input - self.means)
+
+
+# Since we set a large number of trials,
+# we may need to wait longer for the search to complete.
+meta_module_resnet18 = MyResNet18(tuning_config, target_cuda)
+
+
+######################################################################
+# Define the resnet18 optimized by TorchScript
+# ------------------------------
+# Besides, let us define a resnet18 model in a standard way.
+# TorchScript also provides a built-in "optimize_for_inference" function to 
accelerate the inference,
+# we will compare the performance of those two optimizers later.
+
+
+class JitModule(torch.nn.Module):

Review Comment:
   Please address this comment. There is no need to have `JitModule`.
   



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,176 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: 
+`Yaoda Zhou <https://github.com/juda>`_,
+`Masahiro Masuda <https://github.com/masahi>`_
+
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide an `optimize_torch` function, which has a similar usage to 
`torch.jit.trace`.
+# The PyTorch model to optimize, along with its example input, is provided by 
the user.
+# We can optimize the PyTorch module by calling the `optimize_torch` 
function with its default settings.
+# Without providing extra information, the model will be tuned for CPU.
+
+simple_model = SimpleModel()
+example_input = torch.randn(20, 1, 10, 10)
+model_optimized_by_tvm = optimize_torch(simple_model, example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module.
+ret1 = model_optimized_by_tvm(example_input)
+
+torch.save(model_optimized_by_tvm, "model_optimized.pt")
+model_loaded = torch.load("model_optimized.pt")
+
+# We load the module and run it again.
+ret2 = model_loaded(example_input)
+
+# We show that the results from original SimpleModel,
+# optimized model and loaded model are the same.
+
+ret3 = simple_model.forward(example_input)

Review Comment:
   Drop `forward`



##########
gallery/how_to/work_with_pytorch/using_optimized_torch.py:
##########
@@ -0,0 +1,193 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Compile PyTorch Models
+======================
+**Author**: `Yaoda Zhou <https://github.com/juda/>`_
+This article is an introductory tutorial to optimize PyTorch models by using 
`tvm.contrib.torch.optimize_torch`.
+To follow this tutorial, PyTorch, as well as TorchVision, should be 
installed.
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+# Import PyTorch
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+# Import library for profiling
+import torch.utils.benchmark as benchmark
+from torchvision.models import resnet18
+
+# Import `optimize_torch` function
+from tvm.contrib.torch import optimize_torch
+from tvm.meta_schedule import TuneConfig
+
+######################################################################
+# Define a simple module written by PyTorch
+# ------------------------------
+
+
+class SimpleModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 20, 5)
+        self.conv2 = nn.Conv2d(20, 20, 5)
+
+    def forward(self, x):
+        x = F.relu(self.conv1(x))
+        return F.relu(self.conv2(x))
+
+
+######################################################################
+# Optimized SimpleModel by TVM MetaSchedule
+# ------------------------------
+# We provide an `optimize_torch` function, which has a similar usage to 
`torch.jit.trace`.
+# The function/model to optimize and an example input are required to be 
provided by the user.
+# If the third parameter `tuning_config` is not provided, a default 
configuration is loaded.
+# If the parameter `target` is empty, the model will be deployed on the CPU.
+
+
+example_input = torch.randn(20, 1, 10, 10)
+
+# We use the default configuration for the first example.
+model_optimized_by_meta = optimize_torch(SimpleModel(), example_input)
+
+######################################################################
+# Save/Load module
+# ------------------------------
+# We can save and load our tuned module like the standard `nn.module`.
+
+# Let us run our tuned module and see the result.
+ret1 = model_optimized_by_meta(example_input)
+
+torch.save(model_optimized_by_meta, "meta_model.pt")
+model_loaded = torch.load("meta_model.pt")
+
+# We load the module and run it again, and it will return the same result as 
above.
+ret2 = model_loaded(example_input)
+
+testing.assert_allclose(ret1.numpy(), ret2.numpy(), atol=1e-5, rtol=1e-5)
+
+######################################################################
+# Define the resnet18 optimized by MetaSchedule
+# ------------------------------
+# In another example, we compare the performance of the two optimizers on 
resnet18.
+# For learning how to define a resnet18 model via PyTorch's nn.Module,
+# you can refer to 
https://pytorch.org/docs/stable/jit.html#mixing-tracing-and-scripting
+
+# We will deploy our model on the GPU.
+# In the working machine, the GPU is nvidia/geforce-rtx-3070.
+target_cuda = "nvidia/geforce-rtx-3070"
+
+# The default setting is adapted automatically based on the number of 
operations of the model to optimize.
+# When needed, we can define the configuration by ourselves, like:
+tuning_config = TuneConfig(
+    strategy="evolutionary",
+    num_trials_per_iter=64,
+    max_trials_per_task=20000,
+    max_trials_global=20000,
+)
+
+# For PyTorch users, the nn.Module could be written as usual, except for
+# applying "optimize_torch" function on the resnet18 model.
+# In such a way, we obtain a new resnet18 model optimized by MetaSchedule.
+
+
+class MyResNet18(torch.nn.Module):

Review Comment:
   Please address this comment. There is no need to have `MyResNet18`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to