[tvm] branch unity updated: [Unity] Refactor Relax Build JIT UX (#14088)

ruihangl Wed, 22 Feb 2023 11:47:29 -0800

This is an automated email from the ASF dual-hosted git repository.

ruihangl pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/unity by this push:
     new 37048c066e [Unity] Refactor Relax Build JIT UX (#14088)
37048c066e is described below

commit 37048c066ea54b6603728bf8ce6f804632427f14
Author: Tianqi Chen <[email protected]>
AuthorDate: Wed Feb 22 14:47:11 2023 -0500

    [Unity] Refactor Relax Build JIT UX (#14088)
    
    This PR refactors relax build so it gets exposed at the top-level.
    We also introduce an explicit jit functionality to handle
    live loading of compiled artifacts from cutlass.
    
    We also move relax vm to runtime so it can be clearly isolated
    from the rest of the compiler stack.
---
 apps/relax_examples/e2e_auto_tir.py                |   2 +-
 apps/relax_examples/mlp.py                         |   2 +-
 apps/relax_examples/nn_module.py                   |   2 +-
 apps/relax_examples/resnet.py                      |   2 +-
 python/tvm/contrib/cutlass/build.py                |  26 --
 python/tvm/meta_schedule/relax_integration.py      |   6 +-
 python/tvm/relax/__init__.py                       |   8 +-
 python/tvm/relax/exec_builder.py                   |   2 +-
 python/tvm/relax/frontend/torch/dynamo.py          |   4 +-
 python/tvm/relax/transform/transform.py            |   4 +
 .../transform/tuning_api/default_functions.py      |   6 +-
 python/tvm/relax/vm_build.py                       | 317 +++++++++++++++++++++
 python/tvm/{relax/vm.py => runtime/relax_vm.py}    | 231 ++-------------
 tests/python/relax/test_codegen_cutlass.py         |   6 +-
 tests/python/relax/test_codegen_dnnl.py            |   2 +-
 tests/python/relax/test_codegen_tensorrt.py        |   2 +-
 tests/python/relax/test_pipeline.py                |   2 +-
 tests/python/relax/test_relay_translator.py        |   2 +-
 tests/python/relax/test_transform_bind_params.py   |   4 +-
 tests/python/relax/test_transform_codegen_pass.py  |  12 +-
 tests/python/relax/test_vm_build.py                |  62 ++--
 tests/python/relax/test_vm_codegen_only.py         |  10 +-
 tests/python/relax/test_vm_codegen_tir.py          |   2 +-
 tests/python/relax/test_vm_profiler.py             |   8 +-
 tests/python/relay/test_vm.py                      |  10 +-
 25 files changed, 423 insertions(+), 311 deletions(-)

diff --git a/apps/relax_examples/e2e_auto_tir.py b/apps/relax_examples/e2e_auto_tir.py
index 92cda16f79..8113f942d1 100644
--- a/apps/relax_examples/e2e_auto_tir.py
+++ b/apps/relax_examples/e2e_auto_tir.py
@@ -142,7 +142,7 @@ def apply_opt_before_tuning(
 def f_measurement(
     rt_mod: runtime.Module, device: runtime.ndarray.Device, input_data: 
Dict[str, runtime.NDArray]
 ):
-    vm = relax.vm.VirtualMachine(exec=rt_mod, device=device)
+    vm = relax.VirtualMachine(rt_mod, device=device)
     vm.save_function("main", "measure_func", **input_data, 
include_return=False)
     evaluator = vm.time_evaluator(
         func_name="measure_func",
diff --git a/apps/relax_examples/mlp.py b/apps/relax_examples/mlp.py
index 02e17dc304..2a81b61543 100644
--- a/apps/relax_examples/mlp.py
+++ b/apps/relax_examples/mlp.py
@@ -47,7 +47,7 @@ if __name__ == "__main__":
 
     # build and create vm executor
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target)
+    ex = relax.build(mod, target)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     # run the mlp model on relax vm
diff --git a/apps/relax_examples/nn_module.py b/apps/relax_examples/nn_module.py
index b57cb00685..57a13e4fb5 100644
--- a/apps/relax_examples/nn_module.py
+++ b/apps/relax_examples/nn_module.py
@@ -56,7 +56,7 @@ if __name__ == "__main__":
 
     # build the IRModule and create relax vm
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target)
+    ex = relax.build(mod, target)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     # init parameters
diff --git a/apps/relax_examples/resnet.py b/apps/relax_examples/resnet.py
index df0cab02f1..6c7350d778 100644
--- a/apps/relax_examples/resnet.py
+++ b/apps/relax_examples/resnet.py
@@ -36,7 +36,7 @@ if __name__ == "__main__":
     relax_mod.show()
 
     # build the IRModule and create relax vm
-    ex = relax.vm.build(relax_mod, target)
+    ex = relax.build(relax_mod, target)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     # init weights and run the model on relax vm
diff --git a/python/tvm/contrib/cutlass/build.py b/python/tvm/contrib/cutlass/build.py
index ad0e59af02..c6e5adacec 100644
--- a/python/tvm/contrib/cutlass/build.py
+++ b/python/tvm/contrib/cutlass/build.py
@@ -851,29 +851,3 @@ def finalize_modules_vm(vm_exec, lib_path="compile.so", 
vmcode_path="vmcode.ro",
         fo.write(code)
     lib = tvm.runtime.load_module(lib_path)
     return tvm.runtime.vm.Executable.load_exec(code, lib)
-
-
-def finalize_modules_relax(vm_exec, lib_path="compile.so", tmp_dir="./tmp"):
-    """finalize_modules_vm equivalent for Relax VM.
-
-    Parameters
-    ----------
-    vm_exec : vm.Executable
-        The output from relax.vm.build containing compiled host code and 
kernels.
-
-    lib_path : string
-        The path to a shared library which will be generated as the result of 
the build process.
-
-    tmp_dir : string
-        A temporary directory where intermediate compiled artifacts will be 
stored.
-
-    Returns
-    -------
-    updated_vm_exec : relax.vm.Executable
-        The updated VM executable with all compilation and linking completed.
-    """
-    lib_path = os.path.join(tmp_dir, lib_path)
-    vm_exec.mod.export_library(lib_path, workspace_dir=tmp_dir, cc="nvcc")
-    lib = tvm.runtime.load_module(lib_path)
-
-    return relax.vm.Executable(lib)
diff --git a/python/tvm/meta_schedule/relax_integration.py b/python/tvm/meta_schedule/relax_integration.py
index a82d899685..db22214b76 100644
--- a/python/tvm/meta_schedule/relax_integration.py
+++ b/python/tvm/meta_schedule/relax_integration.py
@@ -317,7 +317,7 @@ def compile_relax(
     mod: IRModule,
     target: Union[Target, str],
     params: Optional[Dict[str, NDArray]],
-) -> "relax.vm.Executable":
+) -> "relax.Executable":
     """Compile a relax program with a MetaSchedule database.
 
     Parameters
@@ -333,12 +333,12 @@ def compile_relax(
 
     Returns
     -------
-    lib : relax.vm.Executable
+    lib : relax.Executable
         The built runtime module or vm Executable for the given relax workload.
     """
     # pylint: disable=import-outside-toplevel
     from tvm.relax.transform import BindParams, MetaScheduleApplyDatabase
-    from tvm.relax.vm import build as relax_build
+    from tvm.relax import build as relax_build
 
     # pylint: enable=import-outside-toplevel
     if not isinstance(target, Target):
diff --git a/python/tvm/relax/__init__.py b/python/tvm/relax/__init__.py
index 33a9c2eece..d0a1942ebd 100644
--- a/python/tvm/relax/__init__.py
+++ b/python/tvm/relax/__init__.py
@@ -16,6 +16,9 @@
 # under the License.
 # pylint: disable=invalid-name, wrong-import-position
 """The Relax IR namespace containing the IR, type, operator, builder, vm, 
etc."""
+from tvm.runtime import relax_vm as vm
+from tvm.runtime.relax_vm import VirtualMachine
+
 # Expr
 from .expr import (
     Expr,
@@ -51,7 +54,6 @@ from .ty import Type, ObjectType, ShapeType, DynTensorType, 
TupleType, FuncType,
 
 # VM
 from .exec_builder import ExecBuilder
-from .vm import VirtualMachine
 
 # Operator
 from .op.base import call_tir
@@ -82,7 +84,9 @@ from . import expr
 from . import ty
 from . import analysis
 from . import transform
-from . import vm
 from . import block_builder
 from . import op
 from . import struct_info
+
+# VM
+from .vm_build import build, Executable
diff --git a/python/tvm/relax/exec_builder.py b/python/tvm/relax/exec_builder.py
index 1e28c967d1..140c497eb9 100644
--- a/python/tvm/relax/exec_builder.py
+++ b/python/tvm/relax/exec_builder.py
@@ -21,7 +21,7 @@ from typing import Optional, Union, List
 import tvm
 from tvm.runtime import Object
 from tvm.runtime.container import ShapeTuple
-from .vm import Executable
+from .vm_build import Executable
 from . import _ffi_api
 
 
diff --git a/python/tvm/relax/frontend/torch/dynamo.py b/python/tvm/relax/frontend/torch/dynamo.py
index 94de73a431..589c6be3b5 100644
--- a/python/tvm/relax/frontend/torch/dynamo.py
+++ b/python/tvm/relax/frontend/torch/dynamo.py
@@ -23,7 +23,7 @@ import functools
 from typing import Optional
 
 import tvm
-from tvm.relax.vm import build as relax_build
+from tvm.relax import build as relax_build
 from tvm.relax.frontend.torch.fx_translator import from_fx
 
 
@@ -96,7 +96,7 @@ def relax_dynamo(pipeline: Optional[tvm.transform.Pass] = 
None):
 
         ex = relax_build(mod, target=target)
 
-        vm = tvm.relax.vm.VirtualMachine(exec=ex.mod, device=dev)
+        vm = tvm.relax.VirtualMachine(ex.mod, device=dev)
 
         def exec_tvm(*i_args):
             args = [a.contiguous() for a in i_args]
diff --git a/python/tvm/relax/transform/transform.py b/python/tvm/relax/transform/transform.py
index c72d053290..7044314e85 100644
--- a/python/tvm/relax/transform/transform.py
+++ b/python/tvm/relax/transform/transform.py
@@ -222,6 +222,10 @@ def RunCodegen(
     """
     if entry_functions is None:
         entry_functions = ["main"]
+    # enable cutlass byoc registries
+    # pylint: disable=unused-import,import-outside-toplevel
+    from tvm.contrib import cutlass as _cutlass
+
     return _ffi_api.RunCodegen(target_options, entry_functions)  # type: ignore
 
 
diff --git a/python/tvm/relax/transform/tuning_api/default_functions.py b/python/tvm/relax/transform/tuning_api/default_functions.py
index b72b2f30ee..7cdb211bd3 100644
--- a/python/tvm/relax/transform/tuning_api/default_functions.py
+++ b/python/tvm/relax/transform/tuning_api/default_functions.py
@@ -176,7 +176,7 @@ def default_evaluate(
         ):
             if params:
                 mod = tvm.relax.transform.BindParams("main", params)(mod)
-            relax_exec = tvm.relax.vm.build(mod, target)
+            relax_exec = tvm.relax.build(mod, target)
             return relax_exec.mod
 
         builder = LocalBuilder(f_build=relax_build)
@@ -185,8 +185,8 @@ def default_evaluate(
     if runner is None:
 
         def relax_eval_func(rt_mod, device, evaluator_config, repeated_args):
-            relax_exec = tvm.relax.vm.Executable(rt_mod)
-            relax_vm = tvm.relax.VirtualMachine(exec=relax_exec, device=device)
+            relax_exec = tvm.relax.Executable(rt_mod)
+            relax_vm = tvm.relax.VirtualMachine(relax_exec, device=device)
 
             evaluator = relax_vm.module.time_evaluator(
                 func_name="main",
diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py
new file mode 100644
index 0000000000..35fc65bdc6
--- /dev/null
+++ b/python/tvm/relax/vm_build.py
@@ -0,0 +1,317 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, no-member
+"""VM build logics"""
+from typing import List, Optional, Union, Dict, Any
+
+import tvm
+from tvm import relax
+
+from tvm.contrib import utils as _utils
+
+from tvm.ir.module import IRModule
+from tvm.tir.function import PrimFunc
+
+from . import _ffi_api
+
+
+class Executable:
+    """The executable object emitted by the VM compiler or the ExecBuilder."""
+
+    def __init__(self, mod: tvm.runtime.Module):
+        self.mod = mod
+        self._stats = self.mod["stats"]
+        self._as_text = self.mod["as_text"]
+        self._as_python = self.mod["as_python"]
+
+    def stats(self) -> str:
+        """print the detailed statistics of the executable."""
+        return self._stats()
+
+    def as_text(self) -> str:
+        """print the instructions as text format."""
+        return self._as_text()
+
+    def as_python(self) -> str:
+        """print the instructions as python program."""
+        return self._as_python()
+
+    def jit(self, fcompile=None, addons=None, **kwargs) -> tvm.runtime.Module:
+        """Just-in-time compile and link the modules.
+
+        The Executable returned by relax.build may not be directly
+        runnable as they may contain cuda source files and objects that
+        are yet to be compiled and linked.
+        This function helps to create a runtime.Module for these cases.
+
+        Parameters
+        ----------
+        fcompile : function(target, file_list, kwargs), optional
+            The compilation function to use create the final library object 
during
+
+        kwargs : dict, optional
+            Additional arguments passed to fcompile
+
+        Returns
+        -------
+        rt_mod: tvm.runtime.Module
+            A runnable runtime module that can be passed to VirtualMachine.
+
+        Examples
+        --------
+        .. code:: python
+
+            ex = relax.build(mod, target)
+            # build a runnable module using nvcc to link everything
+            rt_mod = ex.jit()
+            vm = tvm.relax.VirtualMachine(rt_mod, tvm.cuda())
+        """
+        # TODO(tvm-team): Update runtime.Module interfac
+        # to query these properties as bitmask.
+        def _not_runnable(x):
+            return x.type_key in ("c", "static_library")
+
+        # pylint:disable = protected-access
+        not_runnable_list = self.mod._collect_from_import_tree(_not_runnable)
+
+        # everything is runnable, directly return mod.
+        if len(not_runnable_list) == 0:
+            return self.mod
+
+        # found source module, or other not runnable modules
+        # need to be export and load
+        # TODO(tvm-team): Support runnable but not exportable module.
+        # by collecting the link and allow export_library skip those modules.
+        workspace_dir = _utils.tempdir()
+        dso_path = workspace_dir.relpath("exported.so")
+        self.mod.export_library(dso_path, fcompile=fcompile, addons=addons, 
**kwargs)
+        return tvm.runtime.load_module(dso_path)
+
+    def export_library(
+        self,
+        file_name: str,
+        fcompile: Optional[Union[str, callable]] = None,
+        workspace_dir: Optional[str] = None,
+        **kwargs,
+    ) -> Any:
+        """Export the executable to a library which can then be loaded back.
+
+        Parameters
+        ----------
+        file_name : str
+            The name of the shared library.
+
+        fcompile : function(target, file_list, kwargs), optional
+            The compilation function to use create the final library object 
during
+
+        workspace_dir : str, optional
+            The path of the directory used to create the intermediate
+            artifacts when exporting the module.
+            If this is not provided a temporary dir will be created.
+
+        kwargs : dict, optional
+            Additional arguments passed to fcompile
+
+        Returns
+        -------
+        result of fcompile()  : unknown, optional
+            If the compilation function returns an artifact it would be 
returned via
+            export_library, if any.
+
+        Examples
+        --------
+        .. code:: python
+
+            ex = relax.build(mod, target)
+            # export the library
+            ex.export_library("exported.so")
+
+            # load it back for future uses.
+            rt_mod = tvm.runtime.load_module("exported.so")
+            vm = tvm.relax.VirtualMachine(rt_mod, tvm.cuda())
+        """
+        return self.mod.export_library(
+            file_name=file_name, fcompile=fcompile, 
workspace_dir=workspace_dir, **kwargs
+        )
+
+
+def _vmcodegen(
+    builder: "relax.ExecBuilder",
+    mod: tvm.IRModule,
+    exec_mode: str = "bytecode",
+) -> tvm.IRModule:
+    """Running VM codegen.
+
+    Parameters
+    ----------
+    builder: relax.ExecBuilder
+        ExecBuilder to collect the vm executable.
+
+    mod: IRModule
+        The input IRModule to be built.
+
+    exec_mode: {"bytecode", "compiled"}
+        The execution mode.
+
+    Return
+    ------
+    leftover: IRModule
+        Left over IRModule that may contain extra functions.
+    """
+
+    if exec_mode == "bytecode":
+        return _ffi_api.VMCodeGen(builder, mod)  # type:ignore
+    if exec_mode == "compiled":
+        return _ffi_api.VMTIRCodeGen(builder, mod)  # type: ignore
+    raise ValueError("Unknown exec_mode %s" % exec_mode)
+
+
+def _vmlink(
+    builder: "relax.ExecBuilder",
+    target: Union[str, tvm.target.Target],
+    tir_mod: Optional[tvm.IRModule] = None,
+    ext_libs: List[tvm.runtime.Module] = None,
+    params: Optional[Dict[str, list]] = None,
+):
+    """
+    Internal codegen function to make executable.
+
+    This function is only used for unit-testing purpoes.
+
+    Use build instead.
+
+    Parameters
+    ----------
+    builder: relax.ExecBuilder
+        Builder used to collect executables.
+
+    target : Union[str, tvm.target.Target]
+        A build target which can have optional host side compilation target.
+
+    tir_mod: IRModule
+        The input TIR IRModule to be linked together.
+
+    ext_libs:  List[tvm.runtime.Module]
+        List of compiled external modules.
+
+    params: Optional[Dict[str, list]]
+        Extra parameter mappings.
+
+    Returns
+    -------
+    ex: tvm.relax.Executable
+        An executable that can be loaded by virtual machine.
+    """
+    if isinstance(target, str):
+        target = tvm.target.Target(target)
+    if params is None:
+        params = {}
+    if ext_libs is None:
+        ext_libs = []
+    lib = None
+    if tir_mod is not None:
+        lib = tvm.build(tir_mod, target=target)
+    return Executable(_ffi_api.VMLink(builder, target, lib, ext_libs, params)) 
 # type: ignore
+
+
+def build(
+    mod: tvm.IRModule,
+    target: Union[str, tvm.target.Target],
+    params: Optional[Dict[str, list]] = None,
+    exec_mode: str = "bytecode",
+) -> Executable:
+    """
+    Build an IRModule to VM executable.
+
+    Parameters
+    ----------
+    mod: IRModule
+        The input IRModule to be built.
+
+    target : Union[str, tvm.target.Target]
+        A build target which can have optional host side compilation target.
+
+        When TVM compiles device specific program such as CUDA,
+        we also need host(CPU) side code to interact with the driver
+        to setup the dimensions and parameters correctly.
+        host is used to specify the host side codegen target.
+        By default, llvm is used if it is enabled,
+        otherwise a stackvm interpreter is used.
+
+    params: Optional[Dict[str, list]]
+        Parameters for the input IRModule that will be bound.
+
+    exec_mode: {"bytecode", "compiled"}
+        The execution mode.
+
+    Returns
+    -------
+    ex: tvm.relax.Executable
+        An executable that can be loaded by virtual machine.
+
+    Example
+    -------
+
+    .. code-block:: python
+        class InputModule:
+            @R.function
+            def foo(x: Tensor((3, 4), "float32"), y: Tensor((3, 4), 
"float32")):
+                z = R.add(x, y)
+                return z
+
+        mod = InputModule
+        target = tvm.target.Target("llvm", host="llvm")
+        ex = relax.build(mod, target)
+    """
+    if isinstance(target, str):
+        target = tvm.target.Target(target)
+
+    passes = []
+    passes.append(relax.transform.RewriteDataflowReshape())
+    passes.append(relax.transform.ToNonDataflow())
+    passes.append(relax.transform.CallTIRRewrite())
+    passes.append(relax.transform.StaticPlanBlockMemory())
+    passes.append(relax.transform.VMBuiltinLower())
+    passes.append(relax.transform.VMShapeLower())
+    passes.append(relax.transform.AttachGlobalSymbol())
+    seq = tvm.transform.Sequential(passes)
+    new_mod = seq(mod)
+
+    # Extract external runtime modules if exist.
+    attrs = dict(mod.attrs) if mod.attrs else {}
+
+    ext_libs = attrs.get("external_mods", [])
+    constants = attrs.get("const_name_to_constant", {})
+
+    if params is not None:
+        params.update(dict(constants))
+    else:
+        params = constants
+
+    # builder collects the executable
+    builder = relax.ExecBuilder()
+    leftover_mod = _vmcodegen(builder, new_mod, exec_mode=exec_mode)
+    tir_mod = _filter_tir(leftover_mod)
+    return _vmlink(builder, target, tir_mod, ext_libs, params)
+
+
+def _filter_tir(mod: tvm.IRModule) -> tvm.IRModule:
+    tir_mod = IRModule({})
+    for gv in mod.get_global_vars():
+        if isinstance(mod[gv], PrimFunc):
+            tir_mod[gv] = mod[gv]
+    return tir_mod
diff --git a/python/tvm/relax/vm.py b/python/tvm/runtime/relax_vm.py
similarity index 73%
rename from python/tvm/relax/vm.py
rename to python/tvm/runtime/relax_vm.py
index a3578c8a40..9defcb7d80 100644
--- a/python/tvm/relax/vm.py
+++ b/python/tvm/runtime/relax_vm.py
@@ -14,43 +14,18 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-# pylint: disable=invalid-name, redefined-builtin, no-else-return
-"""The Relax virtual machine"""
+# pylint: disable=invalid-name, redefined-builtin, no-else-return, 
consider-using-dict-items
+"""The Relax virtual machine."""
 from typing import Callable, List, Optional, Union, Dict, Tuple, Any
 import numpy as np  # type: ignore
 
-from tvm._ffi import base as _base
 import tvm
-from tvm import relax
-from tvm.ir.module import IRModule
-from tvm.runtime import Device, Module, PackedFunc, container
-from tvm.runtime.object import Object
-from tvm.runtime.profiling import Report
-from tvm.tir.function import PrimFunc
-from . import _ffi_api
-from ..rpc.base import RPC_SESS_MASK
-
-
-class Executable(object):
-    """The executable object emitted by the VM compiler or the ExecBuilder."""
-
-    def __init__(self, mod: Module):
-        self.mod = mod
-        self._stats = self.mod["stats"]
-        self._as_text = self.mod["as_text"]
-        self._as_python = self.mod["as_python"]
-
-    def stats(self) -> str:
-        """print the detailed statistics of the executable."""
-        return self._stats()
+from tvm._ffi import base as _base
 
-    def as_text(self) -> str:
-        """print the instructions as text format."""
-        return self._as_text()
+from tvm.runtime import Device, PackedFunc, container, Object
+from tvm.runtime.profiling import Report
 
-    def as_python(self) -> str:
-        """print the instructions as python program."""
-        return self._as_python()
+from ..rpc.base import RPC_SESS_MASK
 
 
 class VirtualMachine(object):
@@ -61,7 +36,7 @@ class VirtualMachine(object):
 
     def __init__(
         self,
-        exec: Union[Executable, Module],
+        rt_mod: Union[tvm.runtime.Module, "tvm.relax.Executable"],
         device: Union[Device, List[Device]],
         memory_cfg: Optional[Union[str, Dict[Device, str]]] = None,
         profile: bool = False,
@@ -71,8 +46,8 @@ class VirtualMachine(object):
 
         Parameters
         ----------
-        exec: Union[Executable, Module]
-            The VM executable or Runtime Module
+        mod: Union[tvm.runtime.Module, tvm.relax.Executable]
+            Runtime module exported by the result of build.
 
         device : Union[Device, List[Device]]
             The device to deploy the module.
@@ -88,8 +63,20 @@ class VirtualMachine(object):
         profile : Optional[bool]
             Whether or not to enable profiling.
         """
+        if not isinstance(rt_mod, tvm.runtime.Module):
+            # important to keep this import local
+            # as the relax_vm needs to be isolated from compiler
+            # if we do not use the jit feature
+            # pylint:disable=import-outside-toplevel
+            from tvm import relax
+
+            if isinstance(rt_mod, relax.Executable):
+                rt_mod = rt_mod.jit()
+            else:
+                raise ValueError("Expect the rt_mod to be an runtime.Module")
+
         load_exec = "vm_profiler_load_executable" if profile else 
"vm_load_executable"
-        self.module = exec.mod[load_exec]() if isinstance(exec, Executable) 
else exec[load_exec]()
+        self.module = rt_mod[load_exec]()
         self._invoke_closure = self.module["invoke_closure"]
         self._save_function = self.module["save_function"]
         self._set_input = self.module["set_input"]
@@ -408,7 +395,7 @@ class VirtualMachine(object):
         .. code-block:: python
 
             target = tvm.target.Target("llvm", host="llvm")
-            ex = relax.vm.build(TestTimeEvaluator, target)
+            ex = relax.build(TestTimeEvaluator, target)
             vm = relax.VirtualMachine(mod, tvm.cpu())
             timing_res = vm.time_evaluator("func_name", tvm.cpu())(arg0, arg1, 
..., argn)
 
@@ -417,7 +404,7 @@ class VirtualMachine(object):
         .. code-block:: python
 
             target = tvm.target.Target("llvm", host="llvm")
-            ex = relax.vm.build(TestTimeEvaluator, target)
+            ex = relax.build(TestTimeEvaluator, target)
             vm = relax.VirtualMachine(mod, tvm.cpu())
             vm.set_input("func_name", arg0, arg1, ..., argn)
             timing_res = vm.time_evaluator("invoke_stateful", 
tvm.cpu())("func_name")
@@ -428,7 +415,7 @@ class VirtualMachine(object):
         .. code-block:: python
 
             target = tvm.target.Target("llvm", host="llvm")
-            ex = relax.vm.build(TestTimeEvaluator, target)
+            ex = relax.build(TestTimeEvaluator, target)
             vm = relax.VirtualMachine(mod, tvm.cpu())
             vm.save_function("func_name", "func_name_saved", arg0, arg1, ..., 
argn)
             timing_res = vm.time_evaluator("func_name_saved", tvm.cpu())()
@@ -471,171 +458,3 @@ class VirtualMachine(object):
 
         report_json = self.module["profile"](func_name, *cargs)
         return Report.from_json(report_json)
-
-
-def _vmcodegen(
-    builder: "relax.ExecBuilder",
-    mod: tvm.IRModule,
-    exec_mode: str = "bytecode",
-) -> tvm.IRModule:
-    """Running VM codegen.
-
-    Parameters
-    ----------
-    builder: relax.ExecBuilder
-        ExecBuilder to collect the vm executable.
-
-    mod: IRModule
-        The input IRModule to be built.
-
-    exec_mode: {"bytecode", "compiled"}
-        The execution mode.
-
-    Return
-    ------
-    leftover: IRModule
-        Left over IRModule that may contain extra functions.
-    """
-
-    if exec_mode == "bytecode":
-        return _ffi_api.VMCodeGen(builder, mod)  # type:ignore
-    if exec_mode == "compiled":
-        return _ffi_api.VMTIRCodeGen(builder, mod)  # type: ignore
-    raise ValueError("Unknown exec_mode %s" % exec_mode)
-
-
-def _vmlink(
-    builder: "relax.ExecBuilder",
-    target: Union[str, tvm.target.Target],
-    tir_mod: Optional[tvm.IRModule] = None,
-    ext_libs: List[tvm.runtime.Module] = None,
-    params: Optional[Dict[str, list]] = None,
-):
-    """
-    Internal codegen function to make executable.
-
-    This function is only used for unit-testing purpoes.
-
-    Use build instead.
-
-    Parameters
-    ----------
-    builder: relax.ExecBuilder
-        Builder used to collect executables.
-
-    target : Union[str, tvm.target.Target]
-        A build target which can have optional host side compilation target.
-
-    tir_mod: IRModule
-        The input TIR IRModule to be linked together.
-
-    ext_libs:  List[tvm.runtime.Module]
-        List of compiled external modules.
-
-    params: Optional[Dict[str, list]]
-        Extra parameter mappings.
-
-    Returns
-    -------
-    ex: tvm.relax.vm.Executable
-        An executable that can be loaded by virtual machine.
-    """
-    if isinstance(target, str):
-        target = tvm.target.Target(target)
-    if params is None:
-        params = {}
-    if ext_libs is None:
-        ext_libs = []
-    lib = None
-    if tir_mod is not None:
-        lib = tvm.build(tir_mod, target=target)
-    return Executable(_ffi_api.VMLink(builder, target, lib, ext_libs, params)) 
 # type: ignore
-
-
-def build(
-    mod: tvm.IRModule,
-    target: Union[str, tvm.target.Target],
-    params: Optional[Dict[str, list]] = None,
-    exec_mode: str = "bytecode",
-) -> Executable:
-    """
-    Build an IRModule to VM executable.
-
-    Parameters
-    ----------
-    mod: IRModule
-        The input IRModule to be built.
-
-    target : Union[str, tvm.target.Target]
-        A build target which can have optional host side compilation target.
-
-        When TVM compiles device specific program such as CUDA,
-        we also need host(CPU) side code to interact with the driver
-        to setup the dimensions and parameters correctly.
-        host is used to specify the host side codegen target.
-        By default, llvm is used if it is enabled,
-        otherwise a stackvm interpreter is used.
-
-    params: Optional[Dict[str, list]]
-        Parameters for the input IRModule that will be bound.
-
-    exec_mode: {"bytecode", "compiled"}
-        The execution mode.
-
-    Returns
-    -------
-    ex: tvm.relax.vm.Executable
-        An executable that can be loaded by virtual machine.
-
-    Example
-    -------
-
-    .. code-block:: python
-        class InputModule:
-            @R.function
-            def foo(x: Tensor((3, 4), "float32"), y: Tensor((3, 4), 
"float32")):
-                z = R.add(x, y)
-                return z
-
-        mod = InputModule
-        target = tvm.target.Target("llvm", host="llvm")
-        ex = relax.vm.build(mod, target)
-    """
-    if isinstance(target, str):
-        target = tvm.target.Target(target)
-
-    passes = []
-    passes.append(relax.transform.RewriteDataflowReshape())
-    passes.append(relax.transform.ToNonDataflow())
-    passes.append(relax.transform.CallTIRRewrite())
-    passes.append(relax.transform.StaticPlanBlockMemory())
-    passes.append(relax.transform.VMBuiltinLower())
-    passes.append(relax.transform.VMShapeLower())
-    passes.append(relax.transform.AttachGlobalSymbol())
-    seq = tvm.transform.Sequential(passes)
-    new_mod = seq(mod)
-
-    # Extract external runtime modules if exist.
-    attrs = dict(mod.attrs) if mod.attrs else {}
-
-    ext_libs = attrs.get("external_mods", [])
-    constants = attrs.get("const_name_to_constant", {})
-
-    if params is not None:
-        params.update(dict(constants))
-    else:
-        params = constants
-
-    # builder collects the executable
-    builder = relax.ExecBuilder()
-    leftover_mod = _vmcodegen(builder, new_mod, exec_mode=exec_mode)
-    tir_mod = _filter_tir(leftover_mod)
-    return _vmlink(builder, target, tir_mod, ext_libs, params)
-
-
-def _filter_tir(mod: tvm.IRModule) -> tvm.IRModule:
-    tir_mod = IRModule({})
-    for gv in mod.get_global_vars():
-        if isinstance(mod[gv], PrimFunc):
-            tir_mod[gv] = mod[gv]
-    return tir_mod
diff --git a/tests/python/relax/test_codegen_cutlass.py b/tests/python/relax/test_codegen_cutlass.py
index 1eafb1bc1c..5556d1e5d9 100644
--- a/tests/python/relax/test_codegen_cutlass.py
+++ b/tests/python/relax/test_codegen_cutlass.py
@@ -23,7 +23,6 @@ import pytest
 import tvm
 import tvm.testing
 from tvm import relax, relay
-from tvm.contrib.cutlass.build import finalize_modules_relax
 from tvm.relax.dpl import make_fused_bias_activation_pattern, make_matmul_pattern
 from tvm.script import relax as R
 
@@ -214,7 +213,7 @@ has_cutlass = tvm.get_global_func("relax.ext.cutlass", True)
 
 cutlass_enabled = pytest.mark.skipif(
     not has_cutlass,
-    reason="CUTLASS note enabled.",
+    reason="CUTLASS not enabled.",
 )
 
 pytestmark = [cutlass_enabled]
@@ -231,8 +230,7 @@ def get_result_with_relax_cutlass_offload(mod, patterns: List[Tuple], *args):
     mod = seq(mod)
 
     target = tvm.target.Target("cuda")
-    ex = relax.vm.build(mod, target)
-    ex = finalize_modules_relax(ex)
+    ex = relax.build(mod, target)
 
     dev = tvm.gpu(0)
     vm = relax.VirtualMachine(ex, dev)
diff --git a/tests/python/relax/test_codegen_dnnl.py b/tests/python/relax/test_codegen_dnnl.py
index 69139b28ef..885c88f3b0 100644
--- a/tests/python/relax/test_codegen_dnnl.py
+++ b/tests/python/relax/test_codegen_dnnl.py
@@ -88,7 +88,7 @@ def test_dnnl_offload():
     mod = seq(Conv2dReLUx2)
 
     target = tvm.target.Target("llvm")
-    ex = relax.vm.build(mod, target)
+    ex = relax.build(mod, target)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     f = vm["main"]
diff --git a/tests/python/relax/test_codegen_tensorrt.py b/tests/python/relax/test_codegen_tensorrt.py
index 164cf3a818..47a4b1eec6 100644
--- a/tests/python/relax/test_codegen_tensorrt.py
+++ b/tests/python/relax/test_codegen_tensorrt.py
@@ -101,7 +101,7 @@ def test_tensorrt_offload():
 
     target = "cuda"
     dev = tvm.device(target, 0)
-    ex = relax.vm.build(mod, target)
+    ex = relax.build(mod, target)
 
     vm = relax.VirtualMachine(ex, dev)
     f = vm["main"]
diff --git a/tests/python/relax/test_pipeline.py b/tests/python/relax/test_pipeline.py
index 6d6704ae97..c66066f8f8 100644
--- a/tests/python/relax/test_pipeline.py
+++ b/tests/python/relax/test_pipeline.py
@@ -34,7 +34,7 @@ def test_pipeline_compile():
     mod = pipeline(mod)
     target = tvm.target.Target("llvm", host="llvm")
 
-    ex = relax.vm.build(mod, target)
+    ex = relax.build(mod, target)
     x_np = np.random.rand(3, 4).astype(np.float32)
     y_np = np.random.rand(3, 4).astype(np.float32)
     x = tvm.nd.array(x_np)
diff --git a/tests/python/relax/test_relay_translator.py b/tests/python/relax/test_relay_translator.py
index 5f7e05b02d..b4f84027eb 100644
--- a/tests/python/relax/test_relay_translator.py
+++ b/tests/python/relax/test_relay_translator.py
@@ -184,7 +184,7 @@ def translate_and_build_vms(relay_mod, target_str="llvm", translate_op_with_tir=
     relax_mod = relay_translator.from_relay(
         relay_mod["main"], target, translate_op_with_tir=translate_op_with_tir
     )
-    relax_ex = relax.vm.build(relax_mod, target)
+    relax_ex = relax.build(relax_mod, target)
     relax_vm = relax.VirtualMachine(relax_ex, tvm.cpu())
 
     return relay_vm, relax_vm, relax_mod
diff --git a/tests/python/relax/test_transform_bind_params.py b/tests/python/relax/test_transform_bind_params.py
index b96fb89e6c..ceaf8fb165 100644
--- a/tests/python/relax/test_transform_bind_params.py
+++ b/tests/python/relax/test_transform_bind_params.py
@@ -60,11 +60,11 @@ def test_bind_params(use_np_array):
     assert len(mod["main"].params) == 1
 
     target = tvm.target.Target("llvm")
-    ex_after = relax.vm.build(mod, target)
+    ex_after = relax.build(mod, target)
     vm_after = relax.VirtualMachine(ex_after, tvm.cpu())
     res_after = vm_after["main"](x_tvm)
 
-    ex_before = relax.vm.build(InputModule, target)
+    ex_before = relax.build(InputModule, target)
     vm_before = relax.VirtualMachine(ex_before, tvm.cpu())
     res_before = vm_before["main"](x_tvm, w_tvm)
 
diff --git a/tests/python/relax/test_transform_codegen_pass.py b/tests/python/relax/test_transform_codegen_pass.py
index e50ad8f5f4..3e9501147a 100644
--- a/tests/python/relax/test_transform_codegen_pass.py
+++ b/tests/python/relax/test_transform_codegen_pass.py
@@ -56,10 +56,10 @@ def check_executable(exec, dev, inputs, expected):
 
 def check_roundtrip(exec0, dev, inputs, expected):
     exec0.mod.export_library("exec.so")
-    exec1 = relax.vm.Executable(tvm.runtime.load_module("exec.so"))
+    exec1 = tvm.runtime.load_module("exec.so")
     os.remove("exec.so")
-    assert exec0.stats() == exec1.stats()
-    assert exec0.as_text() == exec1.as_text()
+    assert exec0.stats() == exec1["stats"]
+    assert exec0.as_text() == exec1["as_text"]()
 
     check_executable(exec0, dev, inputs, expected)
     check_executable(exec1, dev, inputs, expected)
@@ -81,7 +81,7 @@ def gen_ground_truth(mod, target, dev, inputs):
             )
             new_mod = seq(mod)
     assert relax.analysis.well_formed(new_mod)
-    exec = relax.vm.build(new_mod, target, params={})
+    exec = relax.build(new_mod, target, params={})
     vm = relax.VirtualMachine(exec, dev)
     return vm["main"](*inputs)
 
@@ -140,7 +140,7 @@ def test_tensorrt_only():
         ]
     )(mod)
 
-    ex0 = relax.vm.build(new_mod, target, params={})
+    ex0 = relax.build(new_mod, target, params={})
     # Sanity check for the correctness and rountrip
     check_roundtrip(ex0, dev, inputs, expected)
 
@@ -173,7 +173,7 @@ def test_mix_use_tensorrt_and_tvm():
             )(mod)
     assert relax.analysis.well_formed(new_mod)
     with transform.PassContext(opt_level=0):
-        ex0 = relax.vm.build(new_mod, target, params={})
+        ex0 = relax.build(new_mod, target, params={})
 
     # Sanity check for the correctness and rountrip
     check_roundtrip(ex0, dev, inputs, expected)
diff --git a/tests/python/relax/test_vm_build.py b/tests/python/relax/test_vm_build.py
index e78e926dcb..e51e22e323 100644
--- a/tests/python/relax/test_vm_build.py
+++ b/tests/python/relax/test_vm_build.py
@@ -46,7 +46,7 @@ def test_vm_compile_simple(exec_mode):
 
     mod = TestVMCompileStage0
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     inp1 = tvm.nd.array(np.random.rand(3, 4).astype(np.float32))
     inp2 = tvm.nd.array(np.random.rand(3, 4).astype(np.float32))
     vm = relax.VirtualMachine(ex, tvm.cpu())
@@ -64,7 +64,7 @@ def test_match_check(exec_mode):
 
     mod = TestMatchCheck
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x0 = tvm.nd.array(np.zeros((1, 2)).astype("int32"))
     y0 = tvm.nd.array(np.zeros((2, 1)).astype("float32"))
@@ -92,7 +92,7 @@ def test_vm_compile_stage2(exec_mode):
 
     mod = TestVMCompileStage2
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     shape = (32, 16)
@@ -127,7 +127,7 @@ def test_vm_compile_stage3(exec_mode):
 
     mod = TestVMCompileStage3
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     shape = (32, 16)
@@ -152,7 +152,7 @@ def test_vm_compile_e2e(exec_mode):
     mod = TestVMCompileE2E
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     shape = (32, 16)
@@ -193,7 +193,7 @@ def test_vm_compile_e2e_func_param_with_shape(exec_mode):
     mod = TestVMCompileE2E2
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     data = tvm.nd.array(np.random.rand(32, 16).astype(np.float32))
@@ -220,7 +220,7 @@ def test_vm_emit_te_extern(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     data = tvm.nd.array(np.random.rand(16, 32).astype(np.float32))
@@ -249,7 +249,7 @@ def test_vm_emit_te_concat(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     inp = tvm.nd.array(
@@ -288,7 +288,7 @@ def test_vm_emit_te_dtype_change(exec_mode):
     new_mod = relax.transform.CallTIRRewrite()(mod)
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     inp = tvm.nd.array(
@@ -317,7 +317,7 @@ def test_vm_emit_te_floor_symbolic_shape(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     shape = (9,)
@@ -346,7 +346,7 @@ def test_vm_emit_te_constant_param_cpu(exec_mode):
         bb.emit_func_output(gv)
 
     mod = bb.get()
-    exec = relax.vm.build(mod, "llvm", exec_mode=exec_mode)
+    exec = relax.build(mod, "llvm", exec_mode=exec_mode)
     dev = tvm.cpu()
     vm = relax.VirtualMachine(exec, dev)
 
@@ -374,7 +374,7 @@ def test_vm_emit_te_constant_param_gpu(exec_mode):
     loops = sch.get_loops(sch.get_block(name="T_add", func_name="add"))
     sch.bind(loops[0], "threadIdx.x")
 
-    exec = relax.vm.build(sch.mod, "cuda", exec_mode=exec_mode)
+    exec = relax.build(sch.mod, "cuda", exec_mode=exec_mode)
     dev = tvm.cuda()
     vm = relax.VirtualMachine(exec, dev)
 
@@ -400,7 +400,7 @@ def test_vm_relax_symbolic_shape(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     shape1 = (5,)
@@ -435,14 +435,10 @@ def test_vm_relax_dyn_tir_shape(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
-    ex.mod.export_library("exec.so")
-    exec1 = relax.vm.Executable(tvm.runtime.load_module("exec.so"))
-    os.remove("exec.so")
-    assert ex.as_text() == exec1.as_text()
-
-    vm = relax.VirtualMachine(ex, tvm.cpu())
+    ex.export_library("exec.so")
+    vm = relax.VirtualMachine(tvm.runtime.load_module("exec.so"), tvm.cpu())
     inp = tvm.nd.array(np.random.rand(2).astype(np.float32))
     inp2 = tvm.nd.array(np.random.rand(3).astype(np.float32))
 
@@ -466,7 +462,7 @@ def test_vm_tuple(exec_mode):
     mod = bb.get()
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
 
     vm = relax.VirtualMachine(ex, tvm.cpu())
     shape = (5,)
@@ -496,7 +492,7 @@ def test_vm_tuplegetitem(exec_mode):
 
     mod = TestVMTupleGetItem
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x_inp = tvm.nd.array(np.random.rand(2, 3).astype("float32"))
     y_inp = tvm.nd.array(np.random.rand(2, 3).astype("float32"))
@@ -526,7 +522,7 @@ def test_lower_memory_alloc_storage_tensor(exec_mode):
 
     mod = TestMemoryAllocStorageTensor
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x = tvm.nd.array(np.random.rand(2, 3).astype("float32"))
     y = vm["main"](x)
@@ -577,7 +573,7 @@ def test_sub_func_call(exec_mode):
             return gv1
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(TestVMSubFunction, target, exec_mode=exec_mode)
+    ex = relax.build(TestVMSubFunction, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x_inp = tvm.nd.array(np.random.rand(32, 32).astype(np.float32))
     y_inp = tvm.nd.array(np.random.rand(32, 32).astype(np.float32))
@@ -609,7 +605,7 @@ def test_recursion(exec_mode):
             return res
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(TestVMRecursion, target, exec_mode=exec_mode)
+    ex = relax.build(TestVMRecursion, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
 
     inp = np.empty(1).astype("float32")
@@ -639,7 +635,7 @@ def test_vm_closure(exec_mode):
 
     mod = TestClosure
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(mod, target, exec_mode=exec_mode)
+    ex = relax.build(mod, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x_inp = tvm.nd.array(np.random.rand(2, 3).astype("float32"))
     y_inp = tvm.nd.array(np.array([[3.1, 4.0, 5.0], [6.0, 7.1, 9.0]], dtype="float32"))
@@ -658,7 +654,7 @@ def test_time_evaluator(exec_mode):
             )
 
     target = tvm.target.Target("llvm", host="llvm")
-    ex = relax.vm.build(TestTimeEvaluator, target, exec_mode=exec_mode)
+    ex = relax.build(TestTimeEvaluator, target, exec_mode=exec_mode)
     vm = relax.VirtualMachine(ex, tvm.cpu())
     x = tvm.nd.array(np.random.rand(1).astype("float32"))
     y = tvm.nd.array(np.random.rand(1).astype("float32"))
@@ -780,9 +776,9 @@ def set_input_attempt_get(vm: relax.VirtualMachine, device: tvm.runtime.Device)
 def make_vm(mod, exec_mode) -> Tuple[relax.VirtualMachine, tvm.runtime.Device]:
     """Returns a local VM for the given mod and the device"""
     target = tvm.target.Target("llvm", host="llvm")
-    exec = relax.vm.build(TestVMSetInput, target, exec_mode=exec_mode)
-    exec.mod.export_library("exec.so")
-    exec_loaded = relax.vm.Executable(tvm.runtime.load_module("exec.so"))
+    exec = relax.build(TestVMSetInput, target, exec_mode=exec_mode)
+    exec.export_library("exec.so")
+    exec_loaded = tvm.runtime.load_module("exec.so")
     os.remove("exec.so")
     device = tvm.cpu()
     return relax.VirtualMachine(exec_loaded, device), device
@@ -798,10 +794,10 @@ def run_on_rpc(
     The trial function should take a VM and a device
     """
     target = tvm.target.Target("llvm", host="llvm")
-    exec = relax.vm.build(mod, target, exec_mode=exec_mode)
+    exec = relax.build(mod, target, exec_mode=exec_mode)
     temp = utils.tempdir()
     path = temp.relpath("vm_library.so")
-    exec.mod.export_library(path)
+    exec.export_library(path)
 
     # Use local rpc server for testing.
     # Server must use popen so it doesn't inherit the current process state. It
@@ -817,7 +813,7 @@ def run_on_rpc(
 
         device = remote.cpu()
         # Build a VM out of the executable and context.
-        vm = relax.vm.VirtualMachine(exec=rexec, device=device)
+        vm = relax.VirtualMachine(rexec, device=device)
         trial_func(vm, device)
 
     check_remote(rpc.Server("127.0.0.1"))
diff --git a/tests/python/relax/test_vm_codegen_only.py b/tests/python/relax/test_vm_codegen_only.py
index 600d245617..679641de13 100644
--- a/tests/python/relax/test_vm_codegen_only.py
+++ b/tests/python/relax/test_vm_codegen_only.py
@@ -33,8 +33,8 @@ EXEC_MODE = ["bytecode", "compiled"]
 
 def codegen(mod, target, exec_mode="bytecode"):
     builder = relax.ExecBuilder()
-    tir_mod = relax.vm._vmcodegen(builder, mod, exec_mode=exec_mode)
-    return relax.vm._vmlink(builder, target, tir_mod)
+    tir_mod = relax.vm_build._vmcodegen(builder, mod, exec_mode=exec_mode)
+    return relax.vm_build._vmlink(builder, target, tir_mod)
 
 
 @pytest.mark.parametrize("exec_mode", EXEC_MODE)
@@ -95,10 +95,10 @@ def test_vm_exec_serialize_export_library(exec_mode):
 
     temp_dir = utils.tempdir()
     path_exec = temp_dir.relpath("exec.so")
-    ex.mod.export_library(path_exec)
+    ex.export_library(path_exec)
 
-    loaded_exec = relax.vm.Executable(tvm.runtime.load_module(path_exec))
-    assert ex.as_text() == loaded_exec.as_text()
+    loaded_exec = tvm.runtime.load_module(path_exec)
+    assert ex.as_text() == loaded_exec["as_text"]()
 
 
 @pytest.mark.parametrize("exec_mode", EXEC_MODE)
diff --git a/tests/python/relax/test_vm_codegen_tir.py b/tests/python/relax/test_vm_codegen_tir.py
index 6f3bced385..d6bac6ae15 100644
--- a/tests/python/relax/test_vm_codegen_tir.py
+++ b/tests/python/relax/test_vm_codegen_tir.py
@@ -28,7 +28,7 @@ from tvm.script import tir as T
 
 def get_tir_mod(mod):
     builder = relax.ExecBuilder()
-    return relax.vm._vmcodegen(builder, mod, exec_mode="compiled")
+    return relax.vm_build._vmcodegen(builder, mod, exec_mode="compiled")
 
 
 def test_add():
diff --git a/tests/python/relax/test_vm_profiler.py b/tests/python/relax/test_vm_profiler.py
index 90737cc9c9..1145967411 100644
--- a/tests/python/relax/test_vm_profiler.py
+++ b/tests/python/relax/test_vm_profiler.py
@@ -47,7 +47,7 @@ def get_exec(data_shape):
     mod = relax.transform.BindParams("main", params)(mod)
 
     target = "llvm"
-    return relax.vm.build(mod, target)
+    return relax.build(mod, target)
 
 
 def test_conv2d_cpu():
@@ -65,7 +65,7 @@ def test_conv2d_cpu():
 def with_rpc(ex, f, data_np):
     temp = utils.tempdir()
     path = temp.relpath("vm_library.so")
-    ex.mod.export_library(path)
+    ex.export_library(path)
 
     server = rpc.Server("127.0.0.1")
     remote = rpc.connect(server.host, server.port, session_timeout=10)
@@ -75,7 +75,7 @@ def with_rpc(ex, f, data_np):
 
     device = remote.cpu()
 
-    vm = relax.vm.VirtualMachine(exec=rexec, device=device, profile=True)
+    vm = relax.VirtualMachine(rexec, device=device, profile=True)
     data = tvm.nd.array(data_np, device)
 
     f(vm, data)
@@ -115,7 +115,7 @@ def test_tuple():
             return ((x, (x,)), x)
 
     target = "llvm"
-    ex = relax.vm.build(NestedTuple, target)
+    ex = relax.build(NestedTuple, target)
 
     data_np = np.random.randn(16).astype("float32")
 
diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py
index 6443d50f9e..63ff66eaa2 100644
--- a/tests/python/relay/test_vm.py
+++ b/tests/python/relay/test_vm.py
@@ -862,7 +862,7 @@ def prepare_vm_model(path, tensor_shape):
     vm_exec = vm.compile(mod, target=target)
 
     # Export to Disk
-    vm_exec.mod.export_library(path)
+    vm_exec.export_library(path)
 
 
 def test_vm_rpc():
@@ -1393,7 +1393,7 @@ def test_large_constants():
     path_consts = temp.relpath("consts")
     vm_exec.move_late_bound_consts(path_consts, byte_limit=256)
     path_dso = temp.relpath("lib.so")
-    vm_exec.mod.export_library(path_dso)
+    vm_exec.export_library(path_dso)
 
     # Load library files and constants
     mod = runtime.load_module(path_dso)
@@ -1442,7 +1442,7 @@ def test_load_late_bound_consts_with_no_late_bound_consts():
     # Ensure const_data is below the byte threshold for a late-bound const.
     byte_limit = len(const_data.tobytes()) + 1
     vm_exec.move_late_bound_consts(path_consts, byte_limit=byte_limit)
-    vm_exec.mod.export_library(path_dso)
+    vm_exec.export_library(path_dso)
 
     mod = runtime.load_module(path_dso)
     mod["load_late_bound_consts"](path_consts)
@@ -1503,7 +1503,7 @@ def test_load_and_save_constants_via_map():
     # Save to constants and library files
     temp = utils.tempdir()
     path_dso = temp.relpath("lib.so")
-    vm_exec.mod.export_library(path_dso)
+    vm_exec.export_library(path_dso)
 
     # Load library files and constants
     mod = runtime.load_module(path_dso)
@@ -1551,7 +1551,7 @@ def test_load_late_bound_consts_via_map_with_no_late_bound_consts():
     # Ensure const_data is below the byte threshold for a late-bound const.
     byte_limit = len(const_data.tobytes()) + 1
     consts_map = vm_exec.get_late_bound_consts(byte_limit=byte_limit)
-    vm_exec.mod.export_library(path_dso)
+    vm_exec.export_library(path_dso)
 
     mod = runtime.load_module(path_dso)
     mod["load_late_bound_consts_from_map"](consts_map)

[tvm] branch unity updated: [Unity] Refactor Relax Build JIT UX (#14088)

Reply via email to