tlopex commented on code in PR #18413:
URL: https://github.com/apache/tvm/pull/18413#discussion_r2483901170
##########
docs/how_to/tutorials/cross_compilation_and_rpc.py:
##########
@@ -256,13 +256,328 @@ def run_opencl():
print("OpenCL test passed!")
+#########################################################################
+# Deploy PyTorch Models to Remote Devices with RPC
+# ------------------------------------------------
+# The above examples demonstrate cross compilation and RPC using low-level
+# TensorIR (via TE). For deploying complete neural network models from frameworks
+# like PyTorch or ONNX, TVM's Relax provides a higher-level abstraction that is
+# better suited for end-to-end model compilation.
+#
+# This section shows a modern workflow for deploying models to **any remote device**:
+#
+# 1. Import a PyTorch model and convert it to Relax
+# 2. Cross-compile for the target architecture (ARM, x86, RISC-V, etc.)
+# 3. Deploy via RPC to a remote device
+# 4. Run inference remotely
+#
+# This workflow is applicable to various deployment scenarios:
+#
+# - **ARM devices**: Raspberry Pi, NVIDIA Jetson, mobile phones
+# - **x86 servers**: Remote Linux servers, cloud instances
+# - **Embedded systems**: RISC-V boards, custom hardware
+# - **Accelerators**: Remote machines with GPUs, TPUs, or other accelerators
+#
+# .. note::
+#    This example uses PyTorch for demonstration, but the workflow is identical
+#    for ONNX models. Simply replace ``from_exported_program()`` with
+#    ``from_onnx(model, keep_params_in_input=True)`` and follow the same steps
+#    (see the sketch below).
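+#
+# For reference, a minimal sketch of the ONNX variant (``"model.onnx"`` is a
+# placeholder path for your own model file):
+#
+# .. code-block:: python
+#
+#    import onnx
+#    from tvm import relax
+#    from tvm.relax.frontend.onnx import from_onnx
+#
+#    onnx_model = onnx.load("model.onnx")
+#    mod = from_onnx(onnx_model, keep_params_in_input=True)
+#    mod, params = relax.frontend.detach_params(mod)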
+
+# First, let's check if PyTorch is available
+try:
+    import torch
+    from torch.export import export
+
+    HAS_TORCH = True
+except ImportError:
+    HAS_TORCH = False
+    print("PyTorch not available, skipping Relax example")
+
+
+def run_pytorch_model_via_rpc():
+ """
+ Demonstrates the complete workflow of deploying a PyTorch model to an ARM
device via RPC.
+ """
+ if not HAS_TORCH:
+ print("Skipping PyTorch example (PyTorch not installed)")
+ return
+
+ from tvm import relax
+ from tvm.relax.frontend.torch import from_exported_program
+
+    ######################################################################
+    # Step 1: Define and Export PyTorch Model
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # We use a simple MLP model for demonstration. In practice, this could be
+    # any PyTorch model (ResNet, BERT, etc.).
+
+    class TorchMLP(torch.nn.Module):
+        def __init__(self) -> None:
+            super().__init__()
+            self.net = torch.nn.Sequential(
+                torch.nn.Flatten(),
+                torch.nn.Linear(28 * 28, 128),
+                torch.nn.ReLU(),
+                torch.nn.Linear(128, 10),
+            )
+
+        def forward(self, data: torch.Tensor) -> torch.Tensor:
+            return self.net(data)
+
+    # Export the model using the PyTorch 2.x export API
+    torch_model = TorchMLP().eval()
+    example_args = (torch.randn(1, 1, 28, 28, dtype=torch.float32),)
+
+    with torch.no_grad():
+        exported_program = export(torch_model, example_args)
+
+    ######################################################################
+    # Step 2: Convert to Relax and Prepare for Compilation
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Convert the exported PyTorch program to TVM's Relax representation.
+
+    mod = from_exported_program(exported_program, keep_params_as_input=True)
+    # Separate the parameters from the model for flexible deployment
+    mod, params = relax.frontend.detach_params(mod)
+
+    print("Converted PyTorch model to Relax:")
+    print(f"  - Number of parameters: {len(params['main'])}")
+
+    ######################################################################
+    # Step 3: Cross-Compile for Target Device
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Compile the model for the target device architecture. The target
+    # configuration depends on your deployment scenario.
+
+    if local_demo:
+        # For demonstration on the local machine, use a local target
+        target = tvm.target.Target("llvm")
+        print("Using local target for demonstration")
+    else:
+        # Choose the appropriate target for your device:
+        #
+        # ARM devices:
+        #   - Raspberry Pi 3/4 (32-bit): "llvm -mtriple=armv7l-linux-gnueabihf"
+        #   - Raspberry Pi 4 (64-bit) / Jetson: "llvm -mtriple=aarch64-linux-gnu"
+        #   - Android: "llvm -mtriple=aarch64-linux-android"
+        #
+        # x86 servers:
+        #   - Linux x86_64: "llvm -mtriple=x86_64-linux-gnu"
+        #   - With AVX-512: "llvm -mtriple=x86_64-linux-gnu -mcpu=skylake-avx512"
+        #
+        # RISC-V:
+        #   - RV64: "llvm -mtriple=riscv64-unknown-linux-gnu"
+        #
+        # GPU targets:
+        #   - CUDA: tvm.target.Target("cuda", host="llvm -mtriple=x86_64-linux-gnu")
+        #   - OpenCL: tvm.target.Target("opencl", host="llvm -mtriple=aarch64-linux-gnu")
+        #
+        # For this example, we use ARM 64-bit
+        target = tvm.target.Target("llvm -mtriple=aarch64-linux-gnu")
+        print(f"Cross-compiling for target: {target}")
+
+    # Apply the optimization pipeline
+    pipeline = relax.get_pipeline()
+    with target:
+        built_mod = pipeline(mod)
+
+    # Compile to an executable
+    executable = tvm.compile(built_mod, target=target)
+
+    # Export to a shared library
+    lib_path = temp.relpath("model_deployed.so")
+    executable.export_library(lib_path)
+    print(f"Exported library to: {lib_path}")
+
+    # Save the parameters separately
+    import numpy as np
+
+    params_path = temp.relpath("model_params.npz")
+    param_arrays = {f"p_{i}": p.numpy() for i, p in enumerate(params["main"])}
+    np.savez(params_path, **param_arrays)
+    print(f"Saved parameters to: {params_path}")
+
+    ######################################################################
+    # Step 4: Deploy to Remote Device via RPC
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Connect to the remote device, upload the compiled library and parameters,
+    # then run inference remotely. This works for any device running a TVM RPC
+    # server.
+    #
+    # Note: The following code demonstrates the RPC workflow. In local_demo mode,
+    # we skip actual execution to avoid LocalSession compatibility issues.
+
+    if local_demo:
+        # For demonstration, show the code structure without executing it
+        print("\nRPC workflow (works for any remote device):")
+        print("=" * 50)
+        print("1. Start RPC server on target device:")
+        print("   python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090")
+        print("\n2. Connect from local machine:")
+        print("   remote = rpc.connect('DEVICE_IP', 9090)")
+        print("\n3. Upload compiled library:")
+        print("   remote.upload('model_deployed.so')")
+        print("   remote.upload('model_params.npz')")
+        print("\n4. Load and run remotely:")
+        print("   lib = remote.load_module('model_deployed.so')")
+        print("   vm = relax.VirtualMachine(lib, remote.cpu())")
+        print("   result = vm['main'](input, *params)")
+        print("\nDevice examples:")
+        print("  - Raspberry Pi: 192.168.1.100")
+        print("  - Remote server: ssh tunnel or direct IP")
+        print("  - NVIDIA Jetson: 10.0.0.50")
+        print("  - Cloud instance: public IP")
+        print("\nTo run actual RPC, set local_demo=False")
+        return  # Skip actual RPC execution in demo mode
+
+    # Actual RPC workflow for a real deployment.
+    # Connect to the remote device (works for ARM, x86, RISC-V, etc.).
+    # Make sure the RPC server is running on the device:
+    #   python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090
+    device_host = "192.168.1.100"  # Replace with your device IP
+    device_port = 9090
+    remote = rpc.connect(device_host, device_port)
+    print(f"Connected to remote device at {device_host}:{device_port}")
+
+    # Upload the library and parameters to the remote device
+    remote.upload(lib_path)
+    remote.upload(params_path)
+    print("Uploaded files to remote device")
+
+    # Load the library on the remote device
+    lib = remote.load_module("model_deployed.so")
+
+    # Choose the device on the remote machine:
+    #   For CPU:      dev = remote.cpu()
+    #   For CUDA GPU: dev = remote.cuda(0)
+    #   For OpenCL:   dev = remote.cl(0)
+    dev = remote.cpu()
+
+    # Create the VM on the remote device
+    vm = relax.VirtualMachine(lib, dev)
+
+    # Load parameters from the local copy; tvm.runtime.tensor(..., dev) copies
+    # each array to the remote device. (Alternatively, you could read the
+    # uploaded .npz directly on the remote filesystem.)
+    params_npz = np.load(params_path)
+    remote_params = [tvm.runtime.tensor(params_npz[f"p_{i}"], dev) for i in range(len(params_npz))]
+
+    ######################################################################
+    # Step 5: Run Inference on Remote Device
+    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    # Execute the model on the remote device and retrieve the results.
+
+    # Prepare input data
+    input_data = np.random.randn(1, 1, 28, 28).astype("float32")
+    remote_input = tvm.runtime.tensor(input_data, dev)
+
+    # Run inference on the remote device
+    output = vm["main"](remote_input, *remote_params)
+
+    # Extract the result (handle both tuple and single-tensor outputs)
+    if hasattr(output, "__len__") and len(output) > 0:
+        result = output[0]
+    else:
+        result = output
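+
+    # Copy the result back to the host and inspect it. Calling .numpy() on a
+    # tensor that lives on the remote device transfers it over the RPC channel.
+    print(f"Inference output shape: {result.numpy().shape}")
+    print("PyTorch model RPC test passed!")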
Review Comment:
Okay