zxybazh commented on code in PR #13514:
URL: https://github.com/apache/tvm/pull/13514#discussion_r1042803967
##########
python/tvm/meta_schedule/relay_integration.py:
##########
@@ -97,13 +99,16 @@ def _normalize_params(
if executor is None:
executor = relay.backend.Executor("graph")
+ if runtime is None:
+ runtime = relay.backend.Runtime("cpp")
Review Comment:
May I ask if this change is necessary here, and does it impact other target
usage? If so, can we add some comments to explain that this runtime is for the
cpp runtime only?
##########
python/tvm/meta_schedule/relay_integration.py:
##########
@@ -385,7 +401,9 @@ def compile_relay(
config=pass_config,
):
if backend == "graph":
- return relay.build(mod, target=target, params=params,
executor=executor)
+ return relay.build(
+ mod, target=target, params=params, executor=executor,
runtime=runtime
Review Comment:
Would this have any impact on non-cpp builds? Asking because it seems that
after parameter normalization the runtime would be set to
`relay.backend.Runtime("cpp")`.
##########
python/tvm/contrib/micro/meta_schedule/test_autotune_ms.py:
##########
@@ -0,0 +1,181 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import numpy as np
+import pytest
+from types import MappingProxyType
+import pathlib
+import json
+
+import tvm
+from tvm import relay
+from tvm.relay.backend import Executor
+from tvm.contrib import graph_executor, utils
+from tvm import meta_schedule as ms
+from tvm.contrib.micro.meta_schedule.local_builder_micro import
get_micro_local_builder
+from tvm.contrib.micro.meta_schedule.rpc_runner_micro import
get_rpc_runner_micro
+
+
+def get_module():
+ data_shape = (1, 3, 16, 16)
+ weight_shape = (8, 3, 5, 5)
+ data = relay.var("data", relay.TensorType(data_shape, "float32"))
+ weight = relay.var("weight", relay.TensorType(weight_shape, "float32"))
+ y = relay.nn.conv2d(
+ data,
+ weight,
+ padding=(2, 2),
+ kernel_size=(5, 5),
+ kernel_layout="OIHW",
+ out_dtype="float32",
+ )
+ f = relay.Function([data, weight], y)
+ mod = tvm.IRModule.from_expr(f)
+ mod = relay.transform.InferType()(mod)
+
+ weight_sample = np.random.rand(
+ weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3]
+ ).astype("float32")
+ params = {mod["main"].params[1].name_hint: weight_sample}
+
+ model_info = {
+ "in_tensor": "data",
+ "in_shape": data_shape,
+ "in_dtype": "float32",
+ }
+
+ return mod, params, model_info
+
+
[email protected]_micro
[email protected](
+ "platform, options",
+ [
+ pytest.param("crt", None),
+ pytest.param(
+ "zephyr",
+ {
+ "board": "qemu_x86",
+ "project_type": "host_driven",
+ },
+ ),
+ ],
+)
+def test_micro_tuning_with_meta_schedule(platform, options):
+ if platform == "crt":
+ target = tvm.target.target.micro(model="host", options="-num-cores=1")
+ else:
+ boards_file = (
+ pathlib.Path(tvm.micro.get_microtvm_template_projects("zephyr")) /
"boards.json"
+ )
+ with open(boards_file) as f:
+ boards = json.load(f)
+ target = tvm.target.target.micro(
+ model=boards[options["board"]]["model"], options="-mcpu=cortex-m4
-num-cores=1"
+ )
+
+ work_dir = utils.tempdir()
+ mod, params, model_info = get_module()
+ input_name = model_info["in_tensor"]
+ input_shape = model_info["in_shape"]
+ input_dtype = model_info["in_dtype"]
+ data_sample = np.random.rand(*input_shape).astype(input_dtype)
+
+ runtime = relay.backend.Runtime("crt", {"system-lib": True})
+ executor = Executor("aot", {"link-params": True})
+ # This line is necessary for link-params to take effect during
+ # task extraction and relay.build(...).
+ mod = mod.with_attr("executor", executor)
+
+ builder = get_micro_local_builder()
+ with get_rpc_runner_micro(
+ platform=platform, options=options, session_timeout_sec=120
+ ) as runner:
+ with ms.Profiler() as profiler:
+ db: tvm.runtime.Module = ms.relay_integration.tune_relay(
Review Comment:
`db` should be a MetaSchedule Database, right?
##########
python/tvm/contrib/micro/meta_schedule/rpc_runner_micro.py:
##########
@@ -0,0 +1,243 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""RPC Runner Micro"""
+
+from contextlib import contextmanager
+from typing import Callable, List, Optional
+from collections import namedtuple
+import signal
+
+from tvm import micro
+from tvm import nd
+from tvm.contrib.popen_pool import PopenPoolExecutor
+from tvm.rpc.server import Server
+from tvm.rpc.tracker import Tracker
+from tvm.meta_schedule.logging import get_logger
+from tvm.meta_schedule.utils import (
+ cpu_count,
+ derived_object,
+)
+from tvm.meta_schedule.runner.config import EvaluatorConfig, RPCConfig
+from tvm.meta_schedule.runner import PyRunner, RunnerFuture, RunnerInput
+from tvm.meta_schedule.runner.rpc_runner import RPCRunnerFuture
+from tvm.meta_schedule.runner.utils import T_ARG_INFO_JSON_OBJ_LIST
+
+logger = get_logger(__name__) # pylint: disable=invalid-name
+
+
+@derived_object
+class RPCRunnerMicro(PyRunner):
+ """RPC based runner for tuning micro models."""
+
+ def __init__(
+ self,
+ platform: str = "crt",
+ project_options: Optional[dict] = None,
+ rpc_config: Optional[RPCConfig] = None,
+ evaluator_config: Optional[EvaluatorConfig] = None,
+ max_workers: Optional[int] = None,
+ initializer: Optional[Callable[[], None]] = None,
+ ) -> None:
+ """Constructor
+
+ Parameters
+ ----------
+ platform: str
+ The platform used for project generation.
+ project_options: dict
+ The options for the generated micro project.
+ rpc_config: RPCConfig
+ The rpc configuration.
+ evaluator_config: EvaluatorConfig
+ The evaluator configuration.
+ max_workers: Optional[int] = None
+ The maximum number of connections. Defaults to number of logical
CPU cores.
+ initializer: Optional[Callable[[], None]]
+ The initializer function.
+ """
+ super().__init__()
+ self.platform = platform
+ if project_options is None:
+ project_options = {}
+ self.project_options = project_options
+ self.rpc_config = RPCConfig._normalized(rpc_config)
+ self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
+
+ if max_workers is None:
+ max_workers = cpu_count(logical=True)
+ logger.info("RPCRunner: max_workers = %d", max_workers)
+ self.pool = PopenPoolExecutor(
+ max_workers=max_workers,
+ timeout=rpc_config.session_timeout_sec,
+ initializer=initializer,
+ )
+
+ def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
+ results: List[RunnerFuture] = []
+
+ for runner_input in runner_inputs:
+ future = RPCRunnerFuture(
+ future=self.pool.submit(
+ _worker_func,
+ self.platform,
+ self.project_options or {},
+ self.rpc_config,
+ self.evaluator_config,
+ str(runner_input.artifact_path),
+ str(runner_input.device_type),
+ tuple(arg_info.as_json() for arg_info in
runner_input.args_info),
+ ),
+ timeout_sec=self.rpc_config.session_timeout_sec,
+ )
+ results.append(future) # type: ignore
+ return results
+
+
+def _worker_func(
+ platform: str,
+ project_options: dict,
+ rpc_config: RPCConfig,
+ evaluator_config: EvaluatorConfig,
+ artifact_path: str,
+ device_type: str,
+ args_info: T_ARG_INFO_JSON_OBJ_LIST,
+) -> List[float]:
+
+ module_loader = micro.AutoTvmModuleLoader(
+ template_project_dir=micro.get_microtvm_template_projects(platform),
+ project_options=project_options,
+ )
+
+ remote_kw = {
+ "device_key": rpc_config.tracker_key,
+ "host": rpc_config.tracker_host,
+ "port": rpc_config.tracker_port,
+ "priority": 0,
+ "timeout": 100,
+ }
+ build_result = namedtuple("BuildResult", ["filename"])(artifact_path)
+
+ with module_loader(remote_kw, build_result) as (remote, mod):
+ dev = remote.device(device_type, 0)
+ f_prepare = ""
+ if evaluator_config.enable_cpu_cache_flush:
+ f_prepare = "cache_flush_cpu_non_first_arg"
+ time_f = mod.time_evaluator(
+ mod.entry_name,
+ dev,
+ number=evaluator_config.number,
+ repeat=evaluator_config.repeat,
+ min_repeat_ms=evaluator_config.min_repeat_ms,
+ f_preproc=f_prepare,
+ )
+
+ random_fill = remote.get_function("tvm.contrib.random.random_fill")
+ args = [nd.empty(x[2], x[1], dev) for x in args_info]
+ for arg in args:
+ random_fill(arg)
+ dev.sync()
+
+ costs = time_f(*args).results
+ return costs
+
+
+@contextmanager
+def get_rpc_runner_micro(
+ platform,
+ options,
+ session_timeout_sec: int = 300,
+ number: int = 3,
+ repeat: int = 1,
+ min_repeat_ms: int = 100,
+):
+ """Parameters
+ ----------
+ platform: str
+ The platform used for project generation.
+ project_options: dict
+ The options for the generated micro project.
+ session_timeout_sec: int
+ The session timeout. if the number of candidates sent to runner is
larger
+ than the runner workers, increase the timeout.
+ number: int
+ The number of times to run the evaluator function for taking average.
+ We call these runs as one `repeat` of measurement.
+ repeat: int
+ The number of times to repeat the measurement.
+ In total, the function will be invoked (1 + number x repeat) times,
+ where the first one is warm up and will be discarded.
+ The returned result contains `repeat` costs,
+ each of which is an average of `number` costs.
+ min_repeat_ms: int
+ Minimum repeat time in ms. if the execution latency is too short,
+ increase the number of runs to the given time (in ms) to reduce the
measurement error.
+ """
+ tracker_host = "127.0.0.1"
Review Comment:
Not sure if hardcoding the tracker information here is a good idea; what
about using it as the default of another argument `rpc_config` for the function?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]