Re: [PR] [TIR] Allow symbolic bounds in IndexMap analysis [tvm]
LeiWang1999 commented on PR #15264: URL: https://github.com/apache/tvm/pull/15264#issuecomment-1865622648

Inserting `(*indices).MutateByApply([&](const PrimExpr& e) { return SimplifyNonTrivialExpr(e, analyzer_); });` before https://github.com/apache/tvm/blob/f36a093c20963a3827be0cf4fd5e15a0cac69f31/src/tir/schedule/primitive/layout_transformation.cc#L806-L807 to simplify the indices works for me. It looks like this simplification was removed by this commit; I wonder whether there is a specific rationale behind that.
Re: [I] [Bug] [Unity] TVMError when loading ONNX model with CumSum operator [tvm]
Thrsu closed issue #15728: [Bug] [Unity] TVMError when loading ONNX model with CumSum operator URL: https://github.com/apache/tvm/issues/15728
Re: [I] [Bug][Unity] TVMError when using relax to load model with Trilu operator [tvm]
Thrsu closed issue #15729: [Bug][Unity] TVMError when using relax to load model with Trilu operator URL: https://github.com/apache/tvm/issues/15729
(tvm) branch nightly updated (759ee1236a -> f36a093c20)
This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a change to branch nightly in repository https://gitbox.apache.org/repos/asf/tvm.git

 from 759ee1236a [Support] Add Interrupt Handling in Pipe (#16255)
  add 3a57a40c1b [RUNTIME][CLML] Fix for CLML ops and enable more test case (#15896)
  add f36a093c20 Update conv2d.py (#16262)

No new revisions were added by this update.

Summary of changes:
 python/tvm/relay/op/contrib/clml.py              | 118 ++-
 python/tvm/topi/intel_graphics/conv2d.py         |   3 +-
 src/relay/backend/contrib/clml/codegen.cc        |   2 +-
 src/runtime/contrib/clml/clml_runtime.cc         | 521 -
 tests/python/contrib/test_clml/conftest.py       |  21 +-
 tests/python/contrib/test_clml/infrastructure.py | 242 +++---
 tests/python/contrib/test_clml/test_network.py   | 249 +++---
 tests/python/contrib/test_clml/test_ops.py       | 942 +--
 tests/scripts/task_python_adreno.sh              |   1 +
 9 files changed, 1333 insertions(+), 766 deletions(-)
Re: [PR] [Unity] Dispatch cumsum and sort [tvm]
yongwww commented on code in PR #16254: URL: https://github.com/apache/tvm/pull/16254#discussion_r1433425196

## tests/python/relax/test_backend_dispatch_sort_scan.py: ##
@@ -0,0 +1,415 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+import tvm
+import tvm.script
+import tvm.testing
+from tvm.script import relax as R, tir as T, ir as I
+
+from tvm.relax.backend import DispatchSortScan
+from tvm.ir.base import assert_structural_equal
+
+
+def test_dispatch_cumsum():
+    @I.ir_module
+    class Before:
+        I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]})
+
+        @R.function
+        def foo(x: R.Tensor((2, 3), "float32", "llvm")):
+            with R.dataflow():
+                gv = R.cumsum(x, axis=1, dtype="float64")
+                R.output(gv)
+            return gv
+
+    @I.ir_module
+    class Expected:
+        I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]})
+
+        @T.prim_func(private=True)
+        def cumsum(var_A: T.handle, out_buf: T.Buffer((T.int64(2), T.int64(3)), "float64")):
+            T.func_attr({"tir.noalias": T.bool(True)})
+            A = T.match_buffer(var_A, (T.int64(2), T.int64(3)), offset_factor=1)
+            with T.block("cumsum_generic"):
+                T.reads(A[T.int64(0) : T.int64(2), T.int64(0) : T.int64(3)])
+                T.writes(out_buf[T.int64(0) : T.int64(2), T.int64(0) : T.int64(3)])
+                for fused in T.parallel(T.int64(2)):
+                    out_buf[
+                        fused * T.int64(3) // T.int64(3), fused * T.int64(3) % T.int64(3)
+                    ] = T.Cast(
+                        "float64",
+                        A[fused * T.int64(3) // T.int64(3), fused * T.int64(3) % T.int64(3)],
+                    )
+                    for _k in range(T.int64(2)):
+                        out_buf[
+                            (fused * T.int64(3) + (_k + T.int64(1))) // T.int64(3),
+                            (fused * T.int64(3) + (_k + T.int64(1))) % T.int64(3),
+                        ] = out_buf[
+                            (fused * T.int64(3) + (_k + T.int64(1) - T.int64(1))) // T.int64(3),
+                            (fused * T.int64(3) + (_k + T.int64(1) - T.int64(1))) % T.int64(3),
+                        ] + T.Cast(
+                            "float64",
+                            A[
+                                (fused * T.int64(3) + (_k + T.int64(1))) // T.int64(3),
+                                (fused * T.int64(3) + (_k + T.int64(1))) % T.int64(3),
+                            ],
+                        )
+
+        @R.function
+        def foo(
+            x: R.Tensor((2, 3), dtype="float32", vdevice="llvm")
+        ) -> R.Tensor((2, 3), dtype="float64", vdevice="llvm"):
+            cls = Expected
+            with R.dataflow():
+                gv = R.call_tir(cls.cumsum, (x,), out_sinfo=R.Tensor((2, 3), dtype="float64"))
+                R.output(gv)
+            return gv
+
+    mod = DispatchSortScan()(Before)
+    assert_structural_equal(mod, Expected, map_free_vars=True)
+
+
+@pytest.mark.skip("The emitted primfunc is not roundtripable, failed in build.")
+def test_dispatch_cumsum_cuda():
+    @I.ir_module
+    class Before:

Review Comment:
   the error is gone via applying `dlight.ApplyDefaultSchedule(dlight.gpu.Fallback())(mod)`
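A minimal sketch of the workaround mentioned in the review comment above; `mod` is assumed to be the IRModule produced by `DispatchSortScan`, and a CUDA target is assumed to be available:

```python
import tvm
from tvm import dlight, relax

# Schedule the dispatched module with dlight's GPU fallback before building,
# which is what makes the build error mentioned above go away.
with tvm.target.Target("cuda"):
    mod = dlight.ApplyDefaultSchedule(dlight.gpu.Fallback())(mod)  # `mod` assumed from context
    ex = relax.build(mod, target="cuda")
```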
Re: [PR] [Frontend][PaddlePaddle] Add Support for PaddlePaddle Quantized Model [tvm]
Zheng-Bicheng closed pull request #15417: [Frontend][PaddlePaddle] Add Support for PaddlePaddle Quantized Model URL: https://github.com/apache/tvm/pull/15417
Re: [I] [Bug][Unity] TVMError when using relax to load model with Trilu operator [tvm]
jikechao commented on issue #15729: URL: https://github.com/apache/tvm/issues/15729#issuecomment-1865393531

@Thrsu This bug has been fixed by #15924; please recheck and close the issue.
Re: [PR] [Unity][Draft][WIP] Relax language specification [tvm]
slyubomirsky commented on PR #14148: URL: https://github.com/apache/tvm/pull/14148#issuecomment-1865271775

Added semantics for heterogeneous computation per #15823. Please have a look, @yongwww! Some of the proposed rules haven't been implemented yet, but I think we may need to introduce new invariants to keep things consistent and to address some cases that the current unit tests don't check.
Re: [I] [Unity] [Tracking Issue] Heterogeneous execution for Relax [tvm]
yongwww closed issue #15101: [Unity] [Tracking Issue] Heterogeneous execution for Relax URL: https://github.com/apache/tvm/issues/15101
(tvm) branch unity updated: [Unity] Add Relax multi-device e2e cases (#15823)
This is an automated email from the ASF dual-hosted git repository.

yongwww pushed a commit to branch unity in repository https://gitbox.apache.org/repos/asf/tvm.git

The following commit(s) were added to refs/heads/unity by this push:
     new 1c35c39264 [Unity] Add Relax multi-device e2e cases (#15823)

1c35c39264 is described below

commit 1c35c392648e4336fc5e00ab91abb37af997cd59
Author: Yong Wu
AuthorDate: Wed Dec 20 13:52:56 2023 -0800

    [Unity] Add Relax multi-device e2e cases (#15823)

    * [Unity] filter out non-GPU primfuncs in default_gpu_schedule
    * Add Relax heterogeneous e2e case
    * Remove get_prim_func_device
    * Update test cases
    * Fix flake8
    * fix lint
    * Add test case for change of default_gpu_schedule
    * fix comment
---
 python/tvm/driver/build_module.py                  |  27 ++-
 python/tvm/relax/utils.py                          |  26 ++-
 python/tvm/relax/vm_build.py                       |  32 ++--
 python/tvm/runtime/relax_vm.py                     |   7 +-
 python/tvm/testing/utils.py                        |  20 +++
 src/ir/module.cc                                   |   2 +-
 src/relax/transform/call_tir_rewrite.cc            |  39 -
 src/relax/transform/legalize_ops.cc                |  42 +
 src/relax/transform/utils.h                        |  11 ++
 src/runtime/relax_vm/vm.cc                         |   3 -
 src/script/printer/relax/utils.h                   |   1 -
 src/tir/transforms/default_gpu_schedule.cc         |  49 --
 tests/python/relax/test_frontend_stablehlo.py      |   4 +-
 tests/python/relax/test_vm_multi_device.py         | 186 +
 .../test_transform_default_gpu_schedule.py         |  73
 15 files changed, 471 insertions(+), 51 deletions(-)

diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py
index 9389e7fbee..52303123c1 100644
--- a/python/tvm/driver/build_module.py
+++ b/python/tvm/driver/build_module.py
@@ -243,20 +243,33 @@ def build(
     if not isinstance(inputs, (dict, container.Map)):
         target = Target.current() if target is None else target
-        target = target if target else "llvm"
-        target_input_mod = {target: input_mod}
+        if target is None and isinstance(input_mod, tvm.IRModule):
+            target_mod = {}
+            for gvar, func in input_mod.functions.items():
+                tgt = func.attrs["target"] if func.attrs and "target" in func.attrs else "llvm"
+                if tgt not in target_mod:
+                    target_mod[tgt] = {}
+                target_mod[tgt][gvar] = func
+
+            target_input_mod = {}
+            for tgt in target_mod.keys():
+                tir_mod = tvm.IRModule(target_mod[tgt])
+                tir_mod.with_attrs(input_mod.attrs)
+                target_input_mod[tgt] = tir_mod
+        else:
+            target_input_mod = {target: input_mod}
     else:
-        target_input_mod = inputs
+        target_input_mod = {tgt: lower(mod) for tgt, mod in inputs.items()}

     # Because modules can be created from a variety of sources, we annotate them
     # with the relevant attributes here to ensure they propagate
     annotated_mods = {}
-    for tar, mod in target_input_mod.items():
-        if not isinstance(tar, (str, Target)):
+    for tgt, mod in target_input_mod.items():
+        if not isinstance(tgt, (str, Target)):
             raise ValueError("The key of inputs must be str or " "Target when inputs is dict.")
         if not isinstance(mod, tvm.IRModule):
-            raise ValueError("inputs must be Schedule, IRModule," "or dict of str to IRModule.")
-        annotated_mods[tar] = mod.with_attr("runtime", runtime)
+            raise ValueError("inputs must be Schedule, IRModule, " "or dict of str to IRModule.")
+        annotated_mods[tgt] = mod.with_attr("runtime", runtime)

     # TODO(mbs): Both CompilationConfig and TIRToRuntime implement the same host target
     # defaulting logic, but there's currently no way to get back the decided host.
diff --git a/python/tvm/relax/utils.py b/python/tvm/relax/utils.py
index a1fa9cafe8..b720a727f6 100644
--- a/python/tvm/relax/utils.py
+++ b/python/tvm/relax/utils.py
@@ -28,7 +28,7 @@ from . import _ffi_api
 from .expr import Tuple as rx_Tuple
 from .expr import Expr, ShapeExpr, Function, PrimValue, StringImm, te_tensor
 from ..te import Tensor as te_Tensor, create_prim_func
-from ..ir import Array, Attrs, Type, Map
+from ..ir import Array, Attrs, Type, Map, VDevice
 from .struct_info import PrimStructInfo, ShapeStructInfo, TensorStructInfo
@@ -418,6 +418,24 @@ def gen_call_tir_inputs(
     diff = used_vars - bound_vars
     return list(diff)

+def _get_vdevice(arg: Any) -> Optional[VDevice]:
+    """get the virtual device from arguments."""
+    vdevice = None
+    if isinstance(arg, Expr):  # type: ignore
+        if
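As a rough illustration of the multi-device setup these e2e cases exercise (modeled on the vdevice annotations quoted elsewhere in this digest, not copied from the new tests):

```python
# Rough sketch: an IRModule that declares two virtual devices so Relax tensors
# can carry a per-device placement ("llvm" here). Not taken from test_vm_multi_device.py.
from tvm.script import ir as I, relax as R

@I.ir_module
class Module:
    I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]})

    @R.function
    def foo(x: R.Tensor((2, 3), "float32", "llvm")):
        with R.dataflow():
            gv = R.add(x, x)
            R.output(gv)
        return gv
```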
Re: [PR] [Unity] Add Relax multi-device e2e cases [tvm]
yongwww merged PR #15823: URL: https://github.com/apache/tvm/pull/15823
Re: [PR] [Unity] Dispatch cumsum and sort [tvm]
jinhongyii commented on code in PR #16254: URL: https://github.com/apache/tvm/pull/16254#discussion_r1433210709

## python/tvm/relax/backend/dispatch_sort_scan.py: ##
@@ -0,0 +1,102 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument, redefined-argument-from-local
+"""Dispatch sort and scan operators to platform dependent implementation."""
+
+from tvm import topi
+from tvm.ir import Op
+from tvm.ir.module import IRModule
+from tvm.ir.transform import PassContext, module_pass
+from tvm.target import Target
+from tvm.contrib.thrust import can_use_thrust
+from tvm.relax import Expr, Function, Call, PyExprMutator, expr_functor, TensorStructInfo
+
+
+@expr_functor.mutator
+class SortScanDispatcher(PyExprMutator):
+    """
+    Dispatcher to dispatch sort and scan.
+
+    """
+
+    def __init__(self, mod):
+        super().__init__(mod)
+
+    def _get_target(self, expr: Expr) -> Target:
+        sinfo = expr.struct_info
+        # Get target information from TensorStructInfo
+        if isinstance(sinfo, TensorStructInfo):
+            vdevice = sinfo.vdevice
+            if vdevice is not None:
+                return vdevice.target
+        # Return the target in current context
+        target = Target.current()
+        if target is None:
+            raise ValueError(
+                "Target not found. Please ensure that the target is annotated within the module, \
+                or alternatively, execute this within a specified target context."
+            )
+        return target
+
+    def visit_call_(self, call: Call) -> Expr:
+        if not isinstance(call.op, Op):
+            return super().visit_call_(call)
+
+        if call.op.name == "relax.sort":
+            tgt = self._get_target(call)
+            with tgt:
+                if can_use_thrust(tgt, "tvm.contrib.thrust.sort"):
+                    return self.builder_.call_te(
+                        topi.cuda.sort_thrust,
+                        call.args[0],
+                        call.attrs.axis,
+                        not call.attrs.descending,
+                    )
+                return self.builder_.call_te(

Review Comment:
   please add a {"tir.is_scheduled": 1} attr to the sort and cumsum function to be compatible with default schedule
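For illustration, a standalone sketch of the attribute being requested; the toy PrimFunc below is hypothetical and is not the PR's code:

```python
# Marking a PrimFunc with "tir.is_scheduled" tells the default GPU schedule pass
# (and dlight's fallback) to leave it alone. `dummy` is a made-up example function.
import tvm
from tvm.script import tir as T

@T.prim_func
def dummy(A: T.Buffer((8,), "float32")):
    for i in range(8):
        A[i] = T.float32(0)

scheduled = dummy.with_attr("tir.is_scheduled", 1)
print(scheduled.attrs["tir.is_scheduled"])  # 1
```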
Re: [PR] [TIR] Allow symbolic bounds in IndexMap analysis [tvm]
LeiWang1999 commented on PR #15264: URL: https://github.com/apache/tvm/pull/15264#issuecomment-1864872348

Thanks @tqchen. I found that even with `index_map = IndexMap.from_func(ldmatrix_trans_permutation_16x16_32x8_16x16, index_dtype="int32")`, the inverse map is still not correct; the output is:

```bash
inject transform=
# from tvm.script import tir as T
@T.prim_func
def main(A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16")):
    T.func_attr({"tir.noalias": T.bool(True)})
    # with T.block("root"):
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[(vi * 2 + vj // 8) // 16 * 8 + (vi * 2 + vj // 8) % 8, (vi * 2 + vj // 8) % 16 // 8 * 8 + vj % 8])
            T.writes(A[vi, vj])
            A[vi, vj] = B[(vi * 2 + vj // 8) // 16 * 8 + (vi * 2 + vj // 8) % 8, (vi * 2 + vj // 8) % 16 // 8 * 8 + vj % 8]

inverse inject transform=
# from tvm.script import tir as T
@T.prim_func
def main(A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16")):
    T.func_attr({"tir.noalias": T.bool(True)})
    # with T.block("root"):
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[(vi * 2 + vj // 8) // 2, vj % 16])
            T.writes(A[vi, vj])
            A[vi, vj] = B[(vi * 2 + vj // 8) // 2, vj % 16]
```

After applying the inverse map on top of the map, the layout should be restored to its state prior to the transformation. Before this pull request, the code behaved as expected:

```bash
inject transform=
# from tvm.script import tir as T
@T.prim_func
def func(A: T.Buffer[(16, 16), "float16"], B: T.Buffer[(16, 16), "float16"]):
    # function attr dict
    T.func_attr({"tir.noalias": True, "global_symbol": "main"})
    # body
    # with T.block("root")
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[vi // 8 * 8 + vi % 4 * 2 + vj // 8, vi % 8 // 4 * 8 + vj % 8])
            T.writes(A[vi, vj])
            A[vi, vj] = B[vi // 8 * 8 + vi % 4 * 2 + vj // 8, vi % 8 // 4 * 8 + vj % 8]

inverse inject transform=
# from tvm.script import tir as T
@T.prim_func
def func(A: T.Buffer[(16, 16), "float16"], B: T.Buffer[(16, 16), "float16"]):
    # function attr dict
    T.func_attr({"tir.noalias": True, "global_symbol": "main"})
    # body
    # with T.block("root")
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[vi, vj])
            T.writes(A[vi, vj])
            A[vi, vj] = B[vi, vj]
```

I'll take a look tomorrow.
Re: [PR] [TIR] Allow symbolic bounds in IndexMap analysis [tvm]
tqchen commented on PR #15264: URL: https://github.com/apache/tvm/pull/15264#issuecomment-1864818452

@LeiWang1999 what you hit seems to be related to the index dtype bound (i32 vs. i64), which is a bit unfortunate, but necessary as we transition to enabling i64.

```
index_map = IndexMap.from_func(ldmatrix_trans_permutation_16x16_32x8_16x16, index_dtype="int32")
```

If you add `index_dtype="int32"`, given that your loops are in i32, the inverse map seems to be OK. If we don't do that, a cast is introduced and, as a result, we cannot construct the inverse.
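A minimal sketch of that suggestion, reusing the permutation from the report further down this thread:

```python
# Build the IndexMap with an explicit int32 index dtype so no casts get in the
# way of inversion; the mapping is the one from the report below, inlined here.
from tvm.tir import IndexMap

def ldmatrix_trans_permutation_16x16_32x8_16x16(kernel_i, kernel_j):
    thread_id = kernel_i * 2 + kernel_j // 8
    local_id = kernel_j % 8
    row = 8 * (thread_id // 16) + (thread_id % 8)
    col = 8 * ((thread_id % 16) // 8) + local_id % 8
    return row, col

index_map = IndexMap.from_func(
    ldmatrix_trans_permutation_16x16_32x8_16x16, index_dtype="int32"
)
inverse_map = index_map.inverse([16, 16])  # invert over the 16x16 domain
```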
Re: [PR] [TIR] Allow symbolic bounds in IndexMap analysis [tvm]
tqchen commented on PR #15264: URL: https://github.com/apache/tvm/pull/15264#issuecomment-1864745035

@LeiWang1999 do you mind digging further? Specifically, it would be good to know what the difference is in terms of the input to the index map. Likely the analyzer has more context.

```
IterMapExpr or subclasses should only result from calls in IterMapRewriter using DirectMutate. Indirect return occurred in i
```

This seems to indicate that there are some issues in the internal IterMapRewriter.
Re: [I] [Bug] Check failed: (reporter->AssertEQ(data->shape[data->shape.size() - 1], weight->shape[1])) is false: DenseRel: input dimension doesn’t match, data shape=[1, 512], weight shape=[512, 1000]
Liuz233 commented on issue #9496: URL: https://github.com/apache/tvm/issues/9496#issuecomment-1864442423

Excuse me, how did you solve this problem? I am running into the same issue.
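For context, a small illustration of the shape contract that this check enforces (this is not a fix for the specific model above): `relay.nn.dense` expects the weight in `(units, in_units)` layout, so for data of shape `(1, 512)` the weight must have shape `(1000, 512)`, not `(512, 1000)`.

```python
# Illustrative only: DenseRel checks data.shape[-1] == weight.shape[1], which holds
# once the weight is in (units, in_units) layout.
import numpy as np
import tvm
from tvm import relay

data = relay.var("data", shape=(1, 512), dtype="float32")
weight = relay.const(np.zeros((1000, 512), dtype="float32"))  # (units, in_units)
func = relay.Function([data], relay.nn.dense(data, weight))
mod = relay.transform.InferType()(tvm.IRModule.from_expr(func))
print(mod)  # dense output type: Tensor[(1, 1000), float32]
```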
[PR] [microNPU][ETHOSU] Add fixed point for tanh [tvm]
Aleksei-grovety opened a new pull request, #16266: URL: https://github.com/apache/tvm/pull/16266

Add support for calculating tanh with 16-bit fixed point. Add the `enable_fixed_point` flag to enable fixed-point calculation. We get good accuracy with 1 bit for the integer part and 15 bits for the fractional part; other splits give worse results.

cc @lhutton1, @ekalda, @leandron
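For illustration, the 1.15 fixed-point split described above (1 integer bit, 15 fractional bits); this is just the encoding, not the PR's implementation:

```python
# Q1.15 encoding: values are stored as integers scaled by 2**15, so the
# representable range is roughly [-1, 1) with a quantization step of 2**-15.
import math

def to_q1_15(x: float) -> int:
    return int(round(x * (1 << 15)))

def from_q1_15(v: int) -> float:
    return v / (1 << 15)

y = math.tanh(0.5)        # ~0.462117
q = to_q1_15(y)           # 15143
print(from_q1_15(q))      # ~0.462128, quantization error below 2**-15
```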
Re: [PR] [TIR] Allow symbolic bounds in IndexMap analysis [tvm]
LeiWang1999 commented on PR #15264: URL: https://github.com/apache/tvm/pull/15264#issuecomment-1864277870

Hi @junrushao, I have encountered an issue and bisected it to this pull request. Here is my case:

```python
import tvm
from tvm.script import tir as T
from tvm.tir import IndexMap

def ldmatrix_trans_32x8_to_shared_16x16_layout(thread_id, local_id):
    row = 8 * (thread_id // 16) + (thread_id % 8)
    col = 8 * ((thread_id % 16) // 8) + local_id % 8
    return row, col

def ldmatrix_trans_permutation_16x16_32x8_16x16(kernel_i, kernel_j):
    thread_id = kernel_i * 2 + kernel_j // 8
    local_id = kernel_j % 8
    return ldmatrix_trans_32x8_to_shared_16x16_layout(thread_id, local_id)

@tvm.script.ir_module
class MyModule:
    @T.prim_func
    def main(a: T.handle, b: T.handle):
        T.func_attr({"global_symbol": "main", "tir.noalias": True})
        A = T.match_buffer(a, [16, 16], dtype="float16")
        B = T.match_buffer(b, [16, 16], dtype="float16")
        for i, j in T.grid(16, 16):
            with T.block("B"):
                vi, vj = T.axis.remap("SS", [i, j])
                T.reads(B[vi, vj])
                T.writes(A[vi, vj])
                A[vi, vj] = B[vi, vj]

ir_module = MyModule
sch = tvm.tir.Schedule(ir_module)
block_b = sch.get_block("B")

sch.transform_layout(block_b, ('read', 0), ldmatrix_trans_permutation_16x16_32x8_16x16)
print("inject transform=")
print(sch.mod["main"].script())

index_map = IndexMap.from_func(ldmatrix_trans_permutation_16x16_32x8_16x16)
inversed_index_map = index_map.inverse([16, 16])

def inverse_permutation(i, j):
    return inversed_index_map.map_indices([i, j])

sch.transform_layout(block_b, ('read', 0), inverse_permutation)
print("inverse inject transform=")
print(sch.mod["main"].script())
```

Before this PR, the output is:

```bash
# from tvm.script import tir as T
@T.prim_func
def main(A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16")):
    T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
    # with T.block("root"):
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[vi, vj])
            T.writes(A[vi, vj])
            A[vi, vj] = B[vi, vj]
```

As we can see, the index map can be simplified and inverted.

After this PR, the output is:

```bash
inject transform=
# from tvm.script import tir as T
@T.prim_func
def main(A: T.Buffer((16, 16), "float16"), B: T.Buffer((16, 16), "float16")):
    T.func_attr({"global_symbol": "main", "tir.noalias": T.bool(True)})
    # with T.block("root"):
    for i, j in T.grid(16, 16):
        with T.block("B"):
            vi, vj = T.axis.remap("SS", [i, j])
            T.reads(B[(vi * 2 + vj // 8) // 16 * 8 + (vi * 2 + vj // 8) % 8, (vi * 2 + vj // 8) % 16 // 8 * 8 + vj % 8])
            T.writes(A[vi, vj])
            A[vi, vj] = B[(vi * 2 + vj // 8) // 16 * 8 + (vi * 2 + vj // 8) % 8, (vi * 2 + vj // 8) % 16 // 8 * 8 + vj % 8]

Traceback (most recent call last):
  File "/home/t-leiwang/ladder_workspace/tvm_gpu_gemm/discuss_inversemap.py", line 42, in <module>
    sch.transform_layout(block_b, ('read', 0), inverse_permutation)
  File "/home/t-leiwang/mlc_workspace/tvm_rebase/python/tvm/tir/schedule/_type_checker.py", line 340, in wrap
    return func(*args, **kwargs)
  File "/home/t-leiwang/mlc_workspace/tvm_rebase/python/tvm/tir/schedule/schedule.py", line 3296, in transform_layout
    _ffi_api.ScheduleTransformLayout(  # type: ignore # pylint: disable=no-member
  File "/home/t-leiwang/mlc_workspace/tvm_rebase/python/tvm/_ffi/_ctypes/packed_func.py", line 238, in __call__
    raise get_last_ffi_error()
tvm._ffi.base.TVMError: Traceback (most recent call last):
  7: TVMFuncCall
  6: tvm::runtime::PackedFuncObj::Extractor<...>::Call(tvm::runtime::PackedFuncObj const*, ...)
  5: tvm::runtime::TypedPackedFunc<...>::AssignTypedLambda<...>(tvm::tir::{lambda(tvm::tir::Schedule, tvm::tir::BlockRV const&, int, int, tvm::tir::IndexMap const&, tvm::runtime::Optional const&, bool)#17}, std::__cxx11::basic_string, std::allocator
```
(tvm) branch main updated: Update conv2d.py (#16262)
This is an automated email from the ASF dual-hosted git repository.

sanirudh pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git

The following commit(s) were added to refs/heads/main by this push:
     new f36a093c20 Update conv2d.py (#16262)

f36a093c20 is described below

commit f36a093c20963a3827be0cf4fd5e15a0cac69f31
Author: anonymousdouble <112695649+anonymousdou...@users.noreply.github.com>
AuthorDate: Wed Dec 20 20:09:30 2023 +1100

    Update conv2d.py (#16262)

    refactor with chain constant value assignment to make code more Pythonic, concise and efficient.
---
 python/tvm/topi/intel_graphics/conv2d.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/tvm/topi/intel_graphics/conv2d.py b/python/tvm/topi/intel_graphics/conv2d.py
index b276bcae92..b7906cdb91 100644
--- a/python/tvm/topi/intel_graphics/conv2d.py
+++ b/python/tvm/topi/intel_graphics/conv2d.py
@@ -591,8 +591,7 @@ def _schedule_cl_spatialpack(s, op):
         OUTPUT_BLOCK_WIDTH = attrs["block_w"]

     # schedule conv
-    z_factor = 1
-    y_factor = 1
+    y_factor = z_factor = 1
     x_factor = 16
     thread_z = te.thread_axis((0, z_factor), "threadIdx.z")
     thread_y = te.thread_axis((0, y_factor), "threadIdx.y")
Re: [PR] Update conv2d.py [tvm]
quic-sanirudh merged PR #16262: URL: https://github.com/apache/tvm/pull/16262
Re: [PR] [TVMC] Add tvmc flag to print ir before and print ir after named pass [tvm]
ekalda commented on code in PR #16261: URL: https://github.com/apache/tvm/pull/16261#discussion_r1432363883

## python/tvm/driver/tvmc/compiler.py: ##
@@ -310,6 +326,10 @@ def compile_model(
         compilation.
     print_pass_times: bool
         To enable printing a breakdown of compilation times by pass. Disabled by default.
+    print_ir_before: list[str]
+        To print ir before each named pass of a comma-separated list of passes.

Review Comment:
   Nit:
   ```suggestion
        To print IR before each named pass of a comma-separated list of passes.
   ```
   Same for the other argument

## python/tvm/ir/instrument.py: ##
@@ -255,3 +255,25 @@ def render():
         profiles = timing_inst.render()
     """
     return _ffi_instrument_api.RenderTimePassProfiles()
+
+
+@pass_instrument
+class PassPrintBefore:
+    def __init__(self, print_pass_name):
+        self.print_pass_name = print_pass_name
+
+    def run_before_pass(self, mod, pass_info):
+        if pass_info.name in self.print_pass_name:
+            print("Print ir before:")
+            print(str(pass_info.name) + "\n" + str(mod) + "\n\n")
+
+
+@pass_instrument
+class PassPrintAfter:
+    def __init__(self, print_pass_name):
+        self.print_pass_name = print_pass_name
+
+    def run_after_pass(self, mod, pass_info):
+        if pass_info.name in self.print_pass_name:
+            print("Print ir after:")
+            print(str(pass_info.name) + "\n" + str(mod) + "\n\n")

Review Comment:
   Maybe we can use f-strings here (like in the tests)?

## tests/python/driver/tvmc/test_command_line.py: ##
@@ -289,3 +288,37 @@ def test_tvmc_print_pass_times(capsys, keras_simple, tmpdir_factory):
     captured_out = capsys.readouterr().out
     for exp_str in ("Compilation time breakdown by pass:", "sequential:", "us]"):
         assert exp_str in captured_out
+
+
+def test_tvmc_print_ir_before(capsys, keras_simple, tmpdir_factory):
+    pytest.importorskip("tensorflow")
+    tmpdir = tmpdir_factory.mktemp("out")
+    print_cmd = "--print-ir-before=[tir.SplitHostDevice]"
+
+    # Compile model
+    module_file = os.path.join(tmpdir, "keras-tvm.tar")
+    compile_cmd = f"tvmc compile --target 'llvm' {keras_simple} --output {module_file} {print_cmd}"
+    compile_args = compile_cmd.split(" ")[1:]
+    _main(compile_args)
+
+    # Check for timing results output
+    captured_out = capsys.readouterr().out
+    for exp_str in ("Print ir before:\n", "tir.SplitHostDevice\n"):
+        assert exp_str in captured_out
+
+
+def test_tvmc_print_ir_after(capsys, keras_simple, tmpdir_factory):
+    pytest.importorskip("tensorflow")
+    tmpdir = tmpdir_factory.mktemp("out")
+    print_cmd = "--print-ir-after=[tir.SplitHostDevice]"
+
+    # Compile model
+    module_file = os.path.join(tmpdir, "keras-tvm.tar")
+    compile_cmd = f"tvmc compile --target 'llvm' {keras_simple} --output {module_file} {print_cmd}"
+    compile_args = compile_cmd.split(" ")[1:]
+    _main(compile_args)
+
+    # Check for timing results output
+    captured_out = capsys.readouterr().out
+    for exp_str in ("Print ir after:\n", "tir.SplitHostDevice\n"):
+        assert exp_str in captured_out

Review Comment:
   Some additional things that would be good to test:
   * That it works if there is more than one pass name in the list of inputs
   * That it works if both `--print-ir-before` and `--print-ir-after` are present in one command line

## tests/python/driver/tvmc/test_command_line.py: ##
@@ -289,3 +288,37 @@ def test_tvmc_print_pass_times(capsys, keras_simple, tmpdir_factory):
     captured_out = capsys.readouterr().out
     for exp_str in ("Compilation time breakdown by pass:", "sequential:", "us]"):
         assert exp_str in captured_out
+
+
+def test_tvmc_print_ir_before(capsys, keras_simple, tmpdir_factory):
+    pytest.importorskip("tensorflow")
+    tmpdir = tmpdir_factory.mktemp("out")
+    print_cmd = "--print-ir-before=[tir.SplitHostDevice]"
+
+    # Compile model
+    module_file = os.path.join(tmpdir, "keras-tvm.tar")
+    compile_cmd = f"tvmc compile --target 'llvm' {keras_simple} --output {module_file} {print_cmd}"
+    compile_args = compile_cmd.split(" ")[1:]
+    _main(compile_args)
+
+    # Check for timing results output

Review Comment:
   Update the comment

## python/tvm/ir/instrument.py: ##
@@ -255,3 +255,25 @@ def render():
         profiles = timing_inst.render()
     """
     return _ffi_instrument_api.RenderTimePassProfiles()
+
+
+@pass_instrument
+class PassPrintBefore:
+    def __init__(self, print_pass_name):
+        self.print_pass_name = print_pass_name
+
+    def run_before_pass(self, mod, pass_info):
+        if pass_info.name in self.print_pass_name:
+            print("Print ir before:")

Review Comment:
   Nit:
   ```suggestion
               print("Print IR before:")
   ```

##
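For context, a sketch of how such instruments are installed through a `PassContext`; the `PassPrintBefore`/`PassPrintAfter` classes are the ones proposed in the patch quoted above (assumed importable), and the tiny Relay module is only for illustration:

```python
# Sketch: install the proposed instruments directly when building, so the IR is
# printed before/after any pass whose name matches the given list.
import tvm
from tvm import relay

x = relay.var("x", shape=(1, 4), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x], relay.nn.relu(x)))

passes = ["tir.SplitHostDevice"]
with tvm.transform.PassContext(
    opt_level=3,
    instruments=[PassPrintBefore(passes), PassPrintAfter(passes)],  # from the quoted patch
):
    lib = relay.build(mod, target="llvm")
```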
(tvm) branch main updated: [RUNTIME][CLML] Fix for CLML ops and enable more test case (#15896)
This is an automated email from the ASF dual-hosted git repository.

srk pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git

The following commit(s) were added to refs/heads/main by this push:
     new 3a57a40c1b [RUNTIME][CLML] Fix for CLML ops and enable more test case (#15896)

3a57a40c1b is described below

commit 3a57a40c1ba40e1c330346905f8db72775fc9992
Author: krishnaraj36
AuthorDate: Wed Dec 20 13:50:00 2023 +0530

    [RUNTIME][CLML] Fix for CLML ops and enable more test case (#15896)

    * [RUNTIME][CLML] Fix for few clml ops
      Fixed the dense operator and enhance clml network testcase
    * [RUNTIME][CLML] Fix for dense layer and float16
      Fixed the dense layer issue in network level and improved coverage of dense layer with clml
      Fixed float16 crash error.
    * Update comment for dense pattern
    * fix in clml test cases
    * Enable more test cases and few fixes
    * Fix the import error
    * Fix the import error
    * Fix in batchnorm testcase
    * Restructure clml test case and enable vm executor
    * Fix the import error in clml test network
    * Fix the test failure for vm tests
    * Update clml.py
---
 python/tvm/relay/op/contrib/clml.py              | 118 ++-
 src/relay/backend/contrib/clml/codegen.cc        |   2 +-
 src/runtime/contrib/clml/clml_runtime.cc         | 521 -
 tests/python/contrib/test_clml/conftest.py       |  21 +-
 tests/python/contrib/test_clml/infrastructure.py | 242 +++---
 tests/python/contrib/test_clml/test_network.py   | 249 +++---
 tests/python/contrib/test_clml/test_ops.py       | 942 +--
 tests/scripts/task_python_adreno.sh              |   1 +
 8 files changed, 1332 insertions(+), 764 deletions(-)

diff --git a/python/tvm/relay/op/contrib/clml.py b/python/tvm/relay/op/contrib/clml.py
index f194dd114b..14dd35a3cb 100644
--- a/python/tvm/relay/op/contrib/clml.py
+++ b/python/tvm/relay/op/contrib/clml.py
@@ -18,6 +18,7 @@
 """CLML Library supported operators."""
 import json
 from string import Template
+import numpy as np

 import tvm
 from tvm import relay
@@ -27,7 +28,7 @@ from tvm.relay import transform
 from tvm.relay.build_module import bind_params_by_name
 from tvm.relay import function as _function
 from tvm.relay.expr_functor import ExprMutator
-from tvm.relay.expr import Call, TupleGetItem
+from tvm.relay.expr import Call, TupleGetItem, Var, Constant
 from ...dataflow_pattern import wildcard, is_op, is_constant, is_tuple_get_item, is_tuple
 from .register import register_pattern_table
@@ -81,34 +82,61 @@ class RemoveDropoutPass:
         return RemoveDropout().visit(func)


-class BroadcastInputs(ExprMutator):
+class OptimizeBatchnorm(ExprMutator):
     """
-    Binary operators need broadcasting for CLML.
+    Fuse Conv+Batchnorm and constant folder to generate Conv+Add.
     """

-    def visit_call(self, call):
-        if call.op.name in ["add", "subtract", "multiply", "divide", "maximum", "minimum"]:
-            new_fn = self.visit(call.op)
-            call_shape = call.checked_type.shape
-            lhs = call.args[0]
-            rhs = call.args[1]
-            lhs_shape = lhs.checked_type.shape
-            rhs_shape = rhs.checked_type.shape
-            if list(call_shape) != list(lhs_shape):
-                lhs = relay.broadcast_to(self.visit(lhs), call_shape)
-            if list(call_shape) != list(rhs_shape):
-                rhs = relay.broadcast_to(self.visit(rhs), call_shape)
-            args = [lhs, rhs]
-            return Call(new_fn, args, call.attrs)
-        return super().visit_call(call)
+    def visit_call(self, call) -> relay.expr.Expr:
+        new_args = []
+        for arg in call.args:
+            if (
+                not isinstance(arg, (Var, Constant))
+                and isinstance(arg, tvm.relay.TupleGetItem)
+                and arg.tuple_value.op.name == "nn.batch_norm"
+                and (not isinstance(arg.tuple_value.args[0], (Var, Constant)))
+                and arg.tuple_value.args[0].op.name == "nn.conv2d"
+            ):
+                ep = arg.tuple_value.attrs["epsilon"]
+                wt = arg.tuple_value.args[1].data.numpy()
+                bs = arg.tuple_value.args[2].data.numpy()
+                mn = arg.tuple_value.args[3].data.numpy()
+                vr = arg.tuple_value.args[4].data.numpy() + ep
+                dino = np.sqrt(vr)
+                wt = wt / dino
+                bs = bs - mn * wt
+                conv_op = arg.tuple_value.args[0]
+                conv_args = list(conv_op.args)
+                wt_conv = conv_args[1].data.numpy()
+                if conv_op.attrs["kernel_layout"] == "OIHW":
+                    wt = wt.reshape(wt.shape[0], 1, 1, 1)
+                elif conv_op.attrs["kernel_layout"] == "IOHW":
+                    wt = wt.reshape(1, wt.shape[0], 1, 1)
+                else:
+
Re: [PR] [RUNTIME][CLML] Fix for CLML ops and enable more test case [tvm]
srkreddy1238 merged PR #15896: URL: https://github.com/apache/tvm/pull/15896