This is an automated email from the ASF dual-hosted git repository.
lunderberg pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 0af9ff90e0 [TIR] Restrict tir.transform.LowerTVMBuiltin to host
functions (#14944)
0af9ff90e0 is described below
commit 0af9ff90e0e68fa459d72f8c7afd290ee25a8a62
Author: Eric Lunderberg <[email protected]>
AuthorDate: Sat Jun 3 14:47:11 2023 -0500
[TIR] Restrict tir.transform.LowerTVMBuiltin to host functions (#14944)
* [Bugfix][TIR][VTA] Update host-side target, even without device func
This resolves an issue introduced by the combination of
https://github.com/apache/tvm/pull/14918 and
https://github.com/apache/tvm/pull/14945. The bug occurred for
targets that do not require device-side codegen, but do require a
`device_type` other than `kDLCPU`. It wasn't caught by CI, as the
issue only occurred with the combination of both PRs.
1. #14918 updated `SplitHostDevice` to only modify the `"target"`
attribute when a device-side function has been extracted.
2. For VTA, there is no device-side function, as everything is done
through host-side API calls.
3. From (1) and (2), the VTA examples kept the target
`T.target("ext_dev", host="llvm")` after the `SplitHostDevice`
pass, instead of being updated to `T.target("llvm")`.
4. #14945 restricted CombineContextCall to only apply to host-side
functions.
5. From (3) and (4), the `CombineContextCall` pass was no longer
applied to the VTA context calls.
This PR fixes `SplitHostDevice`, updating the target from
`T.target("ext_dev", host="llvm")` to `T.target("llvm")`, even if no
device sections have been extracted from the function.
* [TIR] Restrict tir.transform.LowerTVMBuiltin to host functions
Previously, the `tir.transform.LowerTVMBuiltin` pass applied to all
functions in an `IRModule`, but was only applied to modules that
contain only host functions. This commit updates
`tir.transform.LowerTVMBuiltin` to apply only to host functions.
* Updated "stackvm" target to have "cpu" key.
Since the presence/absence of the "cpu" key in a target is used to
determine whether host-only calls should be run, we should make sure
to add it to "stackvm".
* Update IsHostFunc() to use "host" tag instead of "cpu"
Current CI failures due to LowerTVMBuiltin not running on "hexagon"
target, and would like to avoid conflating cpu/host.
* Avoid "host" tag for now
* Update HEXAGON_AOT_LLVM_TARGET to be recognized as host
---
python/tvm/contrib/hexagon/pytest_plugin.py | 2 +-
src/target/target_kind.cc | 5 +-
src/tir/transforms/lower_tvm_builtin.cc | 8 +-
.../test_tir_transform_lower_tvm_builtin.py | 97 +++++++++++++++++++++-
4 files changed, 103 insertions(+), 9 deletions(-)
diff --git a/python/tvm/contrib/hexagon/pytest_plugin.py
b/python/tvm/contrib/hexagon/pytest_plugin.py
index 91a01ac56d..303414e391 100644
--- a/python/tvm/contrib/hexagon/pytest_plugin.py
+++ b/python/tvm/contrib/hexagon/pytest_plugin.py
@@ -40,7 +40,7 @@ ADB_SERVER_SOCKET = "ADB_SERVER_SOCKET"
RNG_SEEDED = False
HEXAGON_AOT_LLVM_TARGET = (
- "llvm -keys=hexagon "
+ "llvm -keys=hexagon,cpu "
"-mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp "
"-mcpu=hexagonv68 -mtriple=hexagon"
)
diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
index 3c4e885ef9..44dee859d0 100644
--- a/src/target/target_kind.cc
+++ b/src/target/target_kind.cc
@@ -422,9 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon)
.add_attr_option<Array<String>>("llvm-options")
.add_attr_option<Integer>("num-cores")
.add_attr_option<Integer>("vtcm-capacity")
- .set_default_keys({"hexagon"});
+ .set_default_keys({"hexagon", "cpu"});
-TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU);
+TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU) // line break
+ .set_default_keys({"cpu"});
TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev);
diff --git a/src/tir/transforms/lower_tvm_builtin.cc
b/src/tir/transforms/lower_tvm_builtin.cc
index ea418635bc..837a3e6d35 100644
--- a/src/tir/transforms/lower_tvm_builtin.cc
+++ b/src/tir/transforms/lower_tvm_builtin.cc
@@ -629,9 +629,11 @@ namespace transform {
Pass LowerTVMBuiltin() {
auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) {
- auto* n = f.CopyOnWrite();
- n->body = BuiltinLower().Build(n->body);
- VLOG(2) << "LowerTVMBuiltin: " << f;
+ if (IsHostFunc(f).value_or(false)) {
+ auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol);
+ f.CopyOnWrite()->body = BuiltinLower().Build(f->body);
+ VLOG(2) << "LowerTVMBuiltin: " << f;
+ }
return f;
};
return CreatePrimFuncPass(pass_func, 0, "tir.LowerTVMBuiltin", {});
diff --git a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
index d224a688d2..2e0784cc31 100644
--- a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
+++ b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
@@ -56,7 +56,7 @@ def check_packed_func(target="llvm"):
# Construct a valid IRModule to be lowered:
mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([a_buffer, b_buffer,
c_buffer], stmt))
- target = tvm.target.Target(target)
+ target = tvm.target.Target(target, host="llvm")
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod)
mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol",
"main"))(mod)
mod = tvm.tir.transform.MakePackedAPI()(mod)
@@ -189,6 +189,97 @@ def test_lower_overflow_int32():
tvm.build(func, target="llvm") # should not crash
+class TestLowerDeviceAllocate(tvm.testing.CompareBeforeAfter):
+ """Device allocations are lowered to TVMBackend* calls
+
+ This test validates the current behavior of LowerTVMBuiltin. This
+ unit test may be improved in the future by addressing:
+
+ - The AttrStmt for "storage_alignment" occurs outside the LetStmt
+ that defines the pointer, which is currently required by
+ CodeGenLLVM. This fails to match when `map_free_vars=False`
+ (default), because the first occurrence is undefined.
+
+ - The call to TVMBackendFreeWorkspace uses the allocated pointer,
+ but occurs outside the LetStmt.
+
+ - TVMScript always produces "handle" dtype for
+ `T.tvm_throw_last_error`, while LowerTVMBuiltin outputs "int32"
+ dtype.
+ """
+
+ transform = tvm.tir.transform.LowerTVMBuiltin()
+
+ def before():
+ T.func_attr({"target": T.target("llvm")})
+ T.attr("dummy", "device_type", 2) # kDLCuda
+ T.attr("dummy", "device_id", 0)
+ ptr = T.allocate([16], "float32")
+ buf = T.decl_buffer(16, "float32", data=ptr)
+ buf[0] = 0.0
+
+ def expected():
+ T.func_attr({"target": T.target("llvm")})
+ ptr = T.handle("float32", "global")
+ T.attr(ptr, "storage_alignment", 64)
+ with T.LetStmt(T.TVMBackendAllocWorkspace(2, 0, T.uint64(64), 2, 32),
var=ptr):
+ if T.isnullptr(ptr):
+ T.Call("int32", "tir.tvm_throw_last_error", [])
+ buf = T.decl_buffer((16,), data=ptr)
+ buf[0] = T.float32(0)
+ if T.TVMBackendFreeWorkspace(2, 0, ptr) != 0:
+ T.Call("int32", "tir.tvm_throw_last_error", [])
+
+ def test_compare(self, before, expected, transform):
+ after = transform(before)
+ tvm.ir.assert_structural_equal(after, expected, map_free_vars=True)
+
+
+class TestLowerCPUAllocation(tvm.testing.CompareBeforeAfter):
+ """CPU allocations can be handled at codegen time"""
+
+ transform = tvm.tir.transform.LowerTVMBuiltin()
+
+ def before():
+ T.func_attr({"target": T.target("llvm")})
+ T.attr("dummy", "device_type", 1) # kDLCPU
+ T.attr("dummy", "device_id", 0)
+ ptr = T.allocate([16], "float32")
+ buf = T.decl_buffer(16, "float32", data=ptr)
+ buf[0] = 0.0
+
+ def expected():
+ T.func_attr({"target": T.target("llvm")})
+ ptr = T.allocate([16], "float32")
+ buf = T.decl_buffer(16, "float32", data=ptr)
+ buf[0] = 0.0
+
+
+class TestLowerAllocateRequiresDeviceID(tvm.testing.CompareBeforeAfter):
+ transform = tvm.tir.transform.LowerTVMBuiltin()
+
+ def before():
+ T.func_attr({"target": T.target("llvm")})
+ T.attr("dummy", "device_id", 0)
+ ptr = T.allocate([16], "float32")
+ buf = T.decl_buffer(16, "float32", data=ptr)
+ buf[0] = 0.0
+
+ expected = tvm.TVMError
+
+
+class TestLowerAllocateRequiresDeviceType(tvm.testing.CompareBeforeAfter):
+ transform = tvm.tir.transform.LowerTVMBuiltin()
+
+ def before():
+ T.func_attr({"target": T.target("llvm")})
+ T.attr("dummy", "device_id", 0)
+ ptr = T.allocate([16], "float32")
+ buf = T.decl_buffer(16, "float32", data=ptr)
+ buf[0] = 0.0
+
+ expected = tvm.TVMError
+
+
if __name__ == "__main__":
- test_call_packed_return_non_i32()
- test_lower_packed_func()
+ tvm.testing.main()