This is an automated email from the ASF dual-hosted git repository.

lunderberg pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 0af9ff90e0 [TIR] Restrict tir.transform.LowerTVMBuiltin to host 
functions (#14944)
0af9ff90e0 is described below

commit 0af9ff90e0e68fa459d72f8c7afd290ee25a8a62
Author: Eric Lunderberg <[email protected]>
AuthorDate: Sat Jun 3 14:47:11 2023 -0500

    [TIR] Restrict tir.transform.LowerTVMBuiltin to host functions (#14944)
    
    * [Bugfix][TIR][VTA] Update host-side target, even without device func
    
    This resolves an issue introduced by the combination of
    https://github.com/apache/tvm/pull/14918 and
    https://github.com/apache/tvm/pull/14945.  The bug occurred for
    targets that do not require device-side codegen, but do require a
    `device_type` other than `kDLCPU`.  It wasn't caught by CI, as the
    issue only occurred with the combination of both PRs.
    
    1. #14918 updated `SplitHostDevice` to only modify the `"target"`
       attribute when a device-side function has been extracted.
    
    2. For VTA, there is no device-side function, as everything is done
       through host-side API calls.
    
    3. From (1) and (2), the VTA examples kept the target
       `T.target("ext_dev", host="llvm")` after the `SplitHostDevice`
       pass, instead of being updated to `T.target("llvm")`.
    
    4. #14945 restricted CombineContextCall to only apply to host-side
       passes.
    
    5. From (4) and (5), the `CombineContextCall` pass was no longer
       applied to the VTA context calls.
    
    This PR fixes `SplitHostDevice`, updating the target from
    `T.target("ext_dev", host="llvm")` to `T.target("llvm")`, even if no
    device sections have been extracted from the function.
    
    * [TIR] Restrict tir.transform.LowerTVMBuiltin to host functions
    
    Previously, the `tir.transform.LowerTVMBuiltin` pass applied to all
    functions in an `IRModule`, but was only applied to modules that
    contain only host functions.  This commit updates
    `tir.transform.LowerTVMBuiltin` to apply only to host functions.
    
    * Updated "stackvm" target to have "cpu" key.
    
    With the presence/absence of the "cpu" key in a target used to
    determine whether host-only calls should be run, should make sure to
    add it to "stackvm".
    
    * Update IsHostFunc() to use "host" tag instead of "cpu"
    
    Current CI failures due to LowerTVMBuiltin not running on "hexagon"
    target, and would like to avoid conflating cpu/host.
    
    * Avoid "host" tag for now
    
    * Update HEXAGON_AOT_LLVM_TARGET to be recognized as host
---
 python/tvm/contrib/hexagon/pytest_plugin.py        |  2 +-
 src/target/target_kind.cc                          |  5 +-
 src/tir/transforms/lower_tvm_builtin.cc            |  8 +-
 .../test_tir_transform_lower_tvm_builtin.py        | 97 +++++++++++++++++++++-
 4 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/python/tvm/contrib/hexagon/pytest_plugin.py 
b/python/tvm/contrib/hexagon/pytest_plugin.py
index 91a01ac56d..303414e391 100644
--- a/python/tvm/contrib/hexagon/pytest_plugin.py
+++ b/python/tvm/contrib/hexagon/pytest_plugin.py
@@ -40,7 +40,7 @@ ADB_SERVER_SOCKET = "ADB_SERVER_SOCKET"
 RNG_SEEDED = False
 
 HEXAGON_AOT_LLVM_TARGET = (
-    "llvm -keys=hexagon "
+    "llvm -keys=hexagon,cpu "
     "-mattr=+hvxv68,+hvx-length128b,+hvx-qfloat,-hvx-ieee-fp "
     "-mcpu=hexagonv68 -mtriple=hexagon"
 )
diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc
index 3c4e885ef9..44dee859d0 100644
--- a/src/target/target_kind.cc
+++ b/src/target/target_kind.cc
@@ -422,9 +422,10 @@ TVM_REGISTER_TARGET_KIND("hexagon", kDLHexagon)
     .add_attr_option<Array<String>>("llvm-options")
     .add_attr_option<Integer>("num-cores")
     .add_attr_option<Integer>("vtcm-capacity")
-    .set_default_keys({"hexagon"});
+    .set_default_keys({"hexagon", "cpu"});
 
-TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU);
+TVM_REGISTER_TARGET_KIND("stackvm", kDLCPU)  // line break
+    .set_default_keys({"cpu"});
 
 TVM_REGISTER_TARGET_KIND("ext_dev", kDLExtDev);
 
diff --git a/src/tir/transforms/lower_tvm_builtin.cc 
b/src/tir/transforms/lower_tvm_builtin.cc
index ea418635bc..837a3e6d35 100644
--- a/src/tir/transforms/lower_tvm_builtin.cc
+++ b/src/tir/transforms/lower_tvm_builtin.cc
@@ -629,9 +629,11 @@ namespace transform {
 
 Pass LowerTVMBuiltin() {
   auto pass_func = [](PrimFunc f, IRModule m, PassContext ctx) {
-    auto* n = f.CopyOnWrite();
-    n->body = BuiltinLower().Build(n->body);
-    VLOG(2) << "LowerTVMBuiltin: " << f;
+    if (IsHostFunc(f).value_or(false)) {
+      auto global_symbol = f->GetAttr<String>(tvm::attr::kGlobalSymbol);
+      f.CopyOnWrite()->body = BuiltinLower().Build(f->body);
+      VLOG(2) << "LowerTVMBuiltin: " << f;
+    }
     return f;
   };
   return CreatePrimFuncPass(pass_func, 0, "tir.LowerTVMBuiltin", {});
diff --git a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py 
b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
index d224a688d2..2e0784cc31 100644
--- a/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
+++ b/tests/python/unittest/test_tir_transform_lower_tvm_builtin.py
@@ -56,7 +56,7 @@ def check_packed_func(target="llvm"):
     # Construct a valid IRModule to be lowered:
     mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([a_buffer, b_buffer, 
c_buffer], stmt))
 
-    target = tvm.target.Target(target)
+    target = tvm.target.Target(target, host="llvm")
     mod = tvm.tir.transform.Apply(lambda f: f.with_attr("target", target))(mod)
     mod = tvm.tir.transform.Apply(lambda f: f.with_attr("global_symbol", 
"main"))(mod)
     mod = tvm.tir.transform.MakePackedAPI()(mod)
@@ -189,6 +189,97 @@ def test_lower_overflow_int32():
     tvm.build(func, target="llvm")  # should not crash
 
 
+class TestLowerDeviceAllocate(tvm.testing.CompareBeforeAfter):
+    """Device allocations are lowered to TVMBackend* calls
+
+    This test validates the current behavior of LowerTVMBuiltin.  This
+    unit test may be improved in the future by addressing:
+
+    - The AttrStmt for "storage_alignment" occurs outside the LetStmt
+      that defines the pointer, which is currently required by
+      CodeGenLLVM.  This fails to match when `map_free_vars=False`
+      (default), because the first occurrence is undefined.
+
+    - The call to TVMBackendFreeWorkspace uses the allocated pointer,
+      but occurs outside the LetStmt.
+
+    - TVMScript always produces "handle" dtype for
+      `T.tvm_throw_last_error`, while LowerTVMBuiltin outputs "int32"
+      dtype.
+    """
+
+    transform = tvm.tir.transform.LowerTVMBuiltin()
+
+    def before():
+        T.func_attr({"target": T.target("llvm")})
+        T.attr("dummy", "device_type", 2)  # kDLCuda
+        T.attr("dummy", "device_id", 0)
+        ptr = T.allocate([16], "float32")
+        buf = T.decl_buffer(16, "float32", data=ptr)
+        buf[0] = 0.0
+
+    def expected():
+        T.func_attr({"target": T.target("llvm")})
+        ptr = T.handle("float32", "global")
+        T.attr(ptr, "storage_alignment", 64)
+        with T.LetStmt(T.TVMBackendAllocWorkspace(2, 0, T.uint64(64), 2, 32), 
var=ptr):
+            if T.isnullptr(ptr):
+                T.Call("int32", "tir.tvm_throw_last_error", [])
+            buf = T.decl_buffer((16,), data=ptr)
+            buf[0] = T.float32(0)
+        if T.TVMBackendFreeWorkspace(2, 0, ptr) != 0:
+            T.Call("int32", "tir.tvm_throw_last_error", [])
+
+    def test_compare(self, before, expected, transform):
+        after = transform(before)
+        tvm.ir.assert_structural_equal(after, expected, map_free_vars=True)
+
+
+class TestLowerCPUAllocation(tvm.testing.CompareBeforeAfter):
+    """CPU allocations can be handled at codegen time"""
+
+    transform = tvm.tir.transform.LowerTVMBuiltin()
+
+    def before():
+        T.func_attr({"target": T.target("llvm")})
+        T.attr("dummy", "device_type", 1)  # kDLCPU
+        T.attr("dummy", "device_id", 0)
+        ptr = T.allocate([16], "float32")
+        buf = T.decl_buffer(16, "float32", data=ptr)
+        buf[0] = 0.0
+
+    def expected():
+        T.func_attr({"target": T.target("llvm")})
+        ptr = T.allocate([16], "float32")
+        buf = T.decl_buffer(16, "float32", data=ptr)
+        buf[0] = 0.0
+
+
+class TestLowerAllocateRequiresDeviceID(tvm.testing.CompareBeforeAfter):
+    transform = tvm.tir.transform.LowerTVMBuiltin()
+
+    def before():
+        T.func_attr({"target": T.target("llvm")})
+        T.attr("dummy", "device_id", 0)
+        ptr = T.allocate([16], "float32")
+        buf = T.decl_buffer(16, "float32", data=ptr)
+        buf[0] = 0.0
+
+    expected = tvm.TVMError
+
+
+class TestLowerAllocateRequiresDeviceType(tvm.testing.CompareBeforeAfter):
+    transform = tvm.tir.transform.LowerTVMBuiltin()
+
+    def before():
+        T.func_attr({"target": T.target("llvm")})
+        T.attr("dummy", "device_id", 0)
+        ptr = T.allocate([16], "float32")
+        buf = T.decl_buffer(16, "float32", data=ptr)
+        buf[0] = 0.0
+
+    expected = tvm.TVMError
+
+
 if __name__ == "__main__":
-    test_call_packed_return_non_i32()
-    test_lower_packed_func()
+    tvm.testing.main()

Reply via email to