This is an automated email from the ASF dual-hosted git repository.

masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 738c2e9e90 [VM][Adreno] Fix using buffers for weights in VM (#15671)
738c2e9e90 is described below

commit 738c2e9e90a6daacc7e581aa1745369ddbdf93f9
Author: Egor Churaev <egor.chur...@gmail.com>
AuthorDate: Wed Sep 6 21:46:45 2023 +0300

    [VM][Adreno] Fix using buffers for weights in VM (#15671)
    
    * [VM][Adreno] Fix using buffers for weights in VM
    
    In the VM flow, `fn->attrs` doesn't contain the `kernel_layout`
    information, so we get this value from `expr_attrib` instead. In this
    PR, the `CanUseBuffers` function was modified to work with the VM.
    
    A new test that checks the memory scopes for the VM was added.
    
    * Fix ci
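
    For context, a minimal sketch (not part of this commit, assuming a
    stock TVM build) of where `kernel_layout` lives on the Relay side: it
    is an attribute of the conv2d call itself, which is what the C++
    change below reads via `expr_attrib` instead of the function's
    `DictAttrs`:

        from tvm import relay

        data = relay.var("data", shape=(1, 64, 75, 75), dtype="float16")
        weight = relay.var("weight", shape=(64, 64, 3, 3), dtype="float16")
        # conv is a Call node; its attrs are Conv2DAttrs
        conv = relay.nn.conv2d(
            data, weight, padding=[1, 1, 1, 1], channels=64, kernel_size=(3, 3)
        )
        # The layout the fix reads from expr_attrib ("OIHW" by default):
        print(conv.attrs.kernel_layout)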
---
 src/relay/transforms/annotate_texture_storage.cc   |  8 ++-
 .../opencl_texture/test_conv2d_nchw_texture.py     | 77 ++++++++++++++++++----
 .../relay/opencl_texture/utils/adreno_utils.py     | 18 ++---
 3 files changed, 76 insertions(+), 27 deletions(-)

diff --git a/src/relay/transforms/annotate_texture_storage.cc b/src/relay/transforms/annotate_texture_storage.cc
index 4921cef4c8..01d47b6953 100644
--- a/src/relay/transforms/annotate_texture_storage.cc
+++ b/src/relay/transforms/annotate_texture_storage.cc
@@ -174,8 +174,11 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
             for (const auto& ttype : FlattenTupleType(fn->params[i]->checked_type())) {
               std::string scope = Scope(ttype->shape, GetVirtualDevice(GetRef<Expr>(call)));
               if (expr_attrib.as<Conv2DAttrs>() || expr_attrib.as<Conv2DWinogradAttrs>()) {
+                String kernel_layout = expr_attrib.as<Conv2DAttrs>()
+                                           ? expr_attrib.as<Conv2DAttrs>()->kernel_layout
+                                           : expr_attrib.as<Conv2DWinogradAttrs>()->kernel_layout;
                 if ((i == weights_pos) && !ttype->dtype.is_float16() &&
-                    CanUseBuffers(call->args[i], ttype->shape, fn->attrs)) {
+                    CanUseBuffers(call->args[i], ttype->shape, kernel_layout)) {
                   buffers_params.insert(fn->params[i]);
                   buffers_args.insert(call->args[i]);
                   scope = "global";
@@ -426,10 +429,9 @@ class StorageInfo : private transform::DeviceAwareExprVisitor {
   }
 
   bool CanUseBuffers(const Expr param, const Array<PrimExpr> shape,
-                     const tvm::DictAttrs param_attrs) const {
+                     const String kernel_layout) const {
     bool use_buffer = false;
     if (param.as<ConstantNode>() && shape.size() == 5) {
-      auto kernel_layout = param_attrs.GetAttr<String>("kernel_layout");
       if (kernel_layout == "HWOI4o" || kernel_layout == "HWIO4o") {
         int a0 = shape[0].as<IntImmNode>()->value;
         int a1 = shape[1].as<IntImmNode>()->value;
diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
index 3c9c3f2caf..1dd5ca2abd 100644
--- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
+++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py
@@ -692,7 +692,6 @@ def test_residual_block(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -790,11 +789,12 @@ def test_concat(remote, target, executor_type, dtype):
 
     static_memory_scope = [
         "",
+        "global.texture",
         "global",
         "global.texture-weight",
-        "global.texture-weight",
         "global",
-        "global.texture-weight",
+        "global.texture-nhwc",
+        "global",
         "global.texture-weight",
         "",
         "",
@@ -803,8 +803,6 @@ def test_concat(remote, target, executor_type, dtype):
         "",
     ]
 
-    static_memory_scope = []
-
     if executor_type == "ge":
         build_run_compare(
             remote,
@@ -823,7 +821,6 @@ def test_concat(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -968,7 +965,6 @@ def test_pooling_branching_texture_params(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -1111,7 +1107,6 @@ def test_branching_texture_params(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -1212,7 +1207,6 @@ def test_conv2d_different_lowering_same_op(remote, target, executor_type, dtype)
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -1380,7 +1374,6 @@ def test_injective_nwo_inputs1(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -1495,7 +1488,6 @@ def test_injective_nwo_inputs2(remote, target, executor_type, dtype):
             {"data": input_shape},
             {"data": dtype},
             target,
-            static_memory_scope,
         )
 
 
@@ -1534,5 +1526,68 @@ def test_conv2d_to_3_channels(remote, target, executor_type, dtype):
         )
 
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_conv2d_weight_on_buffers(remote, target, executor_type, dtype):
+    target = "opencl -device=adreno"
+    input_shape = (1, 64, 75, 75)
+    filter_shape = (64, 64, 3, 3)
+    bias_shape = (64,)
+    A = relay.var("data", shape=input_shape, dtype=dtype)
+    W = relay.var("weight", shape=filter_shape, dtype=dtype)
+    BS = relay.var("bias", shape=bias_shape, dtype=dtype)
+    conv = relay.nn.conv2d(A, W, padding=[1, 1, 1, 1], channels=64, kernel_size=(3, 3))
+    conv = relay.nn.bias_add(conv, BS)
+    conv = relay.op.nn.relu(conv)
+
+    mod = relay.Function([A, W, BS], conv)
+    np.random.seed(0)
+    initializer = relay.testing.init.Xavier()
+    filter_data = np.zeros(filter_shape).astype(dtype)
+    bias_data = np.zeros(bias_shape).astype(dtype)
+    initializer("weight", filter_data)
+    initializer("bias", bias_data)
+    params1 = {
+        "weight": tvm.nd.array(filter_data),
+        "bias": tvm.nd.array(bias_data),
+    }
+
+    if executor_type == "ge":
+        static_memory_scope = [
+            "",
+            "global.texture",
+            "global",
+            "global.texture-weight",
+            "",
+            "",
+        ]
+        build_run_compare(
+            remote,
+            mod,
+            params1,
+            {"data": input_shape},
+            {"data": dtype},
+            target,
+            static_memory_scope,
+        )
+    else:
+        static_memory_scope = """
+        VM VirtualDevice[0]: device type 1, id 0 and mem_scope
+        VM VirtualDevice[1]: device type 4, id 0 and mem_scope
+        VM VirtualDevice[2]: device type 4, id 0 and mem_scope global.texture
+        VM VirtualDevice[3]: device type 4, id 0 and mem_scope global
+        VM VirtualDevice[4]: device type 4, id 0 and mem_scope global.texture-weight
+        """
+        build_run_compare_vm(
+            remote,
+            mod,
+            params1,
+            {"data": input_shape},
+            {"data": dtype},
+            target,
+            static_memory_scope,
+        )
+
+
 if __name__ == "__main__":
     tvm.testing.main()
diff --git a/tests/python/relay/opencl_texture/utils/adreno_utils.py b/tests/python/relay/opencl_texture/utils/adreno_utils.py
index 309243df16..d9e52f8847 100644
--- a/tests/python/relay/opencl_texture/utils/adreno_utils.py
+++ b/tests/python/relay/opencl_texture/utils/adreno_utils.py
@@ -161,19 +161,11 @@ def build_run_compare_vm(
                 tvm_mod_nchwc, target=target, target_host=target_host, params=params1
             )
 
-    # TODO(echuraev): enable scope checking
-    ## verification that storage_scope has expected textures scopes
-    # graph_json = json.loads(graph)
-    # if "storage_scope" in graph_json["attrs"]:
-    #    assert (
-    #        len(static_mem_scopes) == len(graph_json["attrs"]["storage_scope"][1])
-    #        or len(static_mem_scopes) == 0
-    #    )
-    # else:
-    #    assert len(static_mem_scopes) == 0
-
-    # for i in range(0, len(static_mem_scopes)):
-    #    assert static_mem_scopes[i] == graph_json["attrs"]["storage_scope"][1][i]
+    if len(static_mem_scopes) > 0:
+        mem_scopes_lines = static_mem_scopes.strip().split("\n")
+        vm_lines = vmc._get_virtual_devices().strip().split("\n")
+        for i in range(0, len(mem_scopes_lines)):
+            assert mem_scopes_lines[i].strip() == vm_lines[i].strip()
 
     if remote is None:
         dev = tvm.opencl()
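
For reference, a hedged sketch of obtaining the virtual-device dump that
build_run_compare_vm now matches line-by-line (the names `mod` and
`params1` are as built in the new test above, and `_get_virtual_devices()`
is the internal executable accessor used by the helper):

    import tvm
    from tvm import relay

    ir_mod = tvm.IRModule.from_expr(mod)
    with tvm.transform.PassContext(opt_level=3):
        # relay.vm.compile is the standard VM compilation entry point
        vmc = relay.vm.compile(ir_mod, target="opencl -device=adreno", params=params1)
    # Multi-line dump compared against static_mem_scopes in the helper
    print(vmc._get_virtual_devices())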
