[tvm] branch main updated: [Adreno] Small fixes in Adreno schedules (#15391)

syfeng Mon, 24 Jul 2023 21:37:31 -0700

This is an automated email from the ASF dual-hosted git repository.

syfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git



The following commit(s) were added to refs/heads/main by this push:
     new d6407bef5d [Adreno] Small fixes in Adreno schedules (#15391)
d6407bef5d is described below

commit d6407bef5d96d0b87fde91b7e6c67a3155354902
Author: Egor Churaev <[email protected]>
AuthorDate: Tue Jul 25 07:37:02 2023 +0300

    [Adreno] Small fixes in Adreno schedules (#15391)
    
    On several topologies, I faced compilation errors. This PR introduces
    small fixes for these errors.
---
 python/tvm/topi/adreno/reduction.py                    |  2 +-
 python/tvm/topi/adreno/utils.py                        |  6 +++---
 python/tvm/topi/testing/common.py                      |  1 +
 .../relay/opencl_texture/test_reduction_texture.py     | 18 ++++++++++++++++++
 tests/python/topi/python/test_topi_transform.py        |  2 +-
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/python/tvm/topi/adreno/reduction.py b/python/tvm/topi/adreno/reduction.py
index 4ff519fd7c..a208e2e274 100644
--- a/python/tvm/topi/adreno/reduction.py
+++ b/python/tvm/topi/adreno/reduction.py
@@ -51,7 +51,7 @@ def _schedule_reduce_adreno(op, sch, is_idx_reduce=False):
         sch[temp_val_input].set_scope("local")
 
     shape = get_const_tuple(sch_output.shape)
-    latest4 = shape[-1] == 4
+    latest4 = len(shape) > 0 and shape[-1] == 4
     div4 = numpy.prod(shape) % 4 == 0
 
     # Fuse and split the axis
diff --git a/python/tvm/topi/adreno/utils.py b/python/tvm/topi/adreno/utils.py
index 2fb4cf18cc..698a306514 100644
--- a/python/tvm/topi/adreno/utils.py
+++ b/python/tvm/topi/adreno/utils.py
@@ -537,14 +537,14 @@ def bind_data_copy(stage, axis_to_vectorize=None):
             stage.vectorize(iax3)
             fused = stage.fuse(ax0, ax1, ax2, oax3)
 
-        ftc = numpy.prod(shape) / 4
+        ftc = numpy.prod(shape) // 4
         div = get_div(ftc, 128)
         block, thread = stage.split(fused, factor=div)
 
         stage.bind(block, te.thread_axis("blockIdx.z"))
         stage.bind(thread, te.thread_axis("threadIdx.z"))
     else:
-        if shape[-1] == 4:
+        if len(shape) > 0 and shape[-1] == 4:
             axes = stage.op.axis
             fused = stage.fuse(*axes[:-1])
             ftc = numpy.prod(shape[:-1])
@@ -557,7 +557,7 @@ def bind_data_copy(stage, axis_to_vectorize=None):
             ftc = numpy.prod(shape)
             vthread = get_div(ftc, 8)
             fused = stage.fuse(*stage.op.axis)
-            ftc = ftc / vthread
+            ftc = ftc // vthread
             # 1024 is a maximum work group size on the most Adreno GPU
             num_thread = get_div(ftc, 1024 // vthread)
             a, b = stage.split(fused, factor=num_thread)
diff --git a/python/tvm/topi/testing/common.py b/python/tvm/topi/testing/common.py
index d040310ccc..c84c5eaa1e 100644
--- a/python/tvm/topi/testing/common.py
+++ b/python/tvm/topi/testing/common.py
@@ -30,6 +30,7 @@ _injective_schedule = {
     "arm_cpu": topi.arm_cpu.schedule_injective,
     "gpu": topi.cuda.schedule_injective,
     "hls": topi.hls.schedule_injective,
+    "adreno": topi.adreno.schedule_injective,
 }
 
 _reduce_schedule = {
diff --git a/tests/python/relay/opencl_texture/test_reduction_texture.py b/tests/python/relay/opencl_texture/test_reduction_texture.py
index cc2dbff173..5728e6294f 100644
--- a/tests/python/relay/opencl_texture/test_reduction_texture.py
+++ b/tests/python/relay/opencl_texture/test_reduction_texture.py
@@ -177,5 +177,23 @@ def test_max_global_pooling_block4(remote, target, dtype):
     build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target)
 
 
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_sum_cast(remote, target, dtype):
+    shape = (10,)
+    A = relay.var("A", shape=shape)
+    w = relay.op.sum(A)
+    w = relay.cast(w, "int32")
+    mod = relay.Function([A], w)
+
+    shape_dict = {
+        "A": shape,
+    }
+    dtype_dict = {
+        "A": dtype,
+    }
+    build_run_compare(remote, mod, {}, shape_dict, dtype_dict, target)
+
+
 if __name__ == "__main__":
     tvm.testing.main()
diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py
index 0f64b486f3..bdd8907abf 100644
--- a/tests/python/topi/python/test_topi_transform.py
+++ b/tests/python/topi/python/test_topi_transform.py
@@ -453,7 +453,7 @@ def verify_dynamic_strided_slice(in_shape, begin, end, strides=None):
         foo(data_nd, begin_nd, end_nd, strides_nd, out_nd)
         tvm.testing.assert_allclose(out_nd.numpy(), out_npy)
 
-    for target in ["llvm", "opencl", "sdaccel", "aocl_sw_emu"]:
+    for target in ["llvm", "opencl", "sdaccel", "aocl_sw_emu", "opencl --device=adreno"]:
         check_device(target)

[tvm] branch main updated: [Adreno] Small fixes in Adreno schedules (#15391)

Reply via email to