This is an automated email from the ASF dual-hosted git repository.
syfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new d6407bef5d [Adreno] Small fixes in Adreno schedules (#15391)
d6407bef5d is described below
commit d6407bef5d96d0b87fde91b7e6c67a3155354902
Author: Egor Churaev <[email protected]>
AuthorDate: Tue Jul 25 07:37:02 2023 +0300
[Adreno] Small fixes in Adreno schedules (#15391)
On several topologies, I faced compilation errors. This PR introduces
small fixes for these errors.
---
python/tvm/topi/adreno/reduction.py | 2 +-
python/tvm/topi/adreno/utils.py | 6 +++---
python/tvm/topi/testing/common.py | 1 +
.../relay/opencl_texture/test_reduction_texture.py | 18 ++++++++++++++++++
tests/python/topi/python/test_topi_transform.py | 2 +-
5 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/python/tvm/topi/adreno/reduction.py b/python/tvm/topi/adreno/reduction.py
index 4ff519fd7c..a208e2e274 100644
--- a/python/tvm/topi/adreno/reduction.py
+++ b/python/tvm/topi/adreno/reduction.py
@@ -51,7 +51,7 @@ def _schedule_reduce_adreno(op, sch, is_idx_reduce=False):
sch[temp_val_input].set_scope("local")
shape = get_const_tuple(sch_output.shape)
- latest4 = shape[-1] == 4
+ latest4 = len(shape) > 0 and shape[-1] == 4
div4 = numpy.prod(shape) % 4 == 0
# Fuse and split the axis
diff --git a/python/tvm/topi/adreno/utils.py b/python/tvm/topi/adreno/utils.py
index 2fb4cf18cc..698a306514 100644
--- a/python/tvm/topi/adreno/utils.py
+++ b/python/tvm/topi/adreno/utils.py
@@ -537,14 +537,14 @@ def bind_data_copy(stage, axis_to_vectorize=None):
stage.vectorize(iax3)
fused = stage.fuse(ax0, ax1, ax2, oax3)
- ftc = numpy.prod(shape) / 4
+ ftc = numpy.prod(shape) // 4
div = get_div(ftc, 128)
block, thread = stage.split(fused, factor=div)
stage.bind(block, te.thread_axis("blockIdx.z"))
stage.bind(thread, te.thread_axis("threadIdx.z"))
else:
- if shape[-1] == 4:
+ if len(shape) > 0 and shape[-1] == 4:
axes = stage.op.axis
fused = stage.fuse(*axes[:-1])
ftc = numpy.prod(shape[:-1])
@@ -557,7 +557,7 @@ def bind_data_copy(stage, axis_to_vectorize=None):
ftc = numpy.prod(shape)
vthread = get_div(ftc, 8)
fused = stage.fuse(*stage.op.axis)
- ftc = ftc / vthread
+ ftc = ftc // vthread
# 1024 is a maximum work group size on the most Adreno GPU
num_thread = get_div(ftc, 1024 // vthread)
a, b = stage.split(fused, factor=num_thread)
diff --git a/python/tvm/topi/testing/common.py b/python/tvm/topi/testing/common.py
index d040310ccc..c84c5eaa1e 100644
--- a/python/tvm/topi/testing/common.py
+++ b/python/tvm/topi/testing/common.py
@@ -30,6 +30,7 @@ _injective_schedule = {
"arm_cpu": topi.arm_cpu.schedule_injective,
"gpu": topi.cuda.schedule_injective,
"hls": topi.hls.schedule_injective,
+ "adreno": topi.adreno.schedule_injective,
}
_reduce_schedule = {
diff --git a/tests/python/relay/opencl_texture/test_reduction_texture.py b/tests/python/relay/opencl_texture/test_reduction_texture.py
index cc2dbff173..5728e6294f 100644
--- a/tests/python/relay/opencl_texture/test_reduction_texture.py
+++ b/tests/python/relay/opencl_texture/test_reduction_texture.py
@@ -177,5 +177,23 @@ def test_max_global_pooling_block4(remote, target, dtype):
build_run_compare(remote, mod, {}, {"data": input_shape}, {"data": dtype}, target)
+@tvm.testing.requires_opencl
+@tvm.testing.parametrize_targets("opencl -device=adreno")
+def test_sum_cast(remote, target, dtype):
+ shape = (10,)
+ A = relay.var("A", shape=shape)
+ w = relay.op.sum(A)
+ w = relay.cast(w, "int32")
+ mod = relay.Function([A], w)
+
+ shape_dict = {
+ "A": shape,
+ }
+ dtype_dict = {
+ "A": dtype,
+ }
+ build_run_compare(remote, mod, {}, shape_dict, dtype_dict, target)
+
+
if __name__ == "__main__":
tvm.testing.main()
diff --git a/tests/python/topi/python/test_topi_transform.py b/tests/python/topi/python/test_topi_transform.py
index 0f64b486f3..bdd8907abf 100644
--- a/tests/python/topi/python/test_topi_transform.py
+++ b/tests/python/topi/python/test_topi_transform.py
@@ -453,7 +453,7 @@ def verify_dynamic_strided_slice(in_shape, begin, end, strides=None):
foo(data_nd, begin_nd, end_nd, strides_nd, out_nd)
tvm.testing.assert_allclose(out_nd.numpy(), out_npy)
- for target in ["llvm", "opencl", "sdaccel", "aocl_sw_emu"]:
+ for target in ["llvm", "opencl", "sdaccel", "aocl_sw_emu", "opencl --device=adreno"]:
check_device(target)