This is an automated email from the ASF dual-hosted git repository.

mehrdadh pushed a commit to branch micro/dense_dsp_smaller_config in repository https://gitbox.apache.org/repos/asf/tvm.git
commit 1915c412a831a3a1df4b3819df41e86502a533a3 Author: Mehrdad Hessar <[email protected]> AuthorDate: Thu Sep 15 15:28:03 2022 -0700 gemm use any combination --- python/tvm/topi/arm_cpu/mprofile/dsp/dense.py | 2 +- python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/tvm/topi/arm_cpu/mprofile/dsp/dense.py b/python/tvm/topi/arm_cpu/mprofile/dsp/dense.py index 5630636c92..cecb461874 100644 --- a/python/tvm/topi/arm_cpu/mprofile/dsp/dense.py +++ b/python/tvm/topi/arm_cpu/mprofile/dsp/dense.py @@ -32,7 +32,7 @@ def dense_dsp_compute(cfg, data, weight, bias=None, out_dtype=None): M, K = get_const_tuple(data.shape) N, _ = get_const_tuple(weight.shape) - factor = 2 + factor = 16 # import pdb; pdb.set_trace() if M % factor == 0: cfg.define_split("tile_x", M, policy="factors", num_outputs=2, filter=lambda x: x.size[-1] % factor == 0) diff --git a/python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py b/python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py index ffc48eaabd..41f22516ba 100644 --- a/python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py +++ b/python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py @@ -207,7 +207,7 @@ __STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_body_{uniq_id}( int16_t bb_pad[{bb_pad_size}]; int32_t retcode = 0; - if ( {M} < 16 || {N} < 16 ) {{ + if ( {M} < 16 && {N} < 16 ) {{ retcode = gemm_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); goto out; }} @@ -313,7 +313,7 @@ __STATIC_FORCEINLINE int32_t gemm_{M}x{K}x{N}_update_{uniq_id}( int16_t bb_pad[{bb_pad_size}]; int32_t retcode = 0; - if ( {M} < 16 || {N} < 16 ) {{ + if ( {M} < 16 && {N} < 16 ) {{ retcode = gemm_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); goto out; }} @@ -393,7 +393,7 @@ __STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_body_{uniq_id}( int A_stride, int B_stride, int C_stride) {{ int32_t retcode = 0; - if ( {M} < 2 || 
{N} < 2 ) {{ + if ( {M} < 2 && {N} < 2 ) {{ retcode = gemm16_{M}x{K}x{N}_body_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); goto out; }} @@ -471,7 +471,7 @@ __STATIC_FORCEINLINE int32_t gemm16_{M}x{K}x{N}_update_{uniq_id}( int A_stride, int B_stride, int C_stride) {{ int32_t retcode = 0; - if ( {M} < 2 || {N} < 2 ) {{ + if ( {M} < 2 && {N} < 2 ) {{ retcode = gemm16_{M}x{K}x{N}_update_loop_{uniq_id}(aa, bb, cc, A_stride, B_stride, C_stride); goto out; }}
