This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 1f66f9041bc73b47f610afa844348e6c3654956f
Author:     Zhao Zhili <[email protected]>
AuthorDate: Thu May 21 02:12:30 2026 +0800
Commit:     Zhao Zhili <[email protected]>
CommitDate: Wed Jun 3 09:36:59 2026 +0000

    lavu/aarch64: split FMLA chain in scalarproduct_float

    Unroll to 16 floats per iteration with four independent accumulators
    and reduce them once after the loop.

    scalarproduct_float_neon:   before            after
    Apple M1 (clang 16):          0.9 (3.56x)      0.4 (9.18x)
    Cortex-A76 (gcc 12.4):      118.7 (4.43x)     85.3 (6.15x)

    Signed-off-by: Zhao Zhili <[email protected]>
---
 libavutil/aarch64/float_dsp_neon.S | 38 ++++++++++++++++++++++++++++++--------
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/libavutil/aarch64/float_dsp_neon.S b/libavutil/aarch64/float_dsp_neon.S
index 35e2715b87..fee47cb474 100644
--- a/libavutil/aarch64/float_dsp_neon.S
+++ b/libavutil/aarch64/float_dsp_neon.S
@@ -190,13 +190,35 @@ function ff_butterflies_float_neon, export=1
 endfunc
 
 function ff_scalarproduct_float_neon, export=1
-        movi            v2.4s, #0
-1:      ld1             {v0.4s}, [x0], #16
-        ld1             {v1.4s}, [x1], #16
-        subs            w2, w2, #4
-        fmla            v2.4s, v0.4s, v1.4s
-        b.gt            1b
-        faddp           v0.4s, v2.4s, v2.4s
-        faddp           s0, v0.2s
+        // 16 elements per iteration; w3 = len / 16.
+        lsr             w3, w2, #4
+        movi            v2.4s, #0
+        movi            v3.4s, #0
+        movi            v4.4s, #0
+        movi            v5.4s, #0
+        cbz             w3, 2f
+1:      ld1             {v16.4s, v17.4s}, [x0], #32
+        ld1             {v20.4s, v21.4s}, [x1], #32
+        ld1             {v18.4s, v19.4s}, [x0], #32
+        ld1             {v22.4s, v23.4s}, [x1], #32
+        subs            w3, w3, #1
+        fmla            v2.4s, v16.4s, v20.4s
+        fmla            v3.4s, v17.4s, v21.4s
+        fmla            v4.4s, v18.4s, v22.4s
+        fmla            v5.4s, v19.4s, v23.4s
+        b.ne            1b
+2:      // len is a multiple of 4
+        and             w2, w2, #12
+        cbz             w2, 4f
+3:      ld1             {v0.4s}, [x0], #16
+        ld1             {v1.4s}, [x1], #16
+        subs            w2, w2, #4
+        fmla            v2.4s, v0.4s, v1.4s
+        b.ne            3b
+4:      fadd            v2.4s, v2.4s, v3.4s
+        fadd            v4.4s, v4.4s, v5.4s
+        fadd            v2.4s, v2.4s, v4.4s
+        faddp           v0.4s, v2.4s, v2.4s
+        faddp           s0, v0.2s
         ret
 endfunc
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
