This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 1e86a92a1cd752f64b47a8a09a44626320cd4e27
Author: Zhao Zhili <[email protected]>
AuthorDate: Thu May 21 15:01:50 2026 +0800
Commit: Zhao Zhili <[email protected]>
CommitDate: Wed Jun 3 09:36:59 2026 +0000

    lavu/aarch64: unroll butterflies_float to 8 floats/iter

    butterflies_float_neon:     before          after
    Cortex-A76 (gcc 12.4):      163.1 (3.95x)   147.0 (4.37x)
    Apple M1 (clang 16):        0.7 (0.85x)     0.6 (0.99x)

    Signed-off-by: Zhao Zhili <[email protected]>
---
 libavutil/aarch64/float_dsp_neon.S | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/libavutil/aarch64/float_dsp_neon.S b/libavutil/aarch64/float_dsp_neon.S
index fee47cb474..aa6b5189f5 100644
--- a/libavutil/aarch64/float_dsp_neon.S
+++ b/libavutil/aarch64/float_dsp_neon.S
@@ -178,15 +178,26 @@ function ff_vector_fmul_reverse_neon, export=1
 endfunc
 
 function ff_butterflies_float_neon, export=1
-1:      ld1 {v0.4s}, [x0]
+        subs w2, w2, #8
+        b.lt 2f
+1:      ldp q0, q1, [x0]
+        ldp q2, q3, [x1]
+        subs w2, w2, #8
+        fadd v4.4s, v0.4s, v2.4s
+        fadd v5.4s, v1.4s, v3.4s
+        fsub v0.4s, v0.4s, v2.4s
+        fsub v1.4s, v1.4s, v3.4s
+        st1 {v4.4s, v5.4s}, [x0], #32
+        st1 {v0.4s, v1.4s}, [x1], #32
+        b.ge 1b
+2:      tbz w2, #2, 3f
+        ld1 {v0.4s}, [x0]
         ld1 {v1.4s}, [x1]
-        subs w2, w2, #4
         fsub v2.4s, v0.4s, v1.4s
         fadd v3.4s, v0.4s, v1.4s
-        st1 {v2.4s}, [x1], #16
-        st1 {v3.4s}, [x0], #16
-        b.gt 1b
-        ret
+        st1 {v2.4s}, [x1]
+        st1 {v3.4s}, [x0]
+3:      ret
 endfunc
 
 function ff_scalarproduct_float_neon, export=1
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]
