This is an automated email from the git hooks/post-receive script. Git pushed a commit to branch master in repository ffmpeg.
commit 32df0352b791cc6d7a05ecd829940872e6fd14b8 Author: Jun Zhao <[email protected]> AuthorDate: Thu Mar 5 22:34:46 2026 +0800 Commit: Martin Storsjö <[email protected]> CommitDate: Fri Mar 13 21:43:37 2026 +0000 lavc/hevc: move subs earlier in qpel uni-weighted NEON loops Move the subs instruction before the store macro in the 8x-unrolled loops of qpel_uni_w_v4/v8/v16/v64 and qpel_uni_w_hv4/hv8/hv16, so that many NEON instructions from the store macro separate it from the conditional branch. This gives the CPU pipeline time to resolve the condition flags before the branch decision. Signed-off-by: Jun Zhao <[email protected]> --- libavcodec/aarch64/h26x/qpel_neon.S | 112 ++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/libavcodec/aarch64/h26x/qpel_neon.S b/libavcodec/aarch64/h26x/qpel_neon.S index 423db38491..884aa1dcab 100644 --- a/libavcodec/aarch64/h26x/qpel_neon.S +++ b/libavcodec/aarch64/h26x/qpel_neon.S @@ -2178,53 +2178,53 @@ function ff_hevc_put_hevc_qpel_uni_w_v4_8_neon, export=1 1: ldr s23, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v24, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s16, [x2] QPEL_FILTER_B v24, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s17, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v24, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s18, [x2] QPEL_FILTER_B v24, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s19, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v24, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s20, [x2] QPEL_FILTER_B v24, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s21, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v24, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.eq 2f ldr s22, [x2] QPEL_FILTER_B v24, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_V_4 subs w4, w4, #1 + QPEL_UNI_W_V_4 b.ne 1b 2: ret @@ -2259,53 +2259,53 @@ function ff_hevc_put_hevc_qpel_uni_w_v8_8_neon, export=1 1: ldr d23, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d16, [x2] QPEL_FILTER_B v26, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d17, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d18, [x2] QPEL_FILTER_B v26, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d19, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d20, [x2] QPEL_FILTER_B v26, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d21, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.eq 2f ldr d22, [x2] QPEL_FILTER_B v26, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_V_8 subs w4, w4, #1 + QPEL_UNI_W_V_8 b.ne 1b 2: ret @@ -2350,60 +2350,60 @@ function ff_hevc_put_hevc_qpel_uni_w_v16_8_neon, export=1 add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v16, v17, v18, v19, v20, v21, v22, v23 QPEL_FILTER_B2 v27, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q16, [x2] QPEL_FILTER_B v26, v17, v18, v19, v20, v21, v22, v23, v16 QPEL_FILTER_B2 v27, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q17, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v18, v19, v20, v21, v22, v23, v16, v17 QPEL_FILTER_B2 v27, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q18, [x2] QPEL_FILTER_B v26, v19, v20, v21, v22, v23, v16, v17, v18 QPEL_FILTER_B2 v27, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q19, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v20, v21, v22, v23, v16, v17, v18, v19 QPEL_FILTER_B2 v27, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q20, [x2] QPEL_FILTER_B v26, v21, v22, v23, v16, v17, v18, v19, v20 QPEL_FILTER_B2 v27, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q21, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v22, v23, v16, v17, v18, v19, v20, v21 QPEL_FILTER_B2 v27, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q22, [x2] QPEL_FILTER_B v26, v23, v16, v17, v18, v19, v20, v21, v22 QPEL_FILTER_B2 v27, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.ne 1b 2: ret @@ -2433,60 +2433,60 @@ function ff_hevc_put_hevc_qpel_uni_w_v64_8_neon, export=1 add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v16, v17, v18, v19, v20, v21, v22, v23 QPEL_FILTER_B2 v27, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q16, [x2] QPEL_FILTER_B v26, v17, v18, v19, v20, v21, v22, v23, v16 QPEL_FILTER_B2 v27, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q17, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v18, v19, v20, v21, v22, v23, v16, v17 QPEL_FILTER_B2 v27, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q18, [x2] QPEL_FILTER_B v26, v19, v20, v21, v22, v23, v16, v17, v18 QPEL_FILTER_B2 v27, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q19, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v20, v21, v22, v23, v16, v17, v18, v19 QPEL_FILTER_B2 v27, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q20, [x2] QPEL_FILTER_B v26, v21, v22, v23, v16, v17, v18, v19, v20 QPEL_FILTER_B2 v27, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q21, [x2, x3] add x2, x2, x3, lsl #1 QPEL_FILTER_B v26, v22, v23, v16, v17, v18, v19, v20, v21 QPEL_FILTER_B2 v27, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.eq 2f ldr q22, [x2] QPEL_FILTER_B v26, v23, v16, v17, v18, v19, v20, v21, v22 QPEL_FILTER_B2 v27, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_V_16 subs w4, w4, #1 + QPEL_UNI_W_V_16 b.ne 1b 2: subs w13, w13, #16 @@ -4818,57 +4818,57 @@ function hevc_put_hevc_qpel_uni_w_hv4_8_end_neon ldr d23, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d16, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d17, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d18, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d19, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d20, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d21, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.eq 2f ldr d22, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_HV_4 subs w22, w22, #1 + QPEL_UNI_W_HV_4 b.hi 1b 2: @@ -4908,64 +4908,64 @@ function hevc_put_hevc_qpel_uni_w_hv8_8_end_neon add sp, sp, x10 QPEL_FILTER_H v26, v16, v17, v18, v19, v20, v21, v22, v23 QPEL_FILTER_H2 v27, v16, v17, v18, v19, v20, v21, v22, v23 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q16, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v17, v18, v19, v20, v21, v22, v23, v16 QPEL_FILTER_H2 v27, v17, v18, v19, v20, v21, v22, v23, v16 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q17, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v18, v19, v20, v21, v22, v23, v16, v17 QPEL_FILTER_H2 v27, v18, v19, v20, v21, v22, v23, v16, v17 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q18, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v19, v20, v21, v22, v23, v16, v17, v18 QPEL_FILTER_H2 v27, v19, v20, v21, v22, v23, v16, v17, v18 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q19, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v20, v21, v22, v23, v16, v17, v18, v19 QPEL_FILTER_H2 v27, v20, v21, v22, v23, v16, v17, v18, v19 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q20, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v21, v22, v23, v16, v17, v18, v19, v20 QPEL_FILTER_H2 v27, v21, v22, v23, v16, v17, v18, v19, v20 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q21, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v22, v23, v16, v17, v18, v19, v20, v21 QPEL_FILTER_H2 v27, v22, v23, v16, v17, v18, v19, v20, v21 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.eq 2f ldr q22, [sp] add sp, sp, x10 QPEL_FILTER_H v26, v23, v16, v17, v18, v19, v20, v21, v22 QPEL_FILTER_H2 v27, v23, v16, v17, v18, v19, v20, v21, v22 - QPEL_UNI_W_HV_8 subs w22, w22, #1 + QPEL_UNI_W_HV_8 b.hi 1b 2: @@ -5027,8 +5027,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v16, v17, v18, v19, v20, v21, v22, v23 QPEL_FILTER_H v26, v1, v2, v3, v4, v5, v6, v7, v31 QPEL_FILTER_H2 v27, v1, v2, v3, v4, v5, v6, v7, v31 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q16, q1, [x11] @@ -5037,8 +5037,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v17, v18, v19, v20, v21, v22, v23, v16 QPEL_FILTER_H v26, v2, v3, v4, v5, v6, v7, v31, v1 QPEL_FILTER_H2 v27, v2, v3, v4, v5, v6, v7, v31, v1 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q17, q2, [x11] @@ -5047,8 +5047,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v18, v19, v20, v21, v22, v23, v16, v17 QPEL_FILTER_H v26, v3, v4, v5, v6, v7, v31, v1, v2 QPEL_FILTER_H2 v27, v3, v4, v5, v6, v7, v31, v1, v2 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q18, q3, [x11] @@ -5057,8 +5057,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v19, v20, v21, v22, v23, v16, v17, v18 QPEL_FILTER_H v26, v4, v5, v6, v7, v31, v1, v2, v3 QPEL_FILTER_H2 v27, v4, v5, v6, v7, v31, v1, v2, v3 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q19, q4, [x11] @@ -5067,8 +5067,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v20, v21, v22, v23, v16, v17, v18, v19 QPEL_FILTER_H v26, v5, v6, v7, v31, v1, v2, v3, v4 QPEL_FILTER_H2 v27, v5, v6, v7, v31, v1, v2, v3, v4 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q20, q5, [x11] @@ -5077,8 +5077,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v21, v22, v23, v16, v17, v18, v19, v20 QPEL_FILTER_H v26, v6, v7, v31, v1, v2, v3, v4, v5 QPEL_FILTER_H2 v27, v6, v7, v31, v1, v2, v3, v4, v5 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q21, q6, [x11] @@ -5087,8 +5087,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v22, v23, v16, v17, v18, v19, v20, v21 QPEL_FILTER_H v26, v7, v31, v1, v2, v3, v4, v5, v6 QPEL_FILTER_H2 v27, v7, v31, v1, v2, v3, v4, v5, v6 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.eq 2f ldp q22, q7, [x11] @@ -5097,8 +5097,8 @@ function hevc_put_hevc_qpel_uni_w_hv16_8_end_neon QPEL_FILTER_H2 v25, v23, v16, v17, v18, v19, v20, v21, v22 QPEL_FILTER_H v26, v31, v1, v2, v3, v4, v5, v6, v7 QPEL_FILTER_H2 v27, v31, v1, v2, v3, v4, v5, v6, v7 - QPEL_UNI_W_HV_16 subs w22, w22, #1 + QPEL_UNI_W_HV_16 b.hi 1b 2: subs w27, w27, #16 _______________________________________________ ffmpeg-cvslog mailing list -- [email protected] To unsubscribe send an email to [email protected]
