PR #22246 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/22246.patch
For bi-predicted weighted averages, only the sum of the two offsets is ever used, so add the two early. From 4e16436743c12edaa1658f9626a62cefa682e3f2 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Fri, 20 Feb 2026 18:24:24 +0100 Subject: [PATCH] avcodec/vvc/inter: Combine offsets early For bi-predicted weighted averages, only the sum of the two offsets is ever used, so add the two early. Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/aarch64/vvc/dsp_init.c | 4 ++-- libavcodec/riscv/vvc/dsp_init.c | 2 +- libavcodec/riscv/vvc/mc_rvv.S | 2 -- libavcodec/vvc/dsp.h | 2 +- libavcodec/vvc/inter.c | 10 +++++----- libavcodec/vvc/inter_template.c | 4 ++-- libavcodec/x86/vvc/dsp_init.c | 2 +- libavcodec/x86/vvc/mc.asm | 5 ++--- tests/checkasm/vvc_mc.c | 8 ++++---- 9 files changed, 18 insertions(+), 21 deletions(-) diff --git a/libavcodec/aarch64/vvc/dsp_init.c b/libavcodec/aarch64/vvc/dsp_init.c index bc2677945e..570070a28c 100644 --- a/libavcodec/aarch64/vvc/dsp_init.c +++ b/libavcodec/aarch64/vvc/dsp_init.c @@ -107,10 +107,10 @@ void ff_vvc_w_avg_12_neon(uint8_t *_dst, ptrdiff_t _dst_stride, #define W_AVG_FUN(bit_depth) \ static void vvc_w_avg_ ## bit_depth(uint8_t *dst, ptrdiff_t dst_stride, \ const int16_t *src0, const int16_t *src1, int width, int height, \ - int denom, int w0, int w1, int o0, int o1) \ + int denom, int w0, int w1, int o) \ { \ int shift = denom + FFMAX(3, 15 - bit_depth); \ - int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \ + int offset = (o * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \ uintptr_t w0_w1 = ((uintptr_t)w0 << 32) | (uint32_t)w1; \ uintptr_t offset_shift = ((uintptr_t)offset << 32) | (uint32_t)shift; \ ff_vvc_w_avg_ ## bit_depth ## _neon(dst, dst_stride, src0, src1, width, height, w0_w1, offset_shift); \ diff --git a/libavcodec/riscv/vvc/dsp_init.c b/libavcodec/riscv/vvc/dsp_init.c index d7a89f4779..f8fde41529 100644 --- a/libavcodec/riscv/vvc/dsp_init.c 
+++ b/libavcodec/riscv/vvc/dsp_init.c @@ -34,7 +34,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int width, int height); \ void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ const int16_t *src0, const int16_t *src1, int width, int height, \ - int denom, int w0, int w1, int o0, int o1); + int denom, int w0, int w1, int o); AVG_PROTOTYPES(8, rvv_128) AVG_PROTOTYPES(8, rvv_256) diff --git a/libavcodec/riscv/vvc/mc_rvv.S b/libavcodec/riscv/vvc/mc_rvv.S index e6b2aadafe..a612290e3a 100644 --- a/libavcodec/riscv/vvc/mc_rvv.S +++ b/libavcodec/riscv/vvc/mc_rvv.S @@ -163,9 +163,7 @@ func ff_vvc_w_avg_8_rvv_\vlen\(), zve32x, zbb, zba addi t6, a6, 7 ld t3, (sp) ld t4, 8(sp) - ld t5, 16(sp) addi t4, t4, 1 // o0 + o1 + 1 - add t4, t4, t5 addi t5, t6, -1 // shift - 1 sll t4, t4, t5 POW2_J \vlen, 2, a4 diff --git a/libavcodec/vvc/dsp.h b/libavcodec/vvc/dsp.h index ae22900931..29cdd7e8f4 100644 --- a/libavcodec/vvc/dsp.h +++ b/libavcodec/vvc/dsp.h @@ -75,7 +75,7 @@ typedef struct VVCInterDSPContext { void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src0, const int16_t *src1, int width, int height, - int denom, int w0, int w1, int o0, int o1); + int denom, int w0, int w1, int o); void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height, const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight); diff --git a/libavcodec/vvc/inter.c b/libavcodec/vvc/inter.c index 85e0665a75..7d6e79e49b 100644 --- a/libavcodec/vvc/inter.c +++ b/libavcodec/vvc/inter.c @@ -232,22 +232,22 @@ static void apply_averaging(uint8_t *dst, const ptrdiff_t dst_stride, return; } - int denom, w0, w1, o1, o2; + int denom, w0, w1, o; if (bcw_idx) { denom = 2; w1 = bcw_w_lut[bcw_idx]; w0 = 8 - w1; - o1 = o2 = 0; + o = 0; } else { const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? 
&fc->ps.ph.pwt : &sh->pwt; denom = w->log2_denom[c_idx > 0]; w0 = w->weight[L0][c_idx][mvf->ref_idx[L0]]; w1 = w->weight[L1][c_idx][mvf->ref_idx[L1]]; - o1 = w->offset[L0][c_idx][mvf->ref_idx[L0]]; - o2 = w->offset[L1][c_idx][mvf->ref_idx[L1]]; + o = w->offset[L0][c_idx][mvf->ref_idx[L0]] + + w->offset[L1][c_idx][mvf->ref_idx[L1]]; } - fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o1, o2); + fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o); } #define INTER_FILTER(t, frac) (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac]) diff --git a/libavcodec/vvc/inter_template.c b/libavcodec/vvc/inter_template.c index aee4994c17..efa432d1fd 100644 --- a/libavcodec/vvc/inter_template.c +++ b/libavcodec/vvc/inter_template.c @@ -201,12 +201,12 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride, const int16_t *src0, const int16_t *src1, const int width, const int height, - const int denom, const int w0, const int w1, const int o0, const int o1) + const int denom, const int w0, const int w1, const int o) { pixel *dst = (pixel*)_dst; const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel); const int shift = denom + FFMAX(3, 15 - BIT_DEPTH); - const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1)); + const int offset = (o * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1)); for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) diff --git a/libavcodec/x86/vvc/dsp_init.c b/libavcodec/x86/vvc/dsp_init.c index cd3d02c0fb..37ddbcb73b 100644 --- a/libavcodec/x86/vvc/dsp_init.c +++ b/libavcodec/x86/vvc/dsp_init.c @@ -231,7 +231,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ const int16_t *src0, const int16_t *src1, int width, int height);\ void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \ const int16_t *src0, const 
int16_t *src1, int width, int height, \ - int denom, int w0, int w1, int o0, int o1); \ + int denom, int w0, int w1, int o); \ c->inter.avg = bf(ff_vvc_avg, bd, opt); \ c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \ } while (0) diff --git a/libavcodec/x86/vvc/mc.asm b/libavcodec/x86/vvc/mc.asm index 8ba493aebd..4f078ea8d0 100644 --- a/libavcodec/x86/vvc/mc.asm +++ b/libavcodec/x86/vvc/mc.asm @@ -244,7 +244,7 @@ cglobal vvc_avg_%2, 4, 7, 5, dst, stride, src0, src1, w, h ;void ff_vvc_w_avg_%2_avx(uint8_t *dst, ptrdiff_t dst_stride, ; const int16_t *src0, const int16_t *src1, int width, int height, -; int denom, intptr_t w0, int w1, int o0, int o1); +; int denom, intptr_t w0, int w1, int o); %macro VVC_W_AVG_AVX2 3 cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, h %if UNIX64 @@ -256,8 +256,7 @@ cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, %endif mov t1d, r6m ; denom - mov t0d, r9m ; o0 - add t0d, r10m ; o1 + mov t0d, r9m ; o0 + o1 movifnidn t2d, r8m ; w1 add t1d, 15-%2 %if %2 != 8 diff --git a/tests/checkasm/vvc_mc.c b/tests/checkasm/vvc_mc.c index 754cf19065..623b6142f1 100644 --- a/tests/checkasm/vvc_mc.c +++ b/tests/checkasm/vvc_mc.c @@ -306,7 +306,7 @@ static void check_avg(void) { declare_func(void, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *src0, const int16_t *src1, int width, int height, - int denom, int w0, int w1, int o0, int o1); + int denom, int w0, int w1, int o); { const int denom = rnd() % 8; const int w0 = rnd() % 256 - 128; @@ -317,12 +317,12 @@ static void check_avg(void) memset(dst0, 0, AVG_DST_BUF_SIZE); memset(dst1, 0, AVG_DST_BUF_SIZE); - call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1); - call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1); + call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1); + call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, 
w0, w1, o0 + o1); if (memcmp(dst0, dst1, DST_BUF_SIZE)) fail(); if (w == h) - bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1); + bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1); } } } -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
