PR #23571 opened by mkver URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23571 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23571.patch
I'd really appreciate if someone knowledgeable (like @mstorsjo) would check the apple aarch64 changes in the "Combine offsets early for biweight prediction" commit. >From 63239b63b1f50e926556b6edae98fb3d515be8ee Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Tue, 23 Jun 2026 21:37:46 +0200 Subject: [PATCH 1/5] avcodec/x86/hevc/mc: Use pmaddwd in biweight functions Improves performance and saves 384B of .text here; also avoids using nonvolatile registers on Win64. Old benchmarks (just a selection): put_hevc_qpel_bi_w_v16_8_c: 1158.1 put_hevc_qpel_bi_w_v16_8_sse4: 214.4 ( 5.40x) put_hevc_qpel_bi_w_v16_10_c: 2193.3 put_hevc_qpel_bi_w_v16_10_sse4: 318.5 ( 6.89x) put_hevc_qpel_bi_w_v16_12_c: 2188.5 put_hevc_qpel_bi_w_v16_12_sse4: 317.8 ( 6.89x) put_hevc_qpel_bi_w_v24_8_c: 2940.5 put_hevc_qpel_bi_w_v24_8_sse4: 502.9 ( 5.85x) put_hevc_qpel_bi_w_v24_10_c: 4557.1 put_hevc_qpel_bi_w_v24_10_sse4: 686.0 ( 6.64x) put_hevc_qpel_bi_w_v24_12_c: 4557.1 put_hevc_qpel_bi_w_v24_12_sse4: 688.4 ( 6.62x) put_hevc_qpel_bi_w_v32_8_c: 3753.0 put_hevc_qpel_bi_w_v32_8_sse4: 817.8 ( 4.59x) put_hevc_qpel_bi_w_v32_10_c: 6504.2 put_hevc_qpel_bi_w_v32_10_sse4: 1227.7 ( 5.30x) put_hevc_qpel_bi_w_v32_12_c: 6502.0 put_hevc_qpel_bi_w_v32_12_sse4: 1230.8 ( 5.28x) put_hevc_qpel_bi_w_v48_8_c: 7756.0 put_hevc_qpel_bi_w_v48_8_sse4: 1805.9 ( 4.29x) put_hevc_qpel_bi_w_v48_10_c: 12938.4 put_hevc_qpel_bi_w_v48_10_sse4: 2690.5 ( 4.81x) put_hevc_qpel_bi_w_v48_12_c: 12934.1 put_hevc_qpel_bi_w_v48_12_sse4: 2691.2 ( 4.81x) put_hevc_qpel_bi_w_v64_8_c: 13212.5 put_hevc_qpel_bi_w_v64_8_sse4: 3183.9 ( 4.15x) put_hevc_qpel_bi_w_v64_10_c: 21520.0 put_hevc_qpel_bi_w_v64_10_sse4: 4854.9 ( 4.43x) put_hevc_qpel_bi_w_v64_12_c: 21529.5 put_hevc_qpel_bi_w_v64_12_sse4: 4860.9 ( 4.43x) New benchmarks: put_hevc_qpel_bi_w_v16_8_c: 1159.1 put_hevc_qpel_bi_w_v16_8_sse4: 176.7 ( 6.56x) put_hevc_qpel_bi_w_v16_10_c: 2196.5 put_hevc_qpel_bi_w_v16_10_sse4: 279.9 ( 7.85x) put_hevc_qpel_bi_w_v16_12_c: 2189.3 
put_hevc_qpel_bi_w_v16_12_sse4: 280.5 ( 7.80x) put_hevc_qpel_bi_w_v24_8_c: 2940.4 put_hevc_qpel_bi_w_v24_8_sse4: 417.5 ( 7.04x) put_hevc_qpel_bi_w_v24_10_c: 4553.1 put_hevc_qpel_bi_w_v24_10_sse4: 605.9 ( 7.51x) put_hevc_qpel_bi_w_v24_12_c: 4573.9 put_hevc_qpel_bi_w_v24_12_sse4: 605.6 ( 7.55x) put_hevc_qpel_bi_w_v32_8_c: 3752.2 put_hevc_qpel_bi_w_v32_8_sse4: 668.2 ( 5.61x) put_hevc_qpel_bi_w_v32_10_c: 6482.2 put_hevc_qpel_bi_w_v32_10_sse4: 1077.4 ( 6.02x) put_hevc_qpel_bi_w_v32_12_c: 6484.9 put_hevc_qpel_bi_w_v32_12_sse4: 1088.0 ( 5.96x) put_hevc_qpel_bi_w_v48_8_c: 7765.1 put_hevc_qpel_bi_w_v48_8_sse4: 1467.8 ( 5.29x) put_hevc_qpel_bi_w_v48_10_c: 12902.8 put_hevc_qpel_bi_w_v48_10_sse4: 2356.6 ( 5.47x) put_hevc_qpel_bi_w_v48_12_c: 12931.1 put_hevc_qpel_bi_w_v48_12_sse4: 2356.6 ( 5.49x) put_hevc_qpel_bi_w_v64_8_c: 13207.2 put_hevc_qpel_bi_w_v64_8_sse4: 2624.9 ( 5.03x) put_hevc_qpel_bi_w_v64_10_c: 21542.3 put_hevc_qpel_bi_w_v64_10_sse4: 4438.8 ( 4.85x) put_hevc_qpel_bi_w_v64_12_c: 21537.4 put_hevc_qpel_bi_w_v64_12_sse4: 4359.5 ( 4.94x) Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/x86/hevc/mc.asm | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm index 550f7a0e23..66ed406c26 100644 --- a/libavcodec/x86/hevc/mc.asm +++ b/libavcodec/x86/hevc/mc.asm @@ -1090,27 +1090,17 @@ cglobal hevc_put_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox jnz .loop ; height loop RET -cglobal hevc_put_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 +cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 movifnidn r5d, denomm -%if %1 <= 4 - pxor m1, m1 -%endif - movd m2, wx0m ; WX0 + movd m3, wx0m ; WX0 lea r5d, [r5d+14-%2] ; shift = 14 - bitd + denom - movd m3, wx1m ; WX1 + movd m2, wx1m ; WX1 movd m0, r5d ; shift -%if %1 <= 4 - punpcklwd m2, m1 - punpcklwd m3, m1 
-%else - punpcklwd m2, m2 - punpcklwd m3, m3 -%endif + punpcklwd m2, m3 inc r5d movd m5, r5d ; shift+1 pshufd m2, m2, 0 mov r5d, ox0m - pshufd m3, m3, 0 add r5d, ox1m %if %2 != 8 shl r5d, %2-8 ; ox << (bitd - 8) @@ -1128,26 +1118,16 @@ cglobal hevc_put_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom, .loop: SIMPLE_LOAD %1, 10, srcq, m0 - SIMPLE_LOAD %1, 10, src2q, m8 + SIMPLE_LOAD %1, 10, src2q, m1 %if %1 <= 4 punpcklwd m0, m1 - punpcklwd m8, m1 - pmaddwd m0, m3 - pmaddwd m8, m2 + pmaddwd m0, m2 paddd m0, m4 - paddd m0, m8 psrad m0, m5 %else - pmulhw m6, m0, m3 - pmullw m0, m3 - pmulhw m7, m8, m2 - pmullw m8, m2 - punpckhwd m1, m0, m6 - punpcklwd m0, m6 - punpckhwd m9, m8, m7 - punpcklwd m8, m7 - paddd m0, m8 - paddd m1, m9 + SBUTTERFLY wd, 0, 1, 3 + pmaddwd m0, m2 + pmaddwd m1, m2 paddd m0, m4 paddd m1, m4 psrad m0, m5 -- 2.52.0 >From 4b1cc171ac01fcf79ce2965bfb70e2743f66c642 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Tue, 23 Jun 2026 21:58:47 +0200 Subject: [PATCH 2/5] avcodec/hevc/hevcdec: Remove redundant clipping Forgotten in f82dd4c09b2decb033f1e339d4be81efd38554f1. 
Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/hevc/hevcdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c index ae064ec8af..f00acd07bf 100644 --- a/libavcodec/hevc/hevcdec.c +++ b/libavcodec/hevc/hevcdec.c @@ -185,7 +185,7 @@ static int pred_weight_table(SliceHeader *sh, void *logctx, av_log(logctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom); return AVERROR_INVALIDDATA; } - sh->luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3); + sh->luma_log2_weight_denom = luma_log2_weight_denom; if (sps->chroma_format_idc != 0) { int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb); if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) { -- 2.52.0 From cd0ae4c1367f3f1b1782d62864263f8ae9206ae3 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Tue, 23 Jun 2026 23:28:56 +0200 Subject: [PATCH 3/5] avcodec/hevc/dsp: Fix epel_bi_w parameter names It uses the same order as qpel_bi_w. 
Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/hevc/dsp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h index b884cd36be..cfd43e753e 100644 --- a/libavcodec/hevc/dsp.h +++ b/libavcodec/hevc/dsp.h @@ -97,7 +97,7 @@ typedef struct HEVCDSPContext { int height, intptr_t mx, intptr_t my, int width); void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, - int height, int denom, int wx0, int ox0, int wx1, + int height, int denom, int wx0, int wx1, int ox0, int ox1, intptr_t mx, intptr_t my, int width); void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, -- 2.52.0 From 634191de2c9a35c1875985e45bff1583c0504d77 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Wed, 24 Jun 2026 00:14:37 +0200 Subject: [PATCH 4/5] avcodec/hevc/dsp: Combine offsets early for biweight prediction Only the sum of the offsets is ever used (see equation 8-279 in the 2019 version of the H.265 spec). 
Signed-off-by: Andreas Rheinhardt <[email protected]> --- libavcodec/aarch64/h26x/dsp.h | 2 +- libavcodec/aarch64/h26x/epel_neon.S | 44 ++- libavcodec/hevc/dsp.h | 6 +- libavcodec/hevc/dsp_template.c | 49 ++-- libavcodec/hevc/hevcdec.c | 4 +- libavcodec/mips/hevc_mc_biw_msa.c | 440 ++++++++++++---------------- libavcodec/x86/hevc/dsp.h | 2 +- libavcodec/x86/hevc/dsp_init.c | 8 +- libavcodec/x86/hevc/mc.asm | 5 +- tests/checkasm/hevc_pel.c | 16 +- 10 files changed, 240 insertions(+), 336 deletions(-) diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h index 47a61d22c2..0cbbdc3157 100644 --- a/libavcodec/aarch64/h26x/dsp.h +++ b/libavcodec/aarch64/h26x/dsp.h @@ -95,7 +95,7 @@ NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride, NEON8_FNPROTO(pel_bi_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width),); + int ox, intptr_t mx, intptr_t my, int width),); NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S index ee584705de..22f58ba5fa 100644 --- a/libavcodec/aarch64/h26x/epel_neon.S +++ b/libavcodec/aarch64/h26x/epel_neon.S @@ -476,12 +476,11 @@ endfunc .macro load_bi_w_pixels_param ldrsw x8, [sp] // wx1 #if defined(__APPLE__) - ldpsw x9, x10, [sp, #4] // ox0, ox1 - ldrsw x11, [sp, #32] // width + ldrsw x9, [sp, #4] // ox + ldrsw x11, [sp, #24] // width #else - ldrsw x9, [sp, #8] // ox0 - ldrsw x10, [sp, #16] // ox1 - ldrsw x11, [sp, #40] // width + ldrsw x9, [sp, #8] // ox + ldrsw x11, [sp, #32] // width #endif .endm @@ -490,13 +489,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels4_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, 
w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) 1: ld1 {v4.8b}, [x2], x3 // load src @@ -519,13 +517,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels6_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) sub x1, x1, #4 1: @@ -555,13 +552,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels8_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) 1: ld1 {v4.8b}, [x2], x3 // load src @@ -589,13 +585,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels12_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) sub x1, x1, #8 1: @@ -637,13 +632,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels16_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, 
w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) 1: ld1 {v24.16b}, [x2], x3 // load src @@ -688,13 +682,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels24_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) mov x7, #24 sub x3, x3, x11 @@ -765,13 +758,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels32_8_neon, export=1 add w6, w6, #6 // log2Wd dup v0.8h, w7 // wx0 dup v1.8h, w8 // wx1 - add w9, w9, w10 - add w9, w9, #1 // ox0 + ox1 + 1 + add w9, w9, #1 // ox + 1 lsl w9, w9, w6 add w7, w6, #1 // (log2Wd + 1) mov x8, #(2 * HEVC_MAX_PB_SIZE) neg w7, w7 - dup v2.4s, w9 // (ox0 + ox1 + 1) << logwWd + dup v2.4s, w9 // (ox + 1) << logwWd dup v6.4s, w7 // -(log2Wd + 1) sub x3, x3, x11 sub x8, x8, x11, lsl #1 diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h index cfd43e753e..b66eb148dc 100644 --- a/libavcodec/hevc/dsp.h +++ b/libavcodec/hevc/dsp.h @@ -84,7 +84,7 @@ typedef struct HEVCDSPContext { void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width); + int ox, intptr_t mx, intptr_t my, int width); void (*put_hevc_epel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width); @@ -97,8 +97,8 @@ typedef struct HEVCDSPContext { int height, intptr_t mx, intptr_t my, int width); void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, - int height, int denom, int wx0, int wx1, int ox0, - int ox1, intptr_t mx, intptr_t my, int width); + int height, 
int denom, int wx0, int wx1, int ox, + intptr_t mx, intptr_t my, int width); void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int beta, const int32_t *tc, diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c index d902a0ca6b..8984fad69f 100644 --- a/libavcodec/hevc/dsp_template.c +++ b/libavcodec/hevc/dsp_template.c @@ -394,7 +394,7 @@ static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, co static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel *)_src; @@ -405,11 +405,10 @@ static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, int shift = 14 + 1 - BIT_DEPTH; int log2Wd = denom + shift - 1; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { - dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1)); + dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + ox * (1 << log2Wd)) >> (log2Wd + 1)); } src += srcstride; dst += dststride; @@ -529,7 +528,7 @@ static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel*)_src; @@ -542,12 +541,11 @@ static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, int shift = 14 + 1 - BIT_DEPTH; int 
log2Wd = denom + shift - 1; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); src += srcstride; dst += dststride; src2 += MAX_PB_SIZE; @@ -557,7 +555,7 @@ static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel*)_src; @@ -570,12 +568,11 @@ static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, int shift = 14 + 1 - BIT_DEPTH; int log2Wd = denom + shift - 1; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); src += srcstride; dst += dststride; src2 += MAX_PB_SIZE; @@ -585,7 +582,7 @@ static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const int8_t *filter; @@ -610,12 +607,11 @@ static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, tmp = tmp_array + 
QPEL_EXTRA_BEFORE * MAX_PB_SIZE; filter = ff_hevc_qpel_filters[my]; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); tmp += MAX_PB_SIZE; dst += dststride; src2 += MAX_PB_SIZE; @@ -727,7 +723,7 @@ static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel *)_src; @@ -738,12 +734,11 @@ static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, int shift = 14 + 1 - BIT_DEPTH; int log2Wd = denom + shift - 1; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); src += srcstride; dst += dststride; src2 += MAX_PB_SIZE; @@ -753,7 +748,7 @@ static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel *)_src; @@ -764,12 +759,11 @@ static void FUNC(put_hevc_epel_bi_w_v)(uint8_t 
*_dst, ptrdiff_t _dststride, int shift = 14 + 1 - BIT_DEPTH; int log2Wd = denom + shift - 1; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); src += srcstride; dst += dststride; src2 += MAX_PB_SIZE; @@ -779,7 +773,7 @@ static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width) + int ox, intptr_t mx, intptr_t my, int width) { int x, y; const pixel *src = (const pixel *)_src; @@ -804,12 +798,11 @@ static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, tmp = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; filter = ff_hevc_epel_filters[my]; - ox0 = ox0 * (1 << (BIT_DEPTH - 8)); - ox1 = ox1 * (1 << (BIT_DEPTH - 8)); + ox = ox * (1 << (BIT_DEPTH - 8)) + 1; for (y = 0; y < height; y++) { for (x = 0; x < width; x++) dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 + - ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1)); + ox * (1 << log2Wd)) >> (log2Wd + 1)); tmp += MAX_PB_SIZE; dst += dststride; src2 += MAX_PB_SIZE; diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c index f00acd07bf..b4c2d82e8d 100644 --- a/libavcodec/hevc/hevcdec.c +++ b/libavcodec/hevc/hevcdec.c @@ -1835,7 +1835,7 @@ static void luma_mc_bi(HEVCLocalContext *lc, block_h, s->sh.luma_log2_weight_denom, s->sh.luma_weight_l0[current_mv->ref_idx[0]], s->sh.luma_weight_l1[current_mv->ref_idx[1]], - s->sh.luma_offset_l0[current_mv->ref_idx[0]], + 
s->sh.luma_offset_l0[current_mv->ref_idx[0]] + s->sh.luma_offset_l1[current_mv->ref_idx[1]], mx1, my1, block_w); @@ -2016,7 +2016,7 @@ static void chroma_mc_bi(HEVCLocalContext *lc, s->sh.chroma_log2_weight_denom, s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx], s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx], - s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx], + s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx] + s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx], _mx1, _my1, block_w); } diff --git a/libavcodec/mips/hevc_mc_biw_msa.c b/libavcodec/mips/hevc_mc_biw_msa.c index 34be61c0dc..65f18f786e 100644 --- a/libavcodec/mips/hevc_mc_biw_msa.c +++ b/libavcodec/mips/hevc_mc_biw_msa.c @@ -88,8 +88,7 @@ static void hevc_biwgt_copy_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt, tp0, tp1, tp2, tp3; @@ -102,7 +101,7 @@ static void hevc_biwgt_copy_4w_msa(const uint8_t *src0_ptr, v8i16 dst0, dst1, dst2, dst3, weight_vec; v4i32 dst0_r, dst0_l, offset_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -177,8 +176,7 @@ static void hevc_biwgt_copy_6w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -192,7 +190,7 @@ static void hevc_biwgt_copy_6w_msa(const uint8_t *src0_ptr, v8i16 dst0, dst1, dst2, dst3; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -253,8 +251,7 @@ static void hevc_biwgt_copy_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint64_t tp0, tp1, 
tp2, tp3; @@ -266,7 +263,7 @@ static void hevc_biwgt_copy_8w_msa(const uint8_t *src0_ptr, v8i16 dst0, dst1, dst2, dst3, dst4, dst5; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -341,8 +338,7 @@ static void hevc_biwgt_copy_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -354,7 +350,7 @@ static void hevc_biwgt_copy_12w_msa(const uint8_t *src0_ptr, v8i16 dst0, dst1, dst2, dst3, dst4, dst5; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -400,8 +396,7 @@ static void hevc_biwgt_copy_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -413,7 +408,7 @@ static void hevc_biwgt_copy_16w_msa(const uint8_t *src0_ptr, v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -455,8 +450,7 @@ static void hevc_biwgt_copy_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -467,7 +461,7 @@ static void hevc_biwgt_copy_24w_msa(const uint8_t *src0_ptr, v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, dst11; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -519,8 +513,7 @@ static void 
hevc_biwgt_copy_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -532,7 +525,7 @@ static void hevc_biwgt_copy_32w_msa(const uint8_t *src0_ptr, v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -580,8 +573,7 @@ static void hevc_biwgt_copy_48w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -592,7 +584,7 @@ static void hevc_biwgt_copy_48w_msa(const uint8_t *src0_ptr, v8i16 dst0, dst1, dst2, dst3, dst4, dst5, in0, in1, in2, in3, in4, in5; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -632,8 +624,7 @@ static void hevc_biwgt_copy_64w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -645,7 +636,7 @@ static void hevc_biwgt_copy_64w_msa(const uint8_t *src0_ptr, v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v4i32 offset_vec, weight_vec, rnd_vec; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -688,8 +679,7 @@ static void hevc_hz_biwgt_8t_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -712,7 +702,7 @@ static void hevc_hz_biwgt_8t_4w_msa(const uint8_t *src0_ptr, mask2 = mask0 + 4; mask3 = mask0 + 6; - offset = (offset0 + offset1) << rnd_val; + 
offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -760,8 +750,7 @@ static void hevc_hz_biwgt_8t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -777,7 +766,7 @@ static void hevc_hz_biwgt_8t_8w_msa(const uint8_t *src0_ptr, v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]); src0_ptr -= 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -840,8 +829,7 @@ static void hevc_hz_biwgt_8t_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -858,7 +846,7 @@ static void hevc_hz_biwgt_8t_12w_msa(const uint8_t *src0_ptr, weight = weight0 | (weight1 << 16); constant = 128 * weight1; constant <<= 6; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; offset += constant; offset_vec = __msa_fill_w(offset); @@ -935,8 +923,7 @@ static void hevc_hz_biwgt_8t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -952,7 +939,7 @@ static void hevc_hz_biwgt_8t_16w_msa(const uint8_t *src0_ptr, v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; src0_ptr -= 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -1019,8 +1006,7 @@ static void hevc_hz_biwgt_8t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1038,7 +1024,7 @@ static void 
hevc_hz_biwgt_8t_24w_msa(const uint8_t *src0_ptr, v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]); src0_ptr = src0_ptr - 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -1141,8 +1127,7 @@ static void hevc_hz_biwgt_8t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1158,7 +1143,7 @@ static void hevc_hz_biwgt_8t_32w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, offset_vec, rnd_vec; src0_ptr -= 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -1227,8 +1212,7 @@ static void hevc_hz_biwgt_8t_48w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1244,7 +1228,7 @@ static void hevc_hz_biwgt_8t_48w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, offset_vec, rnd_vec; src0_ptr -= 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -1331,8 +1315,7 @@ static void hevc_hz_biwgt_8t_64w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { const uint8_t *src0_ptr_tmp; @@ -1351,7 +1334,7 @@ static void hevc_hz_biwgt_8t_64w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, offset_vec, rnd_vec; src0_ptr -= 3; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -1430,8 +1413,7 @@ static void hevc_vt_biwgt_8t_4w_msa(const uint8_t *src0_ptr, int32_t height, 
int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1451,7 +1433,7 @@ static void hevc_vt_biwgt_8t_4w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec; src0_ptr -= (3 * src_stride); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -1569,8 +1551,7 @@ static void hevc_vt_biwgt_8t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1586,7 +1567,7 @@ static void hevc_vt_biwgt_8t_8w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec; src0_ptr -= (3 * src_stride); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -1657,8 +1638,7 @@ static void hevc_vt_biwgt_8t_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -1677,7 +1657,7 @@ static void hevc_vt_biwgt_8t_12w_msa(const uint8_t *src0_ptr, v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec; src0_ptr -= (3 * src_stride); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -1767,8 +1747,7 @@ static void hevc_vt_biwgt_8t_16multx2mult_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val, int32_t width) { @@ -1791,7 +1770,7 @@ static void hevc_vt_biwgt_8t_16multx2mult_msa(const uint8_t *src0_ptr, src0_ptr -= (3 * src_stride); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; 
weight = weight0 | (weight1 << 16); @@ -1883,14 +1862,13 @@ static void hevc_vt_biwgt_8t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val, 16); } @@ -1904,19 +1882,18 @@ static void hevc_vt_biwgt_8t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val, 16); hevc_vt_biwgt_8t_8w_msa(src0_ptr + 16, src_stride, src1_ptr + 16, src2_stride, dst + 16, dst_stride, filter, height, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } static void hevc_vt_biwgt_8t_32w_msa(const uint8_t *src0_ptr, @@ -1929,14 +1906,13 @@ static void hevc_vt_biwgt_8t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val, 32); } @@ -1950,14 +1926,13 @@ static void hevc_vt_biwgt_8t_48w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val, 48); } @@ -1971,14 +1946,13 @@ static void hevc_vt_biwgt_8t_64w_msa(const uint8_t 
*src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val, 64); } @@ -1993,8 +1967,7 @@ static void hevc_hv_biwgt_8t_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -2030,7 +2003,7 @@ static void hevc_hv_biwgt_8t_4w_msa(const uint8_t *src0_ptr, mask2 = mask0 + 4; mask3 = mask0 + 6; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -2138,8 +2111,7 @@ static void hevc_hv_biwgt_8t_8multx2mult_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val, int32_t width8mult) { @@ -2169,7 +2141,7 @@ static void hevc_hv_biwgt_8t_8multx2mult_msa(const uint8_t *src0_ptr, src0_ptr -= ((3 * src_stride) + 3); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -2321,15 +2293,14 @@ static void hevc_hv_biwgt_8t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 1); + height, weight0, weight1, offset, + rnd_val, 1); } static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr, @@ -2343,8 +2314,7 @@ static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + 
int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -2370,7 +2340,7 @@ static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr, src0_ptr -= ((3 * src_stride) + 3); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -2594,15 +2564,14 @@ static void hevc_hv_biwgt_8t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 2); + height, weight0, weight1, offset, + rnd_val, 2); } static void hevc_hv_biwgt_8t_24w_msa(const uint8_t *src0_ptr, @@ -2616,15 +2585,14 @@ static void hevc_hv_biwgt_8t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 3); + height, weight0, weight1, offset, + rnd_val, 3); } static void hevc_hv_biwgt_8t_32w_msa(const uint8_t *src0_ptr, @@ -2638,15 +2606,14 @@ static void hevc_hv_biwgt_8t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 4); + height, weight0, weight1, offset, + rnd_val, 4); } static void hevc_hv_biwgt_8t_48w_msa(const uint8_t *src0_ptr, @@ -2660,15 +2627,14 @@ static void hevc_hv_biwgt_8t_48w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + 
int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 6); + height, weight0, weight1, offset, + rnd_val, 6); } static void hevc_hv_biwgt_8t_64w_msa(const uint8_t *src0_ptr, @@ -2682,15 +2648,14 @@ static void hevc_hv_biwgt_8t_64w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - height, weight0, weight1, offset0, - offset1, rnd_val, 8); + height, weight0, weight1, offset, + rnd_val, 8); } static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, @@ -2702,8 +2667,7 @@ static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t offset, weight, constant; @@ -2724,7 +2688,7 @@ static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -2762,8 +2726,7 @@ static void hevc_hz_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t offset, weight, constant; @@ -2785,7 +2748,7 @@ static void hevc_hz_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -2823,8 +2786,7 @@ static void hevc_hz_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t 
weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -2844,7 +2806,7 @@ static void hevc_hz_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -2898,23 +2860,22 @@ static void hevc_hz_biwgt_4t_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_hz_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (4 == height) { hevc_hz_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (0 == (height % 8)) { hevc_hz_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val); } } @@ -2929,8 +2890,7 @@ static void hevc_hz_biwgt_4t_6w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -2950,7 +2910,7 @@ static void hevc_hz_biwgt_4t_6w_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3002,8 +2962,7 @@ static void hevc_hz_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t 
offset1, + int32_t offset, int32_t rnd_val) { int32_t offset, weight, constant; @@ -3021,7 +2980,7 @@ static void hevc_hz_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3058,8 +3017,7 @@ static void hevc_hz_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t weight, offset, constant; @@ -3078,7 +3036,7 @@ static void hevc_hz_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3133,8 +3091,7 @@ static void hevc_hz_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3154,7 +3111,7 @@ static void hevc_hz_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3203,23 +3160,22 @@ static void hevc_hz_biwgt_4t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_hz_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (6 == height) { 
hevc_hz_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (0 == (height % 4)) { hevc_hz_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val); } } @@ -3234,8 +3190,7 @@ static void hevc_hz_biwgt_4t_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3258,7 +3213,7 @@ static void hevc_hz_biwgt_4t_12w_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3320,8 +3275,7 @@ static void hevc_hz_biwgt_4t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3341,7 +3295,7 @@ static void hevc_hz_biwgt_4t_16w_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3409,8 +3363,7 @@ static void hevc_hz_biwgt_4t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3430,7 +3383,7 @@ static void hevc_hz_biwgt_4t_24w_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 
0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3496,8 +3449,7 @@ static void hevc_hz_biwgt_4t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3517,7 +3469,7 @@ static void hevc_hz_biwgt_4t_32w_msa(const uint8_t *src0_ptr, filter_vec = LD_SH(filter); SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3568,8 +3520,7 @@ static void hevc_vt_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t weight, offset, constant; @@ -3583,7 +3534,7 @@ static void hevc_vt_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3633,8 +3584,7 @@ static void hevc_vt_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t weight, offset, constant; @@ -3649,7 +3599,7 @@ static void hevc_vt_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3701,8 +3651,7 @@ static void hevc_vt_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3719,7 +3668,7 @@ static void hevc_vt_biwgt_4t_4x8multiple_msa(const uint8_t 
*src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3787,23 +3736,22 @@ static void hevc_vt_biwgt_4t_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_vt_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (4 == height) { hevc_vt_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (0 == (height % 8)) { hevc_vt_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val); } } @@ -3818,8 +3766,7 @@ static void hevc_vt_biwgt_4t_6w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -3835,7 +3782,7 @@ static void hevc_vt_biwgt_4t_6w_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3924,8 +3871,7 @@ static void hevc_vt_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t offset, weight, constant; @@ -3938,7 +3884,7 @@ static void hevc_vt_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 
0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -3981,8 +3927,7 @@ static void hevc_vt_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, const int8_t *filter, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t offset, weight, constant; @@ -3997,7 +3942,7 @@ static void hevc_vt_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4053,8 +3998,7 @@ static void hevc_vt_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4069,7 +4013,7 @@ static void hevc_vt_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4127,23 +4071,22 @@ static void hevc_vt_biwgt_4t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_vt_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (6 == height) { hevc_vt_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else { hevc_vt_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter, height, - weight0, weight1, offset0, offset1, + weight0, weight1, offset, rnd_val); } } @@ -4158,8 +4101,7 @@ static void hevc_vt_biwgt_4t_12w_msa(const 
uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4176,7 +4118,7 @@ static void hevc_vt_biwgt_4t_12w_msa(const uint8_t *src0_ptr, src0_ptr -= (1 * src_stride); - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4250,8 +4192,7 @@ static void hevc_vt_biwgt_4t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4267,7 +4208,7 @@ static void hevc_vt_biwgt_4t_16w_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4344,8 +4285,7 @@ static void hevc_vt_biwgt_4t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4363,7 +4303,7 @@ static void hevc_vt_biwgt_4t_24w_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4481,8 +4421,7 @@ static void hevc_vt_biwgt_4t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4501,7 +4440,7 @@ static void hevc_vt_biwgt_4t_32w_msa(const uint8_t *src0_ptr, src0_ptr -= src_stride; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); constant = 128 * weight1; @@ -4600,8 +4539,7 @@ static void 
hevc_hv_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, const int8_t *filter_y, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint64_t tp0, tp1; @@ -4630,7 +4568,7 @@ static void hevc_hv_biwgt_4t_4x2_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -4684,8 +4622,7 @@ static void hevc_hv_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, const int8_t *filter_y, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint64_t tp0, tp1; @@ -4717,7 +4654,7 @@ static void hevc_hv_biwgt_4t_4x4_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -4782,8 +4719,7 @@ static void hevc_hv_biwgt_4t_4multx8mult_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -4818,7 +4754,7 @@ static void hevc_hv_biwgt_4t_4multx8mult_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -4927,24 +4863,23 @@ static void hevc_hv_biwgt_4t_4w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_hv_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (4 == height) { hevc_hv_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - weight0, weight1, offset0, 
offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (0 == (height % 8)) { hevc_hv_biwgt_4t_4multx8mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, height, weight0, weight1, - offset0, offset1, rnd_val); + offset, rnd_val); } } @@ -4959,8 +4894,7 @@ static void hevc_hv_biwgt_4t_6w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t tpw0, tpw1, tpw2, tpw3; @@ -4998,7 +4932,7 @@ static void hevc_hv_biwgt_4t_6w_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5141,8 +5075,7 @@ static void hevc_hv_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, const int8_t *filter_y, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { int32_t weight, offset; @@ -5174,7 +5107,7 @@ static void hevc_hv_biwgt_4t_8x2_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5237,8 +5170,7 @@ static void hevc_hv_biwgt_4t_8multx4_msa(const uint8_t *src0_ptr, const int8_t *filter_y, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val, int32_t width8mult) { @@ -5269,7 +5201,7 @@ static void hevc_hv_biwgt_4t_8multx4_msa(const uint8_t *src0_ptr, mask0 = LD_SB(ff_hevc_mask_arr); mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5362,8 +5294,7 @@ static void hevc_hv_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, const int8_t *filter_y, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { 
uint32_t offset, weight; @@ -5400,7 +5331,7 @@ static void hevc_hv_biwgt_4t_8x6_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5514,8 +5445,7 @@ static void hevc_hv_biwgt_4t_8multx4mult_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val, int32_t width) { @@ -5554,7 +5484,7 @@ static void hevc_hv_biwgt_4t_8multx4mult_msa(const uint8_t *src0_ptr, mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5665,29 +5595,28 @@ static void hevc_hv_biwgt_4t_8w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (2 == height) { hevc_hv_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (4 == height) { hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, - filter_y, weight0, weight1, offset0, - offset1, rnd_val, 1); + filter_y, weight0, weight1, offset, + rnd_val, 1); } else if (6 == height) { hevc_hv_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, - weight0, weight1, offset0, offset1, rnd_val); + weight0, weight1, offset, rnd_val); } else if (0 == (height % 4)) { hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, height, weight0, - weight1, offset0, offset1, rnd_val, 8); + weight1, offset, rnd_val, 8); } } @@ -5702,8 +5631,7 @@ static void hevc_hv_biwgt_4t_12w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, 
int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { uint32_t loop_cnt; @@ -5741,7 +5669,7 @@ static void hevc_hv_biwgt_4t_12w_msa(const uint8_t *src0_ptr, mask0 = LD_SB(ff_hevc_mask_arr); mask1 = mask0 + 2; - offset = (offset0 + offset1) << rnd_val; + offset = offset << rnd_val; weight0 = weight0 & 0x0000FFFF; weight = weight0 | (weight1 << 16); @@ -5944,20 +5872,19 @@ static void hevc_hv_biwgt_4t_16w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { if (4 == height) { hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, - filter_y, weight0, weight1, offset0, - offset1, rnd_val, 2); + filter_y, weight0, weight1, offset, + rnd_val, 2); } else { hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, height, weight0, - weight1, offset0, offset1, rnd_val, 16); + weight1, offset, rnd_val, 16); } } @@ -5972,15 +5899,14 @@ static void hevc_hv_biwgt_4t_24w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, height, weight0, - weight1, offset0, offset1, rnd_val, 24); + weight1, offset, rnd_val, 24); } static void hevc_hv_biwgt_4t_32w_msa(const uint8_t *src0_ptr, @@ -5994,15 +5920,14 @@ static void hevc_hv_biwgt_4t_32w_msa(const uint8_t *src0_ptr, int32_t height, int32_t weight0, int32_t weight1, - int32_t offset0, - int32_t offset1, + int32_t offset, int32_t rnd_val) { hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr, src2_stride, dst, dst_stride, filter_x, filter_y, height, weight0, - weight1, offset0, offset1, rnd_val, 32); + weight1, offset, rnd_val, 32); } #define BI_W_MC_COPY(WIDTH) \ @@ 
-6015,8 +5940,7 @@ void ff_hevc_put_hevc_bi_w_pel_pixels##WIDTH##_8_msa(uint8_t *dst, \ int denom, \ int weight0, \ int weight1, \ - int offset0, \ - int offset1, \ + int offset, \ intptr_t mx, \ intptr_t my, \ int width) \ @@ -6026,8 +5950,7 @@ void ff_hevc_put_hevc_bi_w_pel_pixels##WIDTH##_8_msa(uint8_t *dst, \ \ hevc_biwgt_copy_##WIDTH##w_msa(src, src_stride, src_16bit, MAX_PB_SIZE, \ dst, dst_stride, height, \ - weight0, weight1, offset0, \ - offset1, log2Wd); \ + weight0, weight1, offset, log2Wd); \ } BI_W_MC_COPY(4); @@ -6054,8 +5977,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ int denom, \ int weight0, \ int weight1, \ - int offset0, \ - int offset1, \ + int offset, \ intptr_t mx, \ intptr_t my, \ int width) \ @@ -6066,8 +5988,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst, \ hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit, \ MAX_PB_SIZE, dst, dst_stride, \ filter, height, weight0, \ - weight1, offset0, offset1, \ - log2Wd); \ + weight1, offset, log2Wd); \ } BI_W_MC(qpel, h, 4, 8, hz, mx); @@ -6116,8 +6037,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst, \ int denom, \ int weight0, \ int weight1, \ - int offset0, \ - int offset1, \ + int offset, \ intptr_t mx, \ intptr_t my, \ int width) \ @@ -6129,8 +6049,8 @@ void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst, \ hevc_hv_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit, \ MAX_PB_SIZE, dst, dst_stride, \ filter_x, filter_y, height, \ - weight0, weight1, offset0, \ - offset1, log2Wd); \ + weight0, weight1, offset, \ + log2Wd); \ } BI_W_MC_HV(qpel, 4, 8); diff --git a/libavcodec/x86/hevc/dsp.h b/libavcodec/x86/hevc/dsp.h index 0062699ce0..69d3987cdb 100644 --- a/libavcodec/x86/hevc/dsp.h +++ b/libavcodec/x86/hevc/dsp.h @@ -41,7 +41,7 @@ bi_pel_func ff_hevc_put_bi_ ## name ## W ## _ ## D ## _##opt #define WEIGHTING_PROTOTYPE(width, bitd, opt) \ void 
ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom, int _wx, int _ox); \ -void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int _wx0, int _wx1, int _ox0, int _ox1) +void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int wx0, int wx1, int ox) #define WEIGHTING_PROTOTYPES(bitd, opt) \ WEIGHTING_PROTOTYPE(4, bitd, opt); \ diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c index bd967eac67..ca3962b3f2 100644 --- a/libavcodec/x86/hevc/dsp_init.c +++ b/libavcodec/x86/hevc/dsp_init.c @@ -575,7 +575,7 @@ mc_rep_uni_w(12, 8, 64, sse4) #define mc_rep_bi_w(bitd, step, W, opt) \ void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, const int16_t *_src, \ const int16_t *_src2, int height, \ - int denom, int _wx0, int _wx1, int _ox0, int _ox1) \ + int denom, int wx0, int wx1, int ox) \ { \ int i; \ uint8_t *dst; \ @@ -584,7 +584,7 @@ void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, co const int16_t *src2 = _src2 + i; \ dst = _dst + (i * ((bitd + 7) / 8)); \ ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2, \ - height, denom, _wx0, _wx1, _ox0, _ox1); \ + height, denom, wx0, wx1, ox); \ } \ } @@ -672,13 +672,13 @@ static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _d const uint8_t *_src, ptrdiff_t _srcstride, \ const int16_t *_src2, \ int height, int denom, \ - int _wx0, int _wx1, int _ox0, int _ox1, \ + int wx0, int wx1, int ox, \ intptr_t mx, intptr_t my, int width) \ { \ LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]); \ hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, my, width); \ ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2, \ - height, denom, _wx0, 
_wx1, _ox0, _ox1); \ + height, denom, wx0, wx1, ox); \ } #define mc_bi_w_funcs(name, bitd, opt) \ diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm index 66ed406c26..e76807c6e2 100644 --- a/libavcodec/x86/hevc/mc.asm +++ b/libavcodec/x86/hevc/mc.asm @@ -1090,7 +1090,7 @@ cglobal hevc_put_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox jnz .loop ; height loop RET -cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1 +cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox movifnidn r5d, denomm movd m3, wx0m ; WX0 lea r5d, [r5d+14-%2] ; shift = 14 - bitd + denom @@ -1100,8 +1100,7 @@ cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, w inc r5d movd m5, r5d ; shift+1 pshufd m2, m2, 0 - mov r5d, ox0m - add r5d, ox1m + mov r5d, oxm %if %2 != 8 shl r5d, %2-8 ; ox << (bitd - 8) %endif diff --git a/tests/checkasm/hevc_pel.c b/tests/checkasm/hevc_pel.c index e89facb1dc..000d1e7a57 100644 --- a/tests/checkasm/hevc_pel.c +++ b/tests/checkasm/hevc_pel.c @@ -298,7 +298,7 @@ static void checkasm_check_hevc_qpel_bi_w(void) declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width); + int ox, intptr_t mx, intptr_t my, int width); for (bit_depth = 8; bit_depth <= 12; bit_depth++) { ff_hevc_dsp_init(&h, bit_depth); @@ -324,16 +324,16 @@ static void checkasm_check_hevc_qpel_bi_w(void) CLEAR_PIXEL_RECT(dst1); call_ref(dst0, dst0_stride, src0, sizes[idx] * SIZEOF_PIXEL, - ref0, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref0, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]); call_new(dst1, dst1_stride, src1, sizes[idx] * SIZEOF_PIXEL, - ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, 
sizes[idx]); checkasm_check_pixel_padded(dst0, dst0_stride, dst1, dst1_stride, sizes[idx], sizes[idx], "dst"); bench_new(dst1, dst1_stride, src1, sizes[idx] * SIZEOF_PIXEL, - ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]); } } } @@ -571,7 +571,7 @@ static void checkasm_check_hevc_epel_bi_w(void) declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, int height, int denom, int wx0, int wx1, - int ox0, int ox1, intptr_t mx, intptr_t my, int width); + int ox, intptr_t mx, intptr_t my, int width); for (bit_depth = 8; bit_depth <= 12; bit_depth++) { ff_hevc_dsp_init(&h, bit_depth); @@ -597,16 +597,16 @@ static void checkasm_check_hevc_epel_bi_w(void) CLEAR_PIXEL_RECT(dst1); call_ref(dst0, dst0_stride, src0, sizes[idx] * SIZEOF_PIXEL, - ref0, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref0, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]); call_new(dst1, dst1_stride, src1, sizes[idx] * SIZEOF_PIXEL, - ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]); checkasm_check_pixel_padded(dst0, dst0_stride, dst1, dst1_stride, sizes[idx], sizes[idx], "dst"); bench_new(dst1, dst1_stride, src1, sizes[idx] * SIZEOF_PIXEL, - ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]); + ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]); } } } -- 2.52.0 >From 2d27de9aa1981aaaf643f25d5eadef11fd8c4f54 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt <[email protected]> Date: Wed, 24 Jun 2026 01:07:02 +0200 Subject: [PATCH 5/5] tests/checkasm/hevc_pel: Don't test impossible values denom is in 0..7 (see pred_weight_table() in hevcdec.c). 
Signed-off-by: Andreas Rheinhardt <[email protected]> --- tests/checkasm/hevc_pel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/checkasm/hevc_pel.c b/tests/checkasm/hevc_pel.c index 000d1e7a57..12d28eaccb 100644 --- a/tests/checkasm/hevc_pel.c +++ b/tests/checkasm/hevc_pel.c @@ -30,7 +30,7 @@ static const uint32_t pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07f static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff }; static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 }; static const int weights[] = { 0, 128, 255, -1 }; -static const int denoms[] = {0, 7, 12, -1 }; +static const int denoms[] = {0, 7, -1 }; static const int offsets[] = {0, 255, -1 }; #define SIZEOF_PIXEL ((bit_depth + 7) / 8) -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
