mc}: Improve biweight prediction (PR #23571)

mkver via ffmpeg-devel Tue, 23 Jun 2026 16:26:05 -0700

PR #23571 opened by mkver
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23571
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23571.patch


I'd really appreciate it if someone knowledgeable (like @mstorsjo) would check
the Apple AArch64 changes in the "Combine offsets early for biweight prediction"
commit.


From 63239b63b1f50e926556b6edae98fb3d515be8ee Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 23 Jun 2026 21:37:46 +0200
Subject: [PATCH 1/5] avcodec/x86/hevc/mc: Use pmaddwd in biweight functions

Improves performance and saves 384B of .text here;
also avoids using nonvolatile registers on Win64.

Old benchmarks (just a selection):
  put_hevc_qpel_bi_w_v16_8_c:           1158.1
  put_hevc_qpel_bi_w_v16_8_sse4:         214.4 ( 5.40x)
  put_hevc_qpel_bi_w_v16_10_c:          2193.3
  put_hevc_qpel_bi_w_v16_10_sse4:        318.5 ( 6.89x)
  put_hevc_qpel_bi_w_v16_12_c:          2188.5
  put_hevc_qpel_bi_w_v16_12_sse4:        317.8 ( 6.89x)
  put_hevc_qpel_bi_w_v24_8_c:           2940.5
  put_hevc_qpel_bi_w_v24_8_sse4:         502.9 ( 5.85x)
  put_hevc_qpel_bi_w_v24_10_c:          4557.1
  put_hevc_qpel_bi_w_v24_10_sse4:        686.0 ( 6.64x)
  put_hevc_qpel_bi_w_v24_12_c:          4557.1
  put_hevc_qpel_bi_w_v24_12_sse4:        688.4 ( 6.62x)
  put_hevc_qpel_bi_w_v32_8_c:           3753.0
  put_hevc_qpel_bi_w_v32_8_sse4:         817.8 ( 4.59x)
  put_hevc_qpel_bi_w_v32_10_c:          6504.2
  put_hevc_qpel_bi_w_v32_10_sse4:       1227.7 ( 5.30x)
  put_hevc_qpel_bi_w_v32_12_c:          6502.0
  put_hevc_qpel_bi_w_v32_12_sse4:       1230.8 ( 5.28x)
  put_hevc_qpel_bi_w_v48_8_c:           7756.0
  put_hevc_qpel_bi_w_v48_8_sse4:        1805.9 ( 4.29x)
  put_hevc_qpel_bi_w_v48_10_c:         12938.4
  put_hevc_qpel_bi_w_v48_10_sse4:       2690.5 ( 4.81x)
  put_hevc_qpel_bi_w_v48_12_c:         12934.1
  put_hevc_qpel_bi_w_v48_12_sse4:       2691.2 ( 4.81x)
  put_hevc_qpel_bi_w_v64_8_c:          13212.5
  put_hevc_qpel_bi_w_v64_8_sse4:        3183.9 ( 4.15x)
  put_hevc_qpel_bi_w_v64_10_c:         21520.0
  put_hevc_qpel_bi_w_v64_10_sse4:       4854.9 ( 4.43x)
  put_hevc_qpel_bi_w_v64_12_c:         21529.5
  put_hevc_qpel_bi_w_v64_12_sse4:       4860.9 ( 4.43x)

New benchmarks:
  put_hevc_qpel_bi_w_v16_8_c:           1159.1
  put_hevc_qpel_bi_w_v16_8_sse4:         176.7 ( 6.56x)
  put_hevc_qpel_bi_w_v16_10_c:          2196.5
  put_hevc_qpel_bi_w_v16_10_sse4:        279.9 ( 7.85x)
  put_hevc_qpel_bi_w_v16_12_c:          2189.3
  put_hevc_qpel_bi_w_v16_12_sse4:        280.5 ( 7.80x)
  put_hevc_qpel_bi_w_v24_8_c:           2940.4
  put_hevc_qpel_bi_w_v24_8_sse4:         417.5 ( 7.04x)
  put_hevc_qpel_bi_w_v24_10_c:          4553.1
  put_hevc_qpel_bi_w_v24_10_sse4:        605.9 ( 7.51x)
  put_hevc_qpel_bi_w_v24_12_c:          4573.9
  put_hevc_qpel_bi_w_v24_12_sse4:        605.6 ( 7.55x)
  put_hevc_qpel_bi_w_v32_8_c:           3752.2
  put_hevc_qpel_bi_w_v32_8_sse4:         668.2 ( 5.61x)
  put_hevc_qpel_bi_w_v32_10_c:          6482.2
  put_hevc_qpel_bi_w_v32_10_sse4:       1077.4 ( 6.02x)
  put_hevc_qpel_bi_w_v32_12_c:          6484.9
  put_hevc_qpel_bi_w_v32_12_sse4:       1088.0 ( 5.96x)
  put_hevc_qpel_bi_w_v48_8_c:           7765.1
  put_hevc_qpel_bi_w_v48_8_sse4:        1467.8 ( 5.29x)
  put_hevc_qpel_bi_w_v48_10_c:         12902.8
  put_hevc_qpel_bi_w_v48_10_sse4:       2356.6 ( 5.47x)
  put_hevc_qpel_bi_w_v48_12_c:         12931.1
  put_hevc_qpel_bi_w_v48_12_sse4:       2356.6 ( 5.49x)
  put_hevc_qpel_bi_w_v64_8_c:          13207.2
  put_hevc_qpel_bi_w_v64_8_sse4:        2624.9 ( 5.03x)
  put_hevc_qpel_bi_w_v64_10_c:         21542.3
  put_hevc_qpel_bi_w_v64_10_sse4:       4438.8 ( 4.85x)
  put_hevc_qpel_bi_w_v64_12_c:         21537.4
  put_hevc_qpel_bi_w_v64_12_sse4:       4359.5 ( 4.94x)

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/hevc/mc.asm | 38 +++++++++-----------------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm
index 550f7a0e23..66ed406c26 100644
--- a/libavcodec/x86/hevc/mc.asm
+++ b/libavcodec/x86/hevc/mc.asm
@@ -1090,27 +1090,17 @@ cglobal hevc_put_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox
     jnz               .loop                      ; height loop
     RET
 
-cglobal hevc_put_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1
+cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1
     movifnidn        r5d, denomm
-%if %1 <= 4
-    pxor              m1, m1
-%endif
-    movd              m2, wx0m         ; WX0
+    movd              m3, wx0m         ; WX0
     lea              r5d, [r5d+14-%2]  ; shift = 14 - bitd + denom
-    movd              m3, wx1m         ; WX1
+    movd              m2, wx1m         ; WX1
     movd              m0, r5d          ; shift
-%if %1 <= 4
-    punpcklwd         m2, m1
-    punpcklwd         m3, m1
-%else
-    punpcklwd         m2, m2
-    punpcklwd         m3, m3
-%endif
+    punpcklwd         m2, m3
     inc              r5d
     movd              m5, r5d          ; shift+1
     pshufd            m2, m2, 0
     mov              r5d, ox0m
-    pshufd            m3, m3, 0
     add              r5d, ox1m
 %if %2 != 8
     shl              r5d, %2-8         ; ox << (bitd - 8)
@@ -1128,26 +1118,16 @@ cglobal hevc_put_bi_w%1_%2, 4, 6, 10, dst, dststride, src, src2, height, denom,
 
 .loop:
    SIMPLE_LOAD        %1, 10, srcq,  m0
-   SIMPLE_LOAD        %1, 10, src2q, m8
+   SIMPLE_LOAD        %1, 10, src2q, m1
 %if %1 <= 4
     punpcklwd         m0, m1
-    punpcklwd         m8, m1
-    pmaddwd           m0, m3
-    pmaddwd           m8, m2
+    pmaddwd           m0, m2
     paddd             m0, m4
-    paddd             m0, m8
     psrad             m0, m5
 %else
-    pmulhw            m6, m0, m3
-    pmullw            m0, m3
-    pmulhw            m7, m8, m2
-    pmullw            m8, m2
-    punpckhwd         m1, m0, m6
-    punpcklwd         m0, m6
-    punpckhwd         m9, m8, m7
-    punpcklwd         m8, m7
-    paddd             m0, m8
-    paddd             m1, m9
+    SBUTTERFLY        wd, 0, 1, 3
+    pmaddwd           m0, m2
+    pmaddwd           m1, m2
     paddd             m0, m4
     paddd             m1, m4
     psrad             m0, m5
-- 
2.52.0


From 4b1cc171ac01fcf79ce2965bfb70e2743f66c642 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 23 Jun 2026 21:58:47 +0200
Subject: [PATCH 2/5] avcodec/hevc/hevcdec: Remove redundant clipping

Forgotten in f82dd4c09b2decb033f1e339d4be81efd38554f1.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/hevc/hevcdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index ae064ec8af..f00acd07bf 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -185,7 +185,7 @@ static int pred_weight_table(SliceHeader *sh, void *logctx,
         av_log(logctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is invalid\n", luma_log2_weight_denom);
         return AVERROR_INVALIDDATA;
     }
-    sh->luma_log2_weight_denom = av_clip_uintp2(luma_log2_weight_denom, 3);
+    sh->luma_log2_weight_denom = luma_log2_weight_denom;
     if (sps->chroma_format_idc != 0) {
         int64_t chroma_log2_weight_denom = luma_log2_weight_denom + (int64_t)get_se_golomb(gb);
         if (chroma_log2_weight_denom < 0 || chroma_log2_weight_denom > 7) {
-- 
2.52.0


From cd0ae4c1367f3f1b1782d62864263f8ae9206ae3 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Tue, 23 Jun 2026 23:28:56 +0200
Subject: [PATCH 3/5] avcodec/hevc/dsp: Fix epel_bi_w parameter names

It uses the same order as qpel_bi_w.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/hevc/dsp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h
index b884cd36be..cfd43e753e 100644
--- a/libavcodec/hevc/dsp.h
+++ b/libavcodec/hevc/dsp.h
@@ -97,7 +97,7 @@ typedef struct HEVCDSPContext {
                                        int height, intptr_t mx, intptr_t my, int width);
     void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
                                          const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
-                                         int height, int denom, int wx0, int ox0, int wx1,
+                                         int height, int denom, int wx0, int wx1, int ox0,
                                          int ox1, intptr_t mx, intptr_t my, int width);
 
     void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
-- 
2.52.0


From 634191de2c9a35c1875985e45bff1583c0504d77 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 24 Jun 2026 00:14:37 +0200
Subject: [PATCH 4/5] avcodec/hevc/dsp: Combine offsets early for biweight
 prediction

Only the sum of the offsets is ever used
(see equation 8-279 in the 2019 version of the H.265 spec).

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/aarch64/h26x/dsp.h       |   2 +-
 libavcodec/aarch64/h26x/epel_neon.S |  44 ++-
 libavcodec/hevc/dsp.h               |   6 +-
 libavcodec/hevc/dsp_template.c      |  49 ++--
 libavcodec/hevc/hevcdec.c           |   4 +-
 libavcodec/mips/hevc_mc_biw_msa.c   | 440 ++++++++++++----------------
 libavcodec/x86/hevc/dsp.h           |   2 +-
 libavcodec/x86/hevc/dsp_init.c      |   8 +-
 libavcodec/x86/hevc/mc.asm          |   5 +-
 tests/checkasm/hevc_pel.c           |  16 +-
 10 files changed, 240 insertions(+), 336 deletions(-)

diff --git a/libavcodec/aarch64/h26x/dsp.h b/libavcodec/aarch64/h26x/dsp.h
index 47a61d22c2..0cbbdc3157 100644
--- a/libavcodec/aarch64/h26x/dsp.h
+++ b/libavcodec/aarch64/h26x/dsp.h
@@ -95,7 +95,7 @@ NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
 NEON8_FNPROTO(pel_bi_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
         const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
         int height, int denom, int wx0, int wx1,
-        int ox0, int ox1, intptr_t mx, intptr_t my, int width),);
+        int ox, intptr_t mx, intptr_t my, int width),);
 
 NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
         const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
diff --git a/libavcodec/aarch64/h26x/epel_neon.S b/libavcodec/aarch64/h26x/epel_neon.S
index ee584705de..22f58ba5fa 100644
--- a/libavcodec/aarch64/h26x/epel_neon.S
+++ b/libavcodec/aarch64/h26x/epel_neon.S
@@ -476,12 +476,11 @@ endfunc
 .macro load_bi_w_pixels_param
         ldrsw           x8, [sp]            // wx1
 #if defined(__APPLE__)
-        ldpsw           x9, x10, [sp, #4]   // ox0, ox1
-        ldrsw           x11, [sp, #32]      // width
+        ldrsw           x9, [sp, #4]        // ox
+        ldrsw           x11, [sp, #24]      // width
 #else
-        ldrsw           x9, [sp, #8]        // ox0
-        ldrsw           x10, [sp, #16]      // ox1
-        ldrsw           x11, [sp, #40]      // width
+        ldrsw           x9, [sp, #8]        // ox
+        ldrsw           x11, [sp, #32]      // width
 #endif
 .endm
 
@@ -490,13 +489,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels4_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
 1:
         ld1             {v4.8b}, [x2], x3       // load src
@@ -519,13 +517,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels6_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
         sub             x1, x1, #4
 1:
@@ -555,13 +552,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels8_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
 1:
         ld1             {v4.8b}, [x2], x3       // load src
@@ -589,13 +585,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels12_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
         sub             x1, x1, #8
 1:
@@ -637,13 +632,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels16_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
 1:
         ld1             {v24.16b}, [x2], x3          // load src
@@ -688,13 +682,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels24_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
         mov             x7, #24
         sub             x3, x3, x11
@@ -765,13 +758,12 @@ function ff_hevc_put_hevc_pel_bi_w_pixels32_8_neon, export=1
         add             w6, w6, #6              // log2Wd
         dup             v0.8h, w7               // wx0
         dup             v1.8h, w8               // wx1
-        add             w9, w9, w10
-        add             w9, w9, #1              // ox0 + ox1 + 1
+        add             w9, w9, #1              // ox + 1
         lsl             w9, w9, w6
         add             w7, w6, #1              // (log2Wd + 1)
         mov             x8, #(2 * HEVC_MAX_PB_SIZE)
         neg             w7, w7
-        dup             v2.4s, w9               // (ox0 + ox1 + 1) << logwWd
+        dup             v2.4s, w9               // (ox + 1) << log2Wd
         dup             v6.4s, w7               // -(log2Wd + 1)
         sub             x3, x3, x11
         sub             x8, x8, x11, lsl #1
diff --git a/libavcodec/hevc/dsp.h b/libavcodec/hevc/dsp.h
index cfd43e753e..b66eb148dc 100644
--- a/libavcodec/hevc/dsp.h
+++ b/libavcodec/hevc/dsp.h
@@ -84,7 +84,7 @@ typedef struct HEVCDSPContext {
     void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
                                          const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
                                          int height, int denom, int wx0, int wx1,
-                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);
+                                         int ox, intptr_t mx, intptr_t my, int width);
     void (*put_hevc_epel[10][2][2])(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
                                     int height, intptr_t mx, intptr_t my, int width);
 
@@ -97,8 +97,8 @@ typedef struct HEVCDSPContext {
                                        int height, intptr_t mx, intptr_t my, int width);
     void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride,
                                          const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
-                                         int height, int denom, int wx0, int wx1, int ox0,
-                                         int ox1, intptr_t mx, intptr_t my, int width);
+                                         int height, int denom, int wx0, int wx1, int ox,
+                                         intptr_t mx, intptr_t my, int width);
 
     void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
                                     int beta, const int32_t *tc,
diff --git a/libavcodec/hevc/dsp_template.c b/libavcodec/hevc/dsp_template.c
index d902a0ca6b..8984fad69f 100644
--- a/libavcodec/hevc/dsp_template.c
+++ b/libavcodec/hevc/dsp_template.c
@@ -394,7 +394,7 @@ static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, 
ptrdiff_t _dststride, co
 static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t 
_dststride, const uint8_t *_src, ptrdiff_t _srcstride,
                                            const int16_t *src2,
                                            int height, int denom, int wx0, int 
wx1,
-                                           int ox0, int ox1, intptr_t mx, 
intptr_t my, int width)
+                                           int ox, intptr_t mx, intptr_t my, 
int width)
 {
     int x, y;
     const pixel *src    = (const pixel *)_src;
@@ -405,11 +405,10 @@ static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, 
ptrdiff_t _dststride,
     int shift = 14  + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++) {
-            dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + 
src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
+            dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + 
src2[x] * wx0 + ox * (1 << log2Wd)) >> (log2Wd + 1));
         }
         src  += srcstride;
         dst  += dststride;
@@ -529,7 +528,7 @@ static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
                                        const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                        int height, int denom, int wx0, int wx1,
-                                       int ox0, int ox1, intptr_t mx, intptr_t 
my, int width)
+                                       int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const pixel  *src       = (const pixel*)_src;
@@ -542,12 +541,11 @@ static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, 
ptrdiff_t _dststride,
     int shift = 14  + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * 
wx1 + src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         src  += srcstride;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
@@ -557,7 +555,7 @@ static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
                                        const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                        int height, int denom, int wx0, int wx1,
-                                       int ox0, int ox1, intptr_t mx, intptr_t 
my, int width)
+                                       int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const pixel  *src       = (const pixel*)_src;
@@ -570,12 +568,11 @@ static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, 
ptrdiff_t _dststride,
     int shift = 14 + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH 
- 8)) * wx1 + src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         src  += srcstride;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
@@ -585,7 +582,7 @@ static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
                                         const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                         int height, int denom, int wx0, int 
wx1,
-                                        int ox0, int ox1, intptr_t mx, 
intptr_t my, int width)
+                                        int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const int8_t *filter;
@@ -610,12 +607,11 @@ static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, 
ptrdiff_t _dststride,
     tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE;
     filter = ff_hevc_qpel_filters[my];
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 
+ src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         tmp  += MAX_PB_SIZE;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
@@ -727,7 +723,7 @@ static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride,
                                        const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                        int height, int denom, int wx0, int wx1,
-                                       int ox0, int ox1, intptr_t mx, intptr_t 
my, int width)
+                                       int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const pixel *src = (const pixel *)_src;
@@ -738,12 +734,11 @@ static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, 
ptrdiff_t _dststride,
     int shift = 14 + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * 
wx1 + src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         src  += srcstride;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
@@ -753,7 +748,7 @@ static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride,
                                        const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                        int height, int denom, int wx0, int wx1,
-                                       int ox0, int ox1, intptr_t mx, intptr_t 
my, int width)
+                                       int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const pixel *src = (const pixel *)_src;
@@ -764,12 +759,11 @@ static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, 
ptrdiff_t _dststride,
     int shift = 14 + 1 - BIT_DEPTH;
     int log2Wd = denom + shift - 1;
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH 
- 8)) * wx1 + src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         src  += srcstride;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
@@ -779,7 +773,7 @@ static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, 
ptrdiff_t _dststride,
 static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride,
                                         const uint8_t *_src, ptrdiff_t 
_srcstride, const int16_t *src2,
                                         int height, int denom, int wx0, int 
wx1,
-                                        int ox0, int ox1, intptr_t mx, 
intptr_t my, int width)
+                                        int ox, intptr_t mx, intptr_t my, int 
width)
 {
     int x, y;
     const pixel *src = (const pixel *)_src;
@@ -804,12 +798,11 @@ static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, 
ptrdiff_t _dststride,
     tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE;
     filter = ff_hevc_epel_filters[my];
 
-    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
-    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
+    ox = ox * (1 << (BIT_DEPTH - 8)) + 1;
     for (y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
             dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 
+ src2[x] * wx0 +
-                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> 
(log2Wd + 1));
+                                    ox * (1 << log2Wd)) >> (log2Wd + 1));
         tmp  += MAX_PB_SIZE;
         dst  += dststride;
         src2 += MAX_PB_SIZE;
diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index f00acd07bf..b4c2d82e8d 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -1835,7 +1835,7 @@ static void luma_mc_bi(HEVCLocalContext *lc,
                                                          block_h, s->sh.luma_log2_weight_denom,
                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
-                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]],
+                                                         s->sh.luma_offset_l0[current_mv->ref_idx[0]] +
                                                          s->sh.luma_offset_l1[current_mv->ref_idx[1]],
                                                          mx1, my1, block_w);
 
@@ -2016,7 +2016,7 @@ static void chroma_mc_bi(HEVCLocalContext *lc,
                                                          
s->sh.chroma_log2_weight_denom,
                                                          
s->sh.chroma_weight_l0[current_mv->ref_idx[0]][cidx],
                                                          
s->sh.chroma_weight_l1[current_mv->ref_idx[1]][cidx],
-                                                         
s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx],
+                                                         
s->sh.chroma_offset_l0[current_mv->ref_idx[0]][cidx] +
                                                          
s->sh.chroma_offset_l1[current_mv->ref_idx[1]][cidx],
                                                          _mx1, _my1, block_w);
 }
diff --git a/libavcodec/mips/hevc_mc_biw_msa.c b/libavcodec/mips/hevc_mc_biw_msa.c
index 34be61c0dc..65f18f786e 100644
--- a/libavcodec/mips/hevc_mc_biw_msa.c
+++ b/libavcodec/mips/hevc_mc_biw_msa.c
@@ -88,8 +88,7 @@ static void hevc_biwgt_copy_4w_msa(const uint8_t *src0_ptr,
                                    int32_t height,
                                    int32_t weight0,
                                    int32_t weight1,
-                                   int32_t offset0,
-                                   int32_t offset1,
+                                   int32_t offset,
                                    int32_t rnd_val)
 {
     uint32_t loop_cnt, tp0, tp1, tp2, tp3;
@@ -102,7 +101,7 @@ static void hevc_biwgt_copy_4w_msa(const uint8_t *src0_ptr,
     v8i16 dst0, dst1, dst2, dst3, weight_vec;
     v4i32 dst0_r, dst0_l, offset_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -177,8 +176,7 @@ static void hevc_biwgt_copy_6w_msa(const uint8_t *src0_ptr,
                                    int32_t height,
                                    int32_t weight0,
                                    int32_t weight1,
-                                   int32_t offset0,
-                                   int32_t offset1,
+                                   int32_t offset,
                                    int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -192,7 +190,7 @@ static void hevc_biwgt_copy_6w_msa(const uint8_t *src0_ptr,
     v8i16 dst0, dst1, dst2, dst3;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -253,8 +251,7 @@ static void hevc_biwgt_copy_8w_msa(const uint8_t *src0_ptr,
                                    int32_t height,
                                    int32_t weight0,
                                    int32_t weight1,
-                                   int32_t offset0,
-                                   int32_t offset1,
+                                   int32_t offset,
                                    int32_t rnd_val)
 {
     uint64_t tp0, tp1, tp2, tp3;
@@ -266,7 +263,7 @@ static void hevc_biwgt_copy_8w_msa(const uint8_t *src0_ptr,
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -341,8 +338,7 @@ static void hevc_biwgt_copy_12w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -354,7 +350,7 @@ static void hevc_biwgt_copy_12w_msa(const uint8_t *src0_ptr,
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -400,8 +396,7 @@ static void hevc_biwgt_copy_16w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -413,7 +408,7 @@ static void hevc_biwgt_copy_16w_msa(const uint8_t *src0_ptr,
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -455,8 +450,7 @@ static void hevc_biwgt_copy_24w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -467,7 +461,7 @@ static void hevc_biwgt_copy_24w_msa(const uint8_t *src0_ptr,
     v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, dst11;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -519,8 +513,7 @@ static void hevc_biwgt_copy_32w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -532,7 +525,7 @@ static void hevc_biwgt_copy_32w_msa(const uint8_t *src0_ptr,
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -580,8 +573,7 @@ static void hevc_biwgt_copy_48w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -592,7 +584,7 @@ static void hevc_biwgt_copy_48w_msa(const uint8_t *src0_ptr,
     v8i16 dst0, dst1, dst2, dst3, dst4, dst5, in0, in1, in2, in3, in4, in5;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -632,8 +624,7 @@ static void hevc_biwgt_copy_64w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -645,7 +636,7 @@ static void hevc_biwgt_copy_64w_msa(const uint8_t *src0_ptr,
     v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
     v4i32 offset_vec, weight_vec, rnd_vec;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -688,8 +679,7 @@ static void hevc_hz_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -712,7 +702,7 @@ static void hevc_hz_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -760,8 +750,7 @@ static void hevc_hz_biwgt_8t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -777,7 +766,7 @@ static void hevc_hz_biwgt_8t_8w_msa(const uint8_t *src0_ptr,
     v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr -= 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -840,8 +829,7 @@ static void hevc_hz_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -858,7 +846,7 @@ static void hevc_hz_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
     constant <<= 6;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     offset += constant;
 
     offset_vec = __msa_fill_w(offset);
@@ -935,8 +923,7 @@ static void hevc_hz_biwgt_8t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -952,7 +939,7 @@ static void hevc_hz_biwgt_8t_16w_msa(const uint8_t *src0_ptr,
     v16i8 mask0 = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
 
     src0_ptr -= 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -1019,8 +1006,7 @@ static void hevc_hz_biwgt_8t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1038,7 +1024,7 @@ static void hevc_hz_biwgt_8t_24w_msa(const uint8_t *src0_ptr,
     v16i8 mask0 = LD_SB(&ff_hevc_mask_arr[0]);
 
     src0_ptr = src0_ptr - 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -1141,8 +1127,7 @@ static void hevc_hz_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1158,7 +1143,7 @@ static void hevc_hz_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -1227,8 +1212,7 @@ static void hevc_hz_biwgt_8t_48w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1244,7 +1228,7 @@ static void hevc_hz_biwgt_8t_48w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -1331,8 +1315,7 @@ static void hevc_hz_biwgt_8t_64w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     const uint8_t *src0_ptr_tmp;
@@ -1351,7 +1334,7 @@ static void hevc_hz_biwgt_8t_64w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, offset_vec, rnd_vec;
 
     src0_ptr -= 3;
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -1430,8 +1413,7 @@ static void hevc_vt_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1451,7 +1433,7 @@ static void hevc_vt_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -1569,8 +1551,7 @@ static void hevc_vt_biwgt_8t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1586,7 +1567,7 @@ static void hevc_vt_biwgt_8t_8w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -1657,8 +1638,7 @@ static void hevc_vt_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -1677,7 +1657,7 @@ static void hevc_vt_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
     v4i32 weight_vec, weight1_vec, offset_vec, rnd_vec, const_vec;
 
     src0_ptr -= (3 * src_stride);
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -1767,8 +1747,7 @@ static void hevc_vt_biwgt_8t_16multx2mult_msa(const uint8_t *src0_ptr,
                                               int32_t height,
                                               int32_t weight0,
                                               int32_t weight1,
-                                              int32_t offset0,
-                                              int32_t offset1,
+                                              int32_t offset,
                                               int32_t rnd_val,
                                               int32_t width)
 {
@@ -1791,7 +1770,7 @@ static void hevc_vt_biwgt_8t_16multx2mult_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= (3 * src_stride);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -1883,14 +1862,13 @@ static void hevc_vt_biwgt_8t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
                                       dst, dst_stride, filter, height,
-                                      weight0, weight1, offset0, offset1,
+                                      weight0, weight1, offset,
                                       rnd_val, 16);
 }
 
@@ -1904,19 +1882,18 @@ static void hevc_vt_biwgt_8t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
                                       dst, dst_stride, filter, height,
-                                      weight0, weight1, offset0, offset1,
+                                      weight0, weight1, offset,
                                       rnd_val, 16);
     hevc_vt_biwgt_8t_8w_msa(src0_ptr + 16, src_stride,
                             src1_ptr + 16, src2_stride,
                             dst + 16, dst_stride, filter, height,
-                            weight0, weight1, offset0, offset1, rnd_val);
+                            weight0, weight1, offset, rnd_val);
 }
 
 static void hevc_vt_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
@@ -1929,14 +1906,13 @@ static void hevc_vt_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
                                       dst, dst_stride, filter, height,
-                                      weight0, weight1, offset0, offset1,
+                                      weight0, weight1, offset,
                                       rnd_val, 32);
 }
 
@@ -1950,14 +1926,13 @@ static void hevc_vt_biwgt_8t_48w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
                                       dst, dst_stride, filter, height,
-                                      weight0, weight1, offset0, offset1,
+                                      weight0, weight1, offset,
                                       rnd_val, 48);
 }
 
@@ -1971,14 +1946,13 @@ static void hevc_vt_biwgt_8t_64w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_vt_biwgt_8t_16multx2mult_msa(src0_ptr, src_stride,
                                       src1_ptr, src2_stride,
                                       dst, dst_stride, filter, height,
-                                      weight0, weight1, offset0, offset1,
+                                      weight0, weight1, offset,
                                       rnd_val, 64);
 }
 
@@ -1993,8 +1967,7 @@ static void hevc_hv_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -2030,7 +2003,7 @@ static void hevc_hv_biwgt_8t_4w_msa(const uint8_t *src0_ptr,
     mask2 = mask0 + 4;
     mask3 = mask0 + 6;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -2138,8 +2111,7 @@ static void hevc_hv_biwgt_8t_8multx2mult_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val,
                                              int32_t width8mult)
 {
@@ -2169,7 +2141,7 @@ static void hevc_hv_biwgt_8t_8multx2mult_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= ((3 * src_stride) + 3);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -2321,15 +2293,14 @@ static void hevc_hv_biwgt_8t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 1);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 1);
 }
 
 static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
@@ -2343,8 +2314,7 @@ static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -2370,7 +2340,7 @@ static void hevc_hv_biwgt_8t_12w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= ((3 * src_stride) + 3);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -2594,15 +2564,14 @@ static void hevc_hv_biwgt_8t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 2);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 2);
 }
 
 static void hevc_hv_biwgt_8t_24w_msa(const uint8_t *src0_ptr,
@@ -2616,15 +2585,14 @@ static void hevc_hv_biwgt_8t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 3);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 3);
 }
 
 static void hevc_hv_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
@@ -2638,15 +2606,14 @@ static void hevc_hv_biwgt_8t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 4);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 4);
 }
 
 static void hevc_hv_biwgt_8t_48w_msa(const uint8_t *src0_ptr,
@@ -2660,15 +2627,14 @@ static void hevc_hv_biwgt_8t_48w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 6);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 6);
 }
 
 static void hevc_hv_biwgt_8t_64w_msa(const uint8_t *src0_ptr,
@@ -2682,15 +2648,14 @@ static void hevc_hv_biwgt_8t_64w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_8t_8multx2mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride, filter_x, filter_y,
-                                     height, weight0, weight1, offset0,
-                                     offset1, rnd_val, 8);
+                                     height, weight0, weight1, offset,
+                                     rnd_val, 8);
 }
 
 static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
@@ -2702,8 +2667,7 @@ static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight, constant;
+    int32_t weight, constant;
@@ -2724,7 +2688,7 @@ static void hevc_hz_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -2762,8 +2726,7 @@ static void hevc_hz_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
-    int32_t offset, weight, constant;
+    int32_t weight, constant;
@@ -2785,7 +2748,7 @@ static void hevc_hz_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -2823,8 +2786,7 @@ static void hevc_hz_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -2844,7 +2806,7 @@ static void hevc_hz_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -2898,23 +2860,22 @@ static void hevc_hz_biwgt_4t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_hz_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (4 == height) {
         hevc_hz_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_hz_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter, height,
-                                         weight0, weight1, offset0, offset1,
+                                         weight0, weight1, offset,
                                          rnd_val);
     }
 }
@@ -2929,8 +2890,7 @@ static void hevc_hz_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -2950,7 +2910,7 @@ static void hevc_hz_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3002,8 +2962,7 @@ static void hevc_hz_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t offset, weight, constant;
@@ -3021,7 +2980,7 @@ static void hevc_hz_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3058,8 +3017,7 @@ static void hevc_hz_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t weight, offset, constant;
@@ -3078,7 +3036,7 @@ static void hevc_hz_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3133,8 +3091,7 @@ static void hevc_hz_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3154,7 +3111,7 @@ static void hevc_hz_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3203,23 +3160,22 @@ static void hevc_hz_biwgt_4t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_hz_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (6 == height) {
         hevc_hz_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (0 == (height % 4)) {
         hevc_hz_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter, height,
-                                         weight0, weight1, offset0, offset1,
+                                         weight0, weight1, offset,
                                          rnd_val);
     }
 }
@@ -3234,8 +3190,7 @@ static void hevc_hz_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3258,7 +3213,7 @@ static void hevc_hz_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3320,8 +3275,7 @@ static void hevc_hz_biwgt_4t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3341,7 +3295,7 @@ static void hevc_hz_biwgt_4t_16w_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3409,8 +3363,7 @@ static void hevc_hz_biwgt_4t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3430,7 +3383,7 @@ static void hevc_hz_biwgt_4t_24w_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3496,8 +3449,7 @@ static void hevc_hz_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3517,7 +3469,7 @@ static void hevc_hz_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
     filter_vec = LD_SH(filter);
     SPLATI_H2_SH(filter_vec, 0, 1, filt0, filt1);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3568,8 +3520,7 @@ static void hevc_vt_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t weight, offset, constant;
@@ -3583,7 +3534,7 @@ static void hevc_vt_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3633,8 +3584,7 @@ static void hevc_vt_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t weight, offset, constant;
@@ -3649,7 +3599,7 @@ static void hevc_vt_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3701,8 +3651,7 @@ static void hevc_vt_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3719,7 +3668,7 @@ static void hevc_vt_biwgt_4t_4x8multiple_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3787,23 +3736,22 @@ static void hevc_vt_biwgt_4t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_vt_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (4 == height) {
         hevc_vt_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_vt_biwgt_4t_4x8multiple_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter, height,
-                                         weight0, weight1, offset0, offset1,
+                                         weight0, weight1, offset,
                                          rnd_val);
     }
 }
@@ -3818,8 +3766,7 @@ static void hevc_vt_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -3835,7 +3782,7 @@ static void hevc_vt_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3924,8 +3871,7 @@ static void hevc_vt_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t offset, weight, constant;
@@ -3938,7 +3884,7 @@ static void hevc_vt_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -3981,8 +3927,7 @@ static void hevc_vt_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t offset, weight, constant;
@@ -3997,7 +3942,7 @@ static void hevc_vt_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4053,8 +3998,7 @@ static void hevc_vt_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4069,7 +4013,7 @@ static void hevc_vt_biwgt_4t_8x4multiple_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4127,23 +4071,22 @@ static void hevc_vt_biwgt_4t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_vt_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (6 == height) {
         hevc_vt_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else {
         hevc_vt_biwgt_4t_8x4multiple_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter, height,
-                                         weight0, weight1, offset0, offset1,
+                                         weight0, weight1, offset,
                                          rnd_val);
     }
 }
@@ -4158,8 +4101,7 @@ static void hevc_vt_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4176,7 +4118,7 @@ static void hevc_vt_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= (1 * src_stride);
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4250,8 +4192,7 @@ static void hevc_vt_biwgt_4t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4267,7 +4208,7 @@ static void hevc_vt_biwgt_4t_16w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4344,8 +4285,7 @@ static void hevc_vt_biwgt_4t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4363,7 +4303,7 @@ static void hevc_vt_biwgt_4t_24w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4481,8 +4421,7 @@ static void hevc_vt_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4501,7 +4440,7 @@ static void hevc_vt_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
 
     src0_ptr -= src_stride;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
     constant = 128 * weight1;
@@ -4600,8 +4539,7 @@ static void hevc_hv_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter_y,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint64_t tp0, tp1;
@@ -4630,7 +4568,7 @@ static void hevc_hv_biwgt_4t_4x2_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -4684,8 +4622,7 @@ static void hevc_hv_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter_y,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint64_t tp0, tp1;
@@ -4717,7 +4654,7 @@ static void hevc_hv_biwgt_4t_4x4_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -4782,8 +4719,7 @@ static void hevc_hv_biwgt_4t_4multx8mult_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -4818,7 +4754,7 @@ static void hevc_hv_biwgt_4t_4multx8mult_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -4927,24 +4863,23 @@ static void hevc_hv_biwgt_4t_4w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_hv_biwgt_4t_4x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (4 == height) {
         hevc_hv_biwgt_4t_4x4_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (0 == (height % 8)) {
         hevc_hv_biwgt_4t_4multx8mult_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter_x, filter_y,
                                          height, weight0, weight1,
-                                         offset0, offset1, rnd_val);
+                                         offset, rnd_val);
     }
 }
 
@@ -4959,8 +4894,7 @@ static void hevc_hv_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     uint32_t tpw0, tpw1, tpw2, tpw3;
@@ -4998,7 +4932,7 @@ static void hevc_hv_biwgt_4t_6w_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5141,8 +5075,7 @@ static void hevc_hv_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter_y,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     int32_t weight, offset;
@@ -5174,7 +5107,7 @@ static void hevc_hv_biwgt_4t_8x2_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5237,8 +5170,7 @@ static void hevc_hv_biwgt_4t_8multx4_msa(const uint8_t *src0_ptr,
                                          const int8_t *filter_y,
                                          int32_t weight0,
                                          int32_t weight1,
-                                         int32_t offset0,
-                                         int32_t offset1,
+                                         int32_t offset,
                                          int32_t rnd_val,
                                          int32_t width8mult)
 {
@@ -5269,7 +5201,7 @@ static void hevc_hv_biwgt_4t_8multx4_msa(const uint8_t *src0_ptr,
     mask0 = LD_SB(ff_hevc_mask_arr);
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5362,8 +5294,7 @@ static void hevc_hv_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
                                      const int8_t *filter_y,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t offset, weight;
@@ -5400,7 +5331,7 @@ static void hevc_hv_biwgt_4t_8x6_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5514,8 +5445,7 @@ static void hevc_hv_biwgt_4t_8multx4mult_msa(const uint8_t *src0_ptr,
                                              int32_t height,
                                              int32_t weight0,
                                              int32_t weight1,
-                                             int32_t offset0,
-                                             int32_t offset1,
+                                             int32_t offset,
                                              int32_t rnd_val,
                                              int32_t width)
 {
@@ -5554,7 +5484,7 @@ static void hevc_hv_biwgt_4t_8multx4mult_msa(const uint8_t *src0_ptr,
 
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5665,29 +5595,28 @@ static void hevc_hv_biwgt_4t_8w_msa(const uint8_t *src0_ptr,
                                     int32_t height,
                                     int32_t weight0,
                                     int32_t weight1,
-                                    int32_t offset0,
-                                    int32_t offset1,
+                                    int32_t offset,
                                     int32_t rnd_val)
 {
     if (2 == height) {
         hevc_hv_biwgt_4t_8x2_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (4 == height) {
         hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr,
                                      src2_stride, dst, dst_stride, filter_x,
-                                     filter_y, weight0, weight1, offset0,
-                                     offset1, rnd_val, 1);
+                                     filter_y, weight0, weight1, offset,
+                                     rnd_val, 1);
     } else if (6 == height) {
         hevc_hv_biwgt_4t_8x6_msa(src0_ptr, src_stride, src1_ptr, src2_stride,
                                  dst, dst_stride, filter_x, filter_y,
-                                 weight0, weight1, offset0, offset1, rnd_val);
+                                 weight0, weight1, offset, rnd_val);
     } else if (0 == (height % 4)) {
         hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
                                          src1_ptr, src2_stride,
                                          dst, dst_stride, filter_x, filter_y,
                                          height, weight0,
-                                         weight1, offset0, offset1, rnd_val, 8);
+                                         weight1, offset, rnd_val, 8);
     }
 }
 
@@ -5702,8 +5631,7 @@ static void hevc_hv_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     uint32_t loop_cnt;
@@ -5741,7 +5669,7 @@ static void hevc_hv_biwgt_4t_12w_msa(const uint8_t *src0_ptr,
     mask0 = LD_SB(ff_hevc_mask_arr);
     mask1 = mask0 + 2;
 
-    offset = (offset0 + offset1) << rnd_val;
+    offset = offset << rnd_val;
     weight0 = weight0 & 0x0000FFFF;
     weight = weight0 | (weight1 << 16);
 
@@ -5944,20 +5872,19 @@ static void hevc_hv_biwgt_4t_16w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     if (4 == height) {
         hevc_hv_biwgt_4t_8multx4_msa(src0_ptr, src_stride, src1_ptr,
                                      src2_stride, dst, dst_stride, filter_x,
-                                     filter_y, weight0, weight1, offset0,
-                                     offset1, rnd_val, 2);
+                                     filter_y, weight0, weight1, offset,
+                                     rnd_val, 2);
     } else {
         hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride, src1_ptr,
                                          src2_stride, dst, dst_stride,
                                          filter_x, filter_y, height, weight0,
-                                         weight1, offset0, offset1, rnd_val, 16);
+                                         weight1, offset, rnd_val, 16);
     }
 }
 
@@ -5972,15 +5899,14 @@ static void hevc_hv_biwgt_4t_24w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride,
                                      filter_x, filter_y, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 24);
+                                     weight1, offset, rnd_val, 24);
 }
 
 static void hevc_hv_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
@@ -5994,15 +5920,14 @@ static void hevc_hv_biwgt_4t_32w_msa(const uint8_t *src0_ptr,
                                      int32_t height,
                                      int32_t weight0,
                                      int32_t weight1,
-                                     int32_t offset0,
-                                     int32_t offset1,
+                                     int32_t offset,
                                      int32_t rnd_val)
 {
     hevc_hv_biwgt_4t_8multx4mult_msa(src0_ptr, src_stride,
                                      src1_ptr, src2_stride,
                                      dst, dst_stride,
                                      filter_x, filter_y, height, weight0,
-                                     weight1, offset0, offset1, rnd_val, 32);
+                                     weight1, offset, rnd_val, 32);
 }
 
 #define BI_W_MC_COPY(WIDTH)                                                  \
@@ -6015,8 +5940,7 @@ void ff_hevc_put_hevc_bi_w_pel_pixels##WIDTH##_8_msa(uint8_t *dst,           \
                                                      int denom,              \
                                                      int weight0,            \
                                                      int weight1,            \
-                                                     int offset0,            \
-                                                     int offset1,            \
+                                                     int offset,             \
                                                      intptr_t mx,            \
                                                      intptr_t my,            \
                                                      int width)              \
@@ -6026,8 +5950,7 @@ void ff_hevc_put_hevc_bi_w_pel_pixels##WIDTH##_8_msa(uint8_t *dst,           \
                                                                              \
     hevc_biwgt_copy_##WIDTH##w_msa(src, src_stride, src_16bit, MAX_PB_SIZE,  \
                                    dst, dst_stride, height,                  \
-                                   weight0, weight1, offset0,                \
-                                   offset1, log2Wd);                         \
+                                   weight0, weight1, offset, log2Wd);        \
 }
 
 BI_W_MC_COPY(4);
@@ -6054,8 +5977,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,         \
                                                         int denom,            \
                                                         int weight0,          \
                                                         int weight1,          \
-                                                        int offset0,          \
-                                                        int offset1,          \
+                                                        int offset,           \
                                                         intptr_t mx,          \
                                                         intptr_t my,          \
                                                         int width)            \
@@ -6066,8 +5988,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_##DIR##WIDTH##_8_msa(uint8_t *dst,         \
     hevc_##DIR1##_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,   \
                                                 MAX_PB_SIZE, dst, dst_stride, \
                                                 filter, height, weight0,      \
-                                                weight1, offset0, offset1,    \
-                                                log2Wd);                      \
+                                                weight1, offset, log2Wd);     \
 }
 
 BI_W_MC(qpel, h, 4, 8, hz, mx);
@@ -6116,8 +6037,7 @@ void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
                                                      int denom,             \
                                                      int weight0,           \
                                                      int weight1,           \
-                                                     int offset0,           \
-                                                     int offset1,           \
+                                                     int offset,            \
                                                      intptr_t mx,           \
                                                      intptr_t my,           \
                                                      int width)             \
@@ -6129,8 +6049,8 @@ void ff_hevc_put_hevc_bi_w_##PEL##_hv##WIDTH##_8_msa(uint8_t *dst,          \
     hevc_hv_biwgt_##TAP##t_##WIDTH##w_msa(src, src_stride, src_16bit,       \
                                           MAX_PB_SIZE, dst, dst_stride,     \
                                           filter_x, filter_y, height,       \
-                                          weight0, weight1, offset0,        \
-                                          offset1, log2Wd);                 \
+                                          weight0, weight1, offset,         \
+                                          log2Wd);                          \
 }
 
 BI_W_MC_HV(qpel, 4, 8);
diff --git a/libavcodec/x86/hevc/dsp.h b/libavcodec/x86/hevc/dsp.h
index 0062699ce0..69d3987cdb 100644
--- a/libavcodec/x86/hevc/dsp.h
+++ b/libavcodec/x86/hevc/dsp.h
@@ -41,7 +41,7 @@ bi_pel_func ff_hevc_put_bi_ ## name ## W ## _ ## D ## _##opt
 
 #define WEIGHTING_PROTOTYPE(width, bitd, opt) \
 void ff_hevc_put_uni_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, int height, int denom,  int _wx, int _ox);      \
-void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom,  int _wx0,  int _wx1, int _ox0, int _ox1)
+void ff_hevc_put_bi_w##width##_##bitd##_##opt(uint8_t *dst, ptrdiff_t dststride, const int16_t *_src, const int16_t *_src2, int height, int denom, int wx0, int wx1, int ox)
 
 #define WEIGHTING_PROTOTYPES(bitd, opt) \
         WEIGHTING_PROTOTYPE(4, bitd, opt); \
diff --git a/libavcodec/x86/hevc/dsp_init.c b/libavcodec/x86/hevc/dsp_init.c
index bd967eac67..ca3962b3f2 100644
--- a/libavcodec/x86/hevc/dsp_init.c
+++ b/libavcodec/x86/hevc/dsp_init.c
@@ -575,7 +575,7 @@ mc_rep_uni_w(12, 8, 64, sse4)
 #define mc_rep_bi_w(bitd, step, W, opt) \
 void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, 
const int16_t *_src, \
                                           const int16_t *_src2, int height,    
                    \
-                                          int denom,  int _wx0,  int _wx1, int 
_ox0, int _ox1)     \
+                                          int denom, int wx0, int wx1, int ox) 
                    \
 {                                                                              
                                         \
     int i;                                                                     
                                         \
     uint8_t *dst;                                                              
                                         \
@@ -584,7 +584,7 @@ void ff_hevc_put_bi_w##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t dststride, co
         const int16_t *src2 = _src2 + i;                                       
                                         \
         dst  = _dst  + (i * ((bitd + 7) / 8));                                 
                                         \
         ff_hevc_put_bi_w##step##_##bitd##_##opt(dst, dststride, src, src2,     
                    \
-                                                height, denom, _wx0, _wx1, 
_ox0, _ox1);            \
+                                                height, denom, wx0, wx1, ox);  
                    \
     }                                                                          
                                         \
 }
 
@@ -672,13 +672,13 @@ static void hevc_put_bi_w_##name##W##_##bitd##_##opt(uint8_t *_dst, ptrdiff_t _d
                                                      const uint8_t *_src, 
ptrdiff_t _srcstride,      \
                                                      const int16_t *_src2,     
                      \
                                                      int height, int denom,    
                      \
-                                                     int _wx0, int _wx1, int 
_ox0, int _ox1,         \
+                                                     int wx0, int wx1, int ox, 
                      \
                                                      intptr_t mx, intptr_t my, 
int width)            \
 {                                                                              
                      \
     LOCAL_ALIGNED_16(int16_t, temp, [71 * MAX_PB_SIZE]);                       
                      \
     hevc_put_##name##W##_##bitd##_##opt(temp, _src, _srcstride, height, mx, 
my, width);              \
     ff_hevc_put_bi_w##W##_##bitd##_##opt(_dst, _dststride, temp, _src2,        
                      \
-                                         height, denom, _wx0, _wx1, _ox0, 
_ox1);                     \
+                                         height, denom, wx0, wx1, ox);         
                      \
 }
 
 #define mc_bi_w_funcs(name, bitd, opt)      \
diff --git a/libavcodec/x86/hevc/mc.asm b/libavcodec/x86/hevc/mc.asm
index 66ed406c26..e76807c6e2 100644
--- a/libavcodec/x86/hevc/mc.asm
+++ b/libavcodec/x86/hevc/mc.asm
@@ -1090,7 +1090,7 @@ cglobal hevc_put_uni_w%1_%2, 6, 6, 7, dst, dststride, src, height, denom, wx, ox
     jnz               .loop                      ; height loop
     RET
 
-cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox0, ox1
+cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, wx0, wx1, ox
     movifnidn        r5d, denomm
     movd              m3, wx0m         ; WX0
     lea              r5d, [r5d+14-%2]  ; shift = 14 - bitd + denom
@@ -1100,8 +1100,7 @@ cglobal hevc_put_bi_w%1_%2, 4, 6, 6, dst, dststride, src, src2, height, denom, w
     inc              r5d
     movd              m5, r5d          ; shift+1
     pshufd            m2, m2, 0
-    mov              r5d, ox0m
-    add              r5d, ox1m
+    mov              r5d, oxm
 %if %2 != 8
     shl              r5d, %2-8         ; ox << (bitd - 8)
 %endif
diff --git a/tests/checkasm/hevc_pel.c b/tests/checkasm/hevc_pel.c
index e89facb1dc..000d1e7a57 100644
--- a/tests/checkasm/hevc_pel.c
+++ b/tests/checkasm/hevc_pel.c
@@ -298,7 +298,7 @@ static void checkasm_check_hevc_qpel_bi_w(void)
     declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                  const int16_t *src2,
                  int height, int denom, int wx0, int wx1,
-                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);
+                 int ox, intptr_t mx, intptr_t my, int width);
 
     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
         ff_hevc_dsp_init(&h, bit_depth);
@@ -324,16 +324,16 @@ static void checkasm_check_hevc_qpel_bi_w(void)
                                     CLEAR_PIXEL_RECT(dst1);
                                     call_ref(dst0, dst0_stride,
                                              src0, sizes[idx] * SIZEOF_PIXEL,
-                                             ref0, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                             ref0, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                     call_new(dst1, dst1_stride,
                                              src1, sizes[idx] * SIZEOF_PIXEL,
-                                             ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                             ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                     checkasm_check_pixel_padded(dst0, dst0_stride,
                                                                 dst1, dst1_stride,
                                                                 sizes[idx], sizes[idx], "dst");
                                     bench_new(dst1, dst1_stride,
                                               src1, sizes[idx] * SIZEOF_PIXEL,
-                                              ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                              ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                 }
                             }
                         }
@@ -571,7 +571,7 @@ static void checkasm_check_hevc_epel_bi_w(void)
     declare_func(void, uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
                  const int16_t *src2,
                  int height, int denom, int wx0, int wx1,
-                 int ox0, int ox1, intptr_t mx, intptr_t my, int width);
+                 int ox, intptr_t mx, intptr_t my, int width);
 
     for (bit_depth = 8; bit_depth <= 12; bit_depth++) {
         ff_hevc_dsp_init(&h, bit_depth);
@@ -597,16 +597,16 @@ static void checkasm_check_hevc_epel_bi_w(void)
                                     CLEAR_PIXEL_RECT(dst1);
                                     call_ref(dst0, dst0_stride,
                                              src0, sizes[idx] * SIZEOF_PIXEL,
-                                             ref0, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                             ref0, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                     call_new(dst1, dst1_stride,
                                              src1, sizes[idx] * SIZEOF_PIXEL,
-                                             ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                             ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                     checkasm_check_pixel_padded(dst0, dst0_stride,
                                                                 dst1, dst1_stride,
                                                                 sizes[idx], sizes[idx], "dst");
                                     bench_new(dst1, dst1_stride,
                                               src1, sizes[idx] * SIZEOF_PIXEL,
-                                              ref1, sizes[idx], *denom, *wx, *wx, *ox, *ox, i, j, sizes[idx]);
+                                              ref1, sizes[idx], *denom, *wx, *wx, *ox + *ox, i, j, sizes[idx]);
                                 }
                             }
                         }
-- 
2.52.0


>From 2d27de9aa1981aaaf643f25d5eadef11fd8c4f54 Mon Sep 17 00:00:00 2001
From: Andreas Rheinhardt <[email protected]>
Date: Wed, 24 Jun 2026 01:07:02 +0200
Subject: [PATCH 5/5] tests/checkasm/hevc_pel: Don't test impossible values

denom is in 0..7 (see pred_weight_table() in hevcdec.c), so remove the
out-of-range value 12 from the tested denoms.

Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 tests/checkasm/hevc_pel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/checkasm/hevc_pel.c b/tests/checkasm/hevc_pel.c
index 000d1e7a57..12d28eaccb 100644
--- a/tests/checkasm/hevc_pel.c
+++ b/tests/checkasm/hevc_pel.c
@@ -30,7 +30,7 @@ static const uint32_t pixel_mask[] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x07f
 static const uint32_t pixel_mask16[] = { 0x00ff00ff, 0x01ff01ff, 0x03ff03ff, 0x07ff07ff, 0x0fff0fff };
 static const int sizes[] = { -1, 4, 6, 8, 12, 16, 24, 32, 48, 64 };
 static const int weights[] = { 0, 128, 255, -1 };
-static const int denoms[] = {0, 7, 12, -1 };
+static const int denoms[] = {0, 7, -1 };
 static const int offsets[] = {0, 255, -1 };
 
 #define SIZEOF_PIXEL ((bit_depth + 7) / 8)
-- 
2.52.0

_______________________________________________
ffmpeg-devel mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-devel] [PR] avcodec/{hevc/dsp,x86/hevc/mc}: Improve biweight prediction (PR #23571)

Reply via email to