The branch, master has been updated
via 7018ce14df7b0f1094ef8e21f7154c5c0c513588 (commit)
via 300cd2c2f225e483fc3c3bb632eddd3a4c7b4af9 (commit)
via dcc101167cccf338e009c2188e79471415e102ca (commit)
via 111fabf5b40841ef324f0f9f474bfaaef57f7a8a (commit)
via 363a34a7cb1bb73549b0273a291657eb868218da (commit)
via aabaab10d2e94505893c05c04dde9f5d4676ff38 (commit)
via 962858169a8b14e7b3751f9b36bab838e9003f8e (commit)
via f397fe86c3b79a2539cea5bc9a638361046bc23f (commit)
via 5dadae9febef4ada1b00ba5663d0beec850c8ceb (commit)
via 8443940002d4dba93b022874201715814d0172f4 (commit)
from 0ea961c0700441b68eff73296755a3245145588e (commit)
- Log -----------------------------------------------------------------
commit 7018ce14df7b0f1094ef8e21f7154c5c0c513588
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 11:46:40 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:49 2025 +0100
avcodec/x86/vp6dsp: Avoid packing+unpacking
Store the intermediate values as words, clipped to the 0..255 range
instead.
Old benchmarks:
filter_diag4_c: 353.4 ( 1.00x)
filter_diag4_sse2: 57.5 ( 6.15x)
New benchmarks:
filter_diag4_c: 350.6 ( 1.00x)
filter_diag4_sse2: 55.1 ( 6.36x)
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index b9b562f84f..1f7443db69 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -26,26 +26,41 @@ cextern pw_64
SECTION .text
-%macro DIAG4 6
+%macro DIAG4 7
+%if %7
+ mova m0, [%1+%2]
+ mova m1, [%1+%3]
+%else
movq m0, [%1+%2]
movq m1, [%1+%3]
punpcklbw m0, m7
punpcklbw m1, m7
+%endif
pmullw m0, m4 ; src[x-8 ] * biweight [0]
pmullw m1, m5 ; src[x ] * biweight [1]
paddw m0, m1
+%if %7
+ mova m1, [%1+%4]
+ mova m2, [%1+%5]
+%else
movq m1, [%1+%4]
movq m2, [%1+%5]
punpcklbw m1, m7
punpcklbw m2, m7
+%endif
paddw m0, [pw_64] ; Add 64
pmullw m1, m6 ; src[x+8 ] * biweight [2]
pmullw m2, m3 ; src[x+16] * biweight [3]
paddw m1, m2
paddsw m0, m1
psraw m0, 7
+%if %7
packuswb m0, m0
movq [%6], m0
+%else
+ pmaxsw m0, m7 ; clip to 0-255 range
+ mova [%6], m0
+%endif
%endmacro
%macro SPLAT4REGS 0
@@ -59,7 +74,7 @@ SECTION .text
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
; const int16_t h_weight[4], const int16_t v_weights[4])
INIT_XMM sse2
-cglobal vp6_filter_diag4, 5, 6, 8, -8*11
+cglobal vp6_filter_diag4, 5, 6, 8, -16*11
sub r1, r2
pxor m7, m7
@@ -69,8 +84,8 @@ cglobal vp6_filter_diag4, 5, 6, 8, -8*11
mov r3, rsp
mov r5d, 11
.nextrow:
- DIAG4 r1, -1, 0, 1, 2, r3
- add r3, 8
+ DIAG4 r1, -1, 0, 1, 2, r3, 0
+ add r3, 16
add r1, r2
dec r5d
jnz .nextrow
@@ -78,11 +93,11 @@ cglobal vp6_filter_diag4, 5, 6, 8, -8*11
movq m3, [r4]
SPLAT4REGS
- lea r3, [rsp+8]
+ lea r3, [rsp+16]
mov r1d, 8
.nextcol:
- DIAG4 r3, -8, 0, 8, 16, r0
- add r3, 8
+ DIAG4 r3, -16, 0, 16, 32, r0, 1
+ add r3, 16
add r0, r2
dec r1d
jnz .nextcol
commit 300cd2c2f225e483fc3c3bb632eddd3a4c7b4af9
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 11:27:16 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:46 2025 +0100
avcodec/x86/vp6dsp: Avoid saturated addition
Only the two middle coefficients are so huge that overflow can happen.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 83b26d03cd..b9b562f84f 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -38,11 +38,11 @@ SECTION .text
movq m2, [%1+%5]
punpcklbw m1, m7
punpcklbw m2, m7
+ paddw m0, [pw_64] ; Add 64
pmullw m1, m6 ; src[x+8 ] * biweight [2]
pmullw m2, m3 ; src[x+16] * biweight [3]
paddw m1, m2
paddsw m0, m1
- paddsw m0, [pw_64] ; Add 64
psraw m0, 7
packuswb m0, m0
movq [%6], m0
commit dcc101167cccf338e009c2188e79471415e102ca
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 11:22:45 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:43 2025 +0100
avcodec/x86/vp6dsp: Simplify splatting
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index a9340ed05b..83b26d03cd 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -49,14 +49,11 @@ SECTION .text
%endmacro
%macro SPLAT4REGS 0
- pshuflw m4, m3, 0x0
- pshuflw m5, m3, 0x55
- pshuflw m6, m3, 0xAA
- pshuflw m3, m3, 0xFF
- punpcklqdq m4, m4
- punpcklqdq m5, m5
- punpcklqdq m6, m6
- punpcklqdq m3, m3
+ punpcklwd m3, m3
+ pshufd m4, m3, 0x0
+ pshufd m5, m3, 0x55
+ pshufd m6, m3, 0xAA
+ pshufd m3, m3, 0xFF
%endmacro
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
commit 111fabf5b40841ef324f0f9f474bfaaef57f7a8a
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 11:15:15 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:40 2025 +0100
avcodec/x86/vp6dsp: Don't align the stack manually
For most systems (particularly all x64), the stack is already
guaranteed to be sufficiently aligned. So just use x86inc's
stack feature which does the right thing.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 61336f6465..a9340ed05b 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -62,11 +62,7 @@ SECTION .text
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
; const int16_t h_weight[4], const int16_t v_weights[4])
INIT_XMM sse2
-cglobal vp6_filter_diag4, 5, 7, 8
- mov r5, rsp ; backup stack pointer
- and rsp, ~(mmsize-1) ; align stack
- sub rsp, 8*11
-
+cglobal vp6_filter_diag4, 5, 6, 8, -8*11
sub r1, r2
pxor m7, m7
@@ -74,25 +70,24 @@ cglobal vp6_filter_diag4, 5, 7, 8
SPLAT4REGS
mov r3, rsp
- mov r6, 11
+ mov r5d, 11
.nextrow:
DIAG4 r1, -1, 0, 1, 2, r3
add r3, 8
add r1, r2
- dec r6
+ dec r5d
jnz .nextrow
movq m3, [r4]
SPLAT4REGS
lea r3, [rsp+8]
- mov r6, 8
+ mov r1d, 8
.nextcol:
DIAG4 r3, -8, 0, 8, 16, r0
add r3, 8
add r0, r2
- dec r6
+ dec r1d
jnz .nextcol
- mov rsp, r5 ; restore stack pointer
RET
commit 363a34a7cb1bb73549b0273a291657eb868218da
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 10:57:39 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:37 2025 +0100
avcodec/x86/vp6dsp: Fix outdated comment
Forgotten in 6cb3ee80b3b58d692a722fb38ee05f170ae8b0d2.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/x86/vp6dsp.asm b/libavcodec/x86/vp6dsp.asm
index 0106541734..61336f6465 100644
--- a/libavcodec/x86/vp6dsp.asm
+++ b/libavcodec/x86/vp6dsp.asm
@@ -1,5 +1,5 @@
;******************************************************************************
-;* MMX/SSE2-optimized functions for the VP6 decoder
+;* SSE2-optimized functions for the VP6 decoder
;* Copyright (C) 2009 Sebastien Lucas <[email protected]>
;* Copyright (C) 2009 Zuxy Meng <[email protected]>
;*
commit aabaab10d2e94505893c05c04dde9f5d4676ff38
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 10:53:41 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:34 2025 +0100
tests/checkasm: Test VP6DSP
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 6636bc7774..3762c0d83b 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -50,6 +50,7 @@ AVCODECOBJS-$(CONFIG_UTVIDEO_DECODER) += utvideodsp.o
AVCODECOBJS-$(CONFIG_V210_DECODER) += v210dec.o
AVCODECOBJS-$(CONFIG_V210_ENCODER) += v210enc.o
AVCODECOBJS-$(CONFIG_VORBIS_DECODER) += vorbisdsp.o
+AVCODECOBJS-$(CONFIG_VP6_DECODER) += vp6dsp.o
AVCODECOBJS-$(CONFIG_VP9_DECODER) += vp9dsp.o
AVCODECOBJS-$(CONFIG_VVC_DECODER) += vvc_alf.o vvc_mc.o vvc_sao.o
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 20d8f19757..8c64684fa3 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -254,6 +254,9 @@ static const struct {
#if CONFIG_VP3DSP
{ "vp3dsp", checkasm_check_vp3dsp },
#endif
+ #if CONFIG_VP6_DECODER
+ { "vp6dsp", checkasm_check_vp6dsp },
+ #endif
#if CONFIG_VP8DSP
{ "vp8dsp", checkasm_check_vp8dsp },
#endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 45cd23cac4..bd33aba263 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -154,6 +154,7 @@ void checkasm_check_vf_hflip(void);
void checkasm_check_vf_threshold(void);
void checkasm_check_vf_sobel(void);
void checkasm_check_vp3dsp(void);
+void checkasm_check_vp6dsp(void);
void checkasm_check_vp8dsp(void);
void checkasm_check_vp9dsp(void);
void checkasm_check_videodsp(void);
diff --git a/tests/checkasm/vp6dsp.c b/tests/checkasm/vp6dsp.c
new file mode 100644
index 0000000000..a5f1c9c2fc
--- /dev/null
+++ b/tests/checkasm/vp6dsp.c
@@ -0,0 +1,93 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "checkasm.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/macros.h"
+#include "libavutil/mem_internal.h"
+#include "libavcodec/vp6data.h"
+#include "libavcodec/vp56dsp.h"
+
+#define randomize_buffer(buf) \
+ do { \
+ for (size_t k = 0; k < (sizeof(buf) & ~3); k += 4) \
+ AV_WN32A(buf + k, rnd()); \
+ for (size_t k = sizeof(buf) & ~3; k < sizeof(buf); ++k) \
+ buf[k] = rnd(); \
+ } while (0)
+
+
+void checkasm_check_vp6dsp(void)
+{
+ enum {
+ BLOCK_SIZE_1D = 8,
+ SRC_ROWS_ABOVE = 1,
+ SRC_ROWS_BELOW = 2,
+ SRC_COLS_LEFT = 1,
+ SRC_COLS_RIGHT = 2,
+ SRC_ROWS = SRC_ROWS_ABOVE + BLOCK_SIZE_1D + SRC_ROWS_BELOW,
+ SRC_ROW_SIZE = SRC_COLS_LEFT + BLOCK_SIZE_1D + SRC_COLS_RIGHT,
+ MAX_STRIDE = 64, ///< arbitrary
+ SRC_BUF_SIZE = (SRC_ROWS - 1) * MAX_STRIDE + SRC_ROW_SIZE + 7 /* to vary misalignment */,
+ DST_BUF_SIZE = (BLOCK_SIZE_1D - 1) * MAX_STRIDE + BLOCK_SIZE_1D,
+ };
+ VP6DSPContext vp6dsp;
+
+ ff_vp6dsp_init(&vp6dsp);
+
+ declare_func(void, uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
+ const int16_t *h_weights, const int16_t *v_weights);
+
+ if (check_func(vp6dsp.vp6_filter_diag4, "filter_diag4")) {
+ DECLARE_ALIGNED(8, uint8_t, dstbuf_ref)[DST_BUF_SIZE];
+ DECLARE_ALIGNED(8, uint8_t, dstbuf_new)[DST_BUF_SIZE];
+ DECLARE_ALIGNED(8, uint8_t, srcbuf)[SRC_BUF_SIZE];
+
+ randomize_buffer(dstbuf_ref);
+ randomize_buffer(srcbuf);
+ memcpy(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new));
+
+ ptrdiff_t stride = (rnd() % (MAX_STRIDE / 16) + 1) * 16;
+ const uint8_t *src = srcbuf + SRC_COLS_LEFT + rnd() % 8U;
+ uint8_t *dst_new = dstbuf_new, *dst_ref = dstbuf_ref;
+
+ if (rnd() & 1) {
+ dst_new += (BLOCK_SIZE_1D - 1) * stride;
+ dst_ref += (BLOCK_SIZE_1D - 1) * stride;
+ src += (SRC_ROWS - 1) * stride;
+ stride *= -1;
+ }
+ src += SRC_ROWS_ABOVE * stride;
+
+ unsigned select = rnd() % FF_ARRAY_ELEMS(vp6_block_copy_filter);
+ unsigned x8 = 1 + rnd() % (FF_ARRAY_ELEMS(vp6_block_copy_filter[0]) - 1);
+ unsigned y8 = 1 + rnd() % (FF_ARRAY_ELEMS(vp6_block_copy_filter[0]) - 1);
+ const int16_t *h_weights = vp6_block_copy_filter[select][x8];
+ const int16_t *v_weights = vp6_block_copy_filter[select][y8];
+
+ call_ref(dst_ref, src, stride, h_weights, v_weights);
+ call_new(dst_new, src, stride, h_weights, v_weights);
+ if (memcmp(dstbuf_new, dstbuf_ref, sizeof(dstbuf_new)))
+ fail();
+ bench_new(dst_new, src, stride, h_weights, v_weights);
+ }
+}
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 2be880c8db..f182efde46 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -76,6 +76,7 @@ FATE_CHECKASM = fate-checkasm-aacencdsp \
fate-checkasm-videodsp \
fate-checkasm-vorbisdsp \
fate-checkasm-vp3dsp \
+ fate-checkasm-vp6dsp \
fate-checkasm-vp8dsp \
fate-checkasm-vp9dsp \
fate-checkasm-vvc_alf \
commit 962858169a8b14e7b3751f9b36bab838e9003f8e
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 01:00:58 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:32 2025 +0100
avcodec/vp6dsp: Constify source in vp6_filter_diag4
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index 692fd0c8ac..3981de4015 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -30,7 +30,7 @@ typedef struct VP5DSPContext {
} VP5DSPContext;
typedef struct VP6DSPContext {
- void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+ void (*vp6_filter_diag4)(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
const int16_t *h_weights,const int16_t *v_weights);
} VP6DSPContext;
diff --git a/libavcodec/vp6dsp.c b/libavcodec/vp6dsp.c
index 76c4983960..bdaa054307 100644
--- a/libavcodec/vp6dsp.c
+++ b/libavcodec/vp6dsp.c
@@ -27,7 +27,7 @@
#include "vp56dsp.h"
-static void vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+static void vp6_filter_diag4_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
const int16_t *h_weights, const int16_t *v_weights)
{
int x, y;
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index 07e3becaec..db9a95767e 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -25,7 +25,7 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/vp56dsp.h"
-void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+void ff_vp6_filter_diag4_sse2(uint8_t *dst, const uint8_t *src, ptrdiff_t stride,
const int16_t *h_weights,const int16_t *v_weights);
av_cold void ff_vp6dsp_init_x86(VP6DSPContext *c)
commit f397fe86c3b79a2539cea5bc9a638361046bc23f
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 00:44:47 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:29 2025 +0100
avcodec/vp56dsp: Separate VP5DSP and VP6DSP
They don't have anything in common since
160ebe0a8d780f6db7c18e824d8ec6f437da33a2.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/configure b/configure
index 7202cbc57a..a7b0c18d9c 100755
--- a/configure
+++ b/configure
@@ -2701,7 +2701,6 @@ CONFIG_EXTRA="
vc1dsp
videodsp
vp3dsp
- vp56dsp
vp8dsp
vulkan_encode
vvc_sei
@@ -3197,8 +3196,8 @@ vc1image_decoder_select="vc1_decoder"
vorbis_encoder_select="audio_frame_queue"
vp3_decoder_select="hpeldsp vp3dsp videodsp"
vp4_decoder_select="vp3_decoder"
-vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp vp56dsp"
-vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp vp56dsp"
+vp5_decoder_select="h264chroma hpeldsp videodsp vp3dsp"
+vp6_decoder_select="h264chroma hpeldsp huffman videodsp vp3dsp"
vp6a_decoder_select="vp6_decoder"
vp6f_decoder_select="vp6_decoder"
vp7_decoder_select="h264pred videodsp vp8dsp"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 49d696017d..40e68116e8 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -182,7 +182,6 @@ OBJS-$(CONFIG_AV1_AMF_DECODER) += amfdec.o
OBJS-$(CONFIG_VC1DSP) += vc1dsp.o
OBJS-$(CONFIG_VIDEODSP) += videodsp.o
OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
-OBJS-$(CONFIG_VP56DSP) += vp56dsp.o
OBJS-$(CONFIG_VP8DSP) += vp8dsp.o
OBJS-$(CONFIG_V4L2_M2M) += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o
@@ -806,7 +805,8 @@ OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \
OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \
vorbis_data.o
OBJS-$(CONFIG_VP3_DECODER) += vp3.o jpegquanttables.o
-OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vpx_rac.o
+OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o \
+ vp5dsp.o vpx_rac.o
OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \
vp6dsp.o vpx_rac.o
OBJS-$(CONFIG_VP7_DECODER) += vp8.o vp8data.o vpx_rac.o
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index 77b479471b..98b8cf41f2 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -285,7 +285,7 @@ static av_cold int vp5_decode_init(AVCodecContext *avctx)
if ((ret = ff_vp56_init_context(avctx, s, 1, 0)) < 0)
return ret;
- ff_vp5dsp_init(&s->vp56dsp);
+ ff_vp5dsp_init(&s->vp5dsp);
s->vp56_coord_div = vp5_coord_div;
s->parse_vector_adjustment = vp5_parse_vector_adjustment;
s->parse_coeff = vp5_parse_coeff;
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index 0ddf7c985c..0d13d7a276 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -326,8 +326,8 @@ static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
{
if (s->avctx->codec->id == AV_CODEC_ID_VP5) {
int t = ff_vp56_filter_threshold[s->quantizer];
- if (dx) s->vp56dsp.edge_filter_hor(yuv + 10-dx , stride, t);
- if (dy) s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
+ if (dx) s->vp5dsp.edge_filter_hor(yuv + 10-dx , stride, t);
+ if (dy) s->vp5dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
} else {
int * bounding_values = s->bounding_values_array + 127;
if (dx)
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index af46e2f188..6610fc2892 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -118,7 +118,10 @@ struct vp56_context {
HpelDSPContext hdsp;
VideoDSPContext vdsp;
VP3DSPContext vp3dsp;
- VP56DSPContext vp56dsp;
+ union {
+ VP5DSPContext vp5dsp;
+ VP6DSPContext vp6dsp;
+ };
uint8_t idct_scantable[64];
AVFrame *frames[4];
uint8_t *edge_emu_buffer_alloc;
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index f2cbb41a1e..692fd0c8ac 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -24,20 +24,19 @@
#include <stddef.h>
#include <stdint.h>
-typedef struct VP56DSPContext {
+typedef struct VP5DSPContext {
void (*edge_filter_hor)(uint8_t *yuv, ptrdiff_t stride, int t);
void (*edge_filter_ver)(uint8_t *yuv, ptrdiff_t stride, int t);
+} VP5DSPContext;
+typedef struct VP6DSPContext {
void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
const int16_t *h_weights,const int16_t *v_weights);
-} VP56DSPContext;
+} VP6DSPContext;
-void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
- const int16_t *h_weights, const int16_t *v_weights);
+void ff_vp5dsp_init(VP5DSPContext *s);
-void ff_vp5dsp_init(VP56DSPContext *s);
-void ff_vp6dsp_init(VP56DSPContext *s);
-
-void ff_vp6dsp_init_x86(VP56DSPContext *s);
+void ff_vp6dsp_init(VP6DSPContext *s);
+void ff_vp6dsp_init_x86(VP6DSPContext *s);
#endif /* AVCODEC_VP56DSP_H */
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp5dsp.c
similarity index 87%
rename from libavcodec/vp56dsp.c
rename to libavcodec/vp5dsp.c
index 1ff67b1c87..a06c2cfd5f 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp5dsp.c
@@ -21,8 +21,6 @@
#include <stdint.h>
-#include "config.h"
-#include "config_components.h"
#include "libavutil/attributes.h"
#include "vp56dsp.h"
#include "libavutil/common.h"
@@ -43,7 +41,6 @@ static void pfx ## _edge_filter_ ## suf(uint8_t *yuv, ptrdiff_t stride, \
} \
}
-#if CONFIG_VP5_DECODER
/* Gives very similar result than the vp6 version except in a few cases */
static int vp5_adjust(int v, int t)
{
@@ -65,20 +62,8 @@ static int vp5_adjust(int v, int t)
VP56_EDGE_FILTER(vp5, hor, 1, stride)
VP56_EDGE_FILTER(vp5, ver, stride, 1)
-av_cold void ff_vp5dsp_init(VP56DSPContext *s)
+av_cold void ff_vp5dsp_init(VP5DSPContext *s)
{
s->edge_filter_hor = vp5_edge_filter_hor;
s->edge_filter_ver = vp5_edge_filter_ver;
}
-#endif /* CONFIG_VP5_DECODER */
-
-#if CONFIG_VP6_DECODER
-av_cold void ff_vp6dsp_init(VP56DSPContext *s)
-{
- s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
-
-#if ARCH_X86
- ff_vp6dsp_init_x86(s);
-#endif
-}
-#endif /* CONFIG_VP6_DECODER */
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index 48ff9da818..3f4bd42d07 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -641,7 +641,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
vp6_filter_hv4(dst, src+offset1, stride, stride,
vp6_block_copy_filter[select][y8]);
} else {
- s->vp56dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
+ s->vp6dsp.vp6_filter_diag4(dst, src+offset1+((mv.x^mv.y)>>31), stride,
vp6_block_copy_filter[select][x8],
vp6_block_copy_filter[select][y8]);
}
@@ -661,7 +661,7 @@ static av_cold int vp6_decode_init_context(AVCodecContext *avctx,
if (ret < 0)
return ret;
- ff_vp6dsp_init(&s->vp56dsp);
+ ff_vp6dsp_init(&s->vp6dsp);
s->deblock_filtering = 0;
s->vp56_coord_div = vp6_coord_div;
diff --git a/libavcodec/vp6dsp.c b/libavcodec/vp6dsp.c
index f7f6856330..76c4983960 100644
--- a/libavcodec/vp6dsp.c
+++ b/libavcodec/vp6dsp.c
@@ -27,8 +27,8 @@
#include "vp56dsp.h"
-void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
- const int16_t *h_weights, const int16_t *v_weights)
+static void vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
+ const int16_t *h_weights, const int16_t *v_weights)
{
int x, y;
int tmp[8*11];
@@ -59,3 +59,12 @@ void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
t += 8;
}
}
+
+av_cold void ff_vp6dsp_init(VP6DSPContext *s)
+{
+ s->vp6_filter_diag4 = vp6_filter_diag4_c;
+
+#if ARCH_X86
+ ff_vp6dsp_init_x86(s);
+#endif
+}
diff --git a/libavcodec/x86/vp6dsp_init.c b/libavcodec/x86/vp6dsp_init.c
index 83d45ec36c..07e3becaec 100644
--- a/libavcodec/x86/vp6dsp_init.c
+++ b/libavcodec/x86/vp6dsp_init.c
@@ -28,7 +28,7 @@
void ff_vp6_filter_diag4_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
const int16_t *h_weights,const int16_t *v_weights);
-av_cold void ff_vp6dsp_init_x86(VP56DSPContext *c)
+av_cold void ff_vp6dsp_init_x86(VP6DSPContext *c)
{
int cpu_flags = av_get_cpu_flags();
commit 5dadae9febef4ada1b00ba5663d0beec850c8ceb
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 00:28:32 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:10:26 2025 +0100
avcodec/vp56: Fix indentation
Forgotten in 160ebe0a8d780f6db7c18e824d8ec6f437da33a2.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index dc3ae70c66..0ddf7c985c 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -325,9 +325,9 @@ static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
ptrdiff_t stride, int dx, int dy)
{
if (s->avctx->codec->id == AV_CODEC_ID_VP5) {
- int t = ff_vp56_filter_threshold[s->quantizer];
- if (dx) s->vp56dsp.edge_filter_hor(yuv + 10-dx , stride, t);
- if (dy) s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
+ int t = ff_vp56_filter_threshold[s->quantizer];
+ if (dx) s->vp56dsp.edge_filter_hor(yuv + 10-dx , stride, t);
+ if (dy) s->vp56dsp.edge_filter_ver(yuv + stride*(10-dy), stride, t);
} else {
int * bounding_values = s->bounding_values_array + 127;
if (dx)
commit 8443940002d4dba93b022874201715814d0172f4
Author: Andreas Rheinhardt <[email protected]>
AuthorDate: Tue Nov 25 00:19:58 2025 +0100
Commit: Andreas Rheinhardt <[email protected]>
CommitDate: Thu Nov 27 12:08:45 2025 +0100
avcodec/arm/vp6dsp: Remove VP6 edge filter functions
Forgotten in 160ebe0a8d780f6db7c18e824d8ec6f437da33a2.
Reviewed-by: Lynne <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 811b364195..e32a0bf49f 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -42,7 +42,6 @@ OBJS-$(CONFIG_RV40_DECODER) += arm/rv40dsp_init_arm.o
OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_init_arm.o
OBJS-$(CONFIG_TRUEHD_DECODER) += arm/mlpdsp_init_arm.o
OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o
-OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_init_arm.o
OBJS-$(CONFIG_VP9_DECODER) += arm/vp9dsp_init_10bpp_arm.o \
arm/vp9dsp_init_12bpp_arm.o \
arm/vp9dsp_init_arm.o
@@ -139,7 +138,6 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o
NEON-OBJS-$(CONFIG_SBC_ENCODER) += arm/sbcdsp_neon.o
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
-NEON-OBJS-$(CONFIG_VP6_DECODER) += arm/vp6dsp_neon.o
NEON-OBJS-$(CONFIG_VP9_DECODER) += arm/vp9itxfm_16bpp_neon.o \
arm/vp9itxfm_neon.o \
arm/vp9lpf_16bpp_neon.o \
diff --git a/libavcodec/arm/vp6dsp_init_arm.c b/libavcodec/arm/vp6dsp_init_arm.c
deleted file mode 100644
index a59d61278c..0000000000
--- a/libavcodec/arm/vp6dsp_init_arm.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <[email protected]>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdint.h>
-
-#include "libavutil/attributes.h"
-#include "libavutil/arm/cpu.h"
-
-#include "libavcodec/vp56dsp.h"
-
-void ff_vp6_edge_filter_hor_neon(uint8_t *yuv, ptrdiff_t stride, int t);
-void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, ptrdiff_t stride, int t);
-
-av_cold void ff_vp6dsp_init_arm(VP56DSPContext *s)
-{
- int cpu_flags = av_get_cpu_flags();
-
- if (have_neon(cpu_flags)) {
- s->edge_filter_hor = ff_vp6_edge_filter_hor_neon;
- s->edge_filter_ver = ff_vp6_edge_filter_ver_neon;
- }
-}
diff --git a/libavcodec/arm/vp6dsp_neon.S b/libavcodec/arm/vp6dsp_neon.S
deleted file mode 100644
index 03dd28d1cb..0000000000
--- a/libavcodec/arm/vp6dsp_neon.S
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2010 Mans Rullgard <[email protected]>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-.macro vp6_edge_filter
- vdup.16 q3, r2 @ t
- vmov.i16 q13, #1
- vsubl.u8 q0, d20, d18 @ p[ 0] - p[-s]
- vsubl.u8 q1, d16, d22 @ p[-2*s] - p[ s]
- vsubl.u8 q14, d21, d19
- vsubl.u8 q15, d17, d23
- vadd.i16 q2, q0, q0 @ 2*(p[0]-p[-s])
- vadd.i16 d29, d28, d28
- vadd.i16 q0, q0, q1 @ p[0]-p[-s] + p[-2*s]-p[s]
- vadd.i16 d28, d28, d30
- vadd.i16 q0, q0, q2 @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
- vadd.i16 d28, d28, d29
- vrshr.s16 q0, q0, #3 @ v
- vrshr.s16 d28, d28, #3
- vsub.i16 q8, q3, q13 @ t-1
- vabs.s16 q1, q0 @ V
- vshr.s16 q2, q0, #15 @ s
- vabs.s16 d30, d28
- vshr.s16 d29, d28, #15
- vsub.i16 q12, q1, q3 @ V-t
- vsub.i16 d31, d30, d6
- vsub.i16 q12, q12, q13 @ V-t-1
- vsub.i16 d31, d31, d26
- vcge.u16 q12, q12, q8 @ V-t-1 >= t-1
- vcge.u16 d31, d31, d16
- vadd.i16 q13, q3, q3 @ 2*t
- vadd.i16 d16, d6, d6
- vsub.i16 q13, q13, q1 @ 2*t - V
- vsub.i16 d16, d16, d30
- vadd.i16 q13, q13, q2 @ += s
- vadd.i16 d16, d16, d29
- veor q13, q13, q2 @ ^= s
- veor d16, d16, d29
- vbif q0, q13, q12
- vbif d28, d16, d31
- vmovl.u8 q1, d20
- vmovl.u8 q15, d21
- vaddw.u8 q2, q0, d18
- vaddw.u8 q3, q14, d19
- vsub.i16 q1, q1, q0
- vsub.i16 d30, d30, d28
- vqmovun.s16 d18, q2
- vqmovun.s16 d19, q3
- vqmovun.s16 d20, q1
- vqmovun.s16 d21, q15
-.endm
-
-function ff_vp6_edge_filter_ver_neon, export=1
- sub r0, r0, r1, lsl #1
- vld1.8 {q8}, [r0], r1 @ p[-2*s]
- vld1.8 {q9}, [r0], r1 @ p[-s]
- vld1.8 {q10}, [r0], r1 @ p[0]
- vld1.8 {q11}, [r0] @ p[s]
- vp6_edge_filter
- sub r0, r0, r1, lsl #1
- sub r1, r1, #8
- vst1.8 {d18}, [r0]!
- vst1.32 {d19[0]}, [r0], r1
- vst1.8 {d20}, [r0]!
- vst1.32 {d21[0]}, [r0]
- bx lr
-endfunc
-
-function ff_vp6_edge_filter_hor_neon, export=1
- sub r3, r0, #1
- sub r0, r0, #2
- vld1.32 {d16[0]}, [r0], r1
- vld1.32 {d18[0]}, [r0], r1
- vld1.32 {d20[0]}, [r0], r1
- vld1.32 {d22[0]}, [r0], r1
- vld1.32 {d16[1]}, [r0], r1
- vld1.32 {d18[1]}, [r0], r1
- vld1.32 {d20[1]}, [r0], r1
- vld1.32 {d22[1]}, [r0], r1
- vld1.32 {d17[0]}, [r0], r1
- vld1.32 {d19[0]}, [r0], r1
- vld1.32 {d21[0]}, [r0], r1
- vld1.32 {d23[0]}, [r0], r1
- vtrn.8 q8, q9
- vtrn.8 q10, q11
- vtrn.16 q8, q10
- vtrn.16 q9, q11
- vp6_edge_filter
- vtrn.8 q9, q10
- vst1.16 {d18[0]}, [r3], r1
- vst1.16 {d20[0]}, [r3], r1
- vst1.16 {d18[1]}, [r3], r1
- vst1.16 {d20[1]}, [r3], r1
- vst1.16 {d18[2]}, [r3], r1
- vst1.16 {d20[2]}, [r3], r1
- vst1.16 {d18[3]}, [r3], r1
- vst1.16 {d20[3]}, [r3], r1
- vst1.16 {d19[0]}, [r3], r1
- vst1.16 {d21[0]}, [r3], r1
- vst1.16 {d19[1]}, [r3], r1
- vst1.16 {d21[1]}, [r3], r1
- bx lr
-endfunc
diff --git a/libavcodec/vp56dsp.c b/libavcodec/vp56dsp.c
index a668712384..1ff67b1c87 100644
--- a/libavcodec/vp56dsp.c
+++ b/libavcodec/vp56dsp.c
@@ -77,9 +77,7 @@ av_cold void ff_vp6dsp_init(VP56DSPContext *s)
{
s->vp6_filter_diag4 = ff_vp6_filter_diag4_c;
-#if ARCH_ARM
- ff_vp6dsp_init_arm(s);
-#elif ARCH_X86
+#if ARCH_X86
ff_vp6dsp_init_x86(s);
#endif
}
diff --git a/libavcodec/vp56dsp.h b/libavcodec/vp56dsp.h
index e35e232ea3..f2cbb41a1e 100644
--- a/libavcodec/vp56dsp.h
+++ b/libavcodec/vp56dsp.h
@@ -38,7 +38,6 @@ void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride,
void ff_vp5dsp_init(VP56DSPContext *s);
void ff_vp6dsp_init(VP56DSPContext *s);
-void ff_vp6dsp_init_arm(VP56DSPContext *s);
void ff_vp6dsp_init_x86(VP56DSPContext *s);
#endif /* AVCODEC_VP56DSP_H */
-----------------------------------------------------------------------
Summary of changes:
configure | 5 +-
libavcodec/Makefile | 4 +-
libavcodec/arm/Makefile | 2 -
libavcodec/arm/vp6dsp_init_arm.c | 39 ------------
libavcodec/arm/vp6dsp_neon.S | 121 -------------------------------------
libavcodec/vp5.c | 2 +-
libavcodec/vp56.c | 6 +-
libavcodec/vp56.h | 5 +-
libavcodec/vp56dsp.h | 18 +++---
libavcodec/{vp56dsp.c => vp5dsp.c} | 19 +-----
libavcodec/vp6.c | 4 +-
libavcodec/vp6dsp.c | 13 +++-
libavcodec/x86/vp6dsp.asm | 59 ++++++++++--------
libavcodec/x86/vp6dsp_init.c | 4 +-
tests/checkasm/Makefile | 1 +
tests/checkasm/checkasm.c | 3 +
tests/checkasm/checkasm.h | 1 +
tests/checkasm/vp6dsp.c | 93 ++++++++++++++++++++++++++++
tests/fate/checkasm.mak | 1 +
19 files changed, 168 insertions(+), 232 deletions(-)
delete mode 100644 libavcodec/arm/vp6dsp_init_arm.c
delete mode 100644 libavcodec/arm/vp6dsp_neon.S
rename libavcodec/{vp56dsp.c => vp5dsp.c} (86%)
create mode 100644 tests/checkasm/vp6dsp.c
hooks/post-receive
--
_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]