This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.

commit b0d5914678ab9f7b610b01170fddb8bc83a383d8
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Mon May 4 22:11:38 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Jul 1 20:00:46 2026 +0200

    avcodec/x86/vc1dsp_mc: Add size 8 vertical SSSE3 mc functions
    
    vc1dsp.avg_vc1_mspel_pixels_tab_mc01_8_c:              165.6 ( 1.00x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc01_8_mmxext:          44.4 ( 3.73x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc01_8_ssse3:           18.5 ( 8.97x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc02_8_c:              152.5 ( 1.00x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc02_8_mmxext:          37.3 ( 4.09x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc02_8_ssse3:           18.5 ( 8.25x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc03_8_c:              162.9 ( 1.00x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc03_8_mmxext:          44.1 ( 3.69x)
    vc1dsp.avg_vc1_mspel_pixels_tab_mc03_8_ssse3:           18.3 ( 8.88x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc01_8_c:              150.5 ( 1.00x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc01_8_mmx:             42.4 ( 3.55x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc01_8_ssse3:           16.5 ( 9.11x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc02_8_c:               78.4 ( 1.00x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc02_8_mmx:             36.1 ( 2.17x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc02_8_ssse3:           16.5 ( 4.76x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc03_8_c:              144.7 ( 1.00x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc03_8_mmx:             42.6 ( 3.40x)
    vc1dsp.put_vc1_mspel_pixels_tab_mc03_8_ssse3:           16.3 ( 8.89x)
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vc1dsp_init.c |  3 ++
 libavcodec/x86/vc1dsp_mc.asm | 69 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/libavcodec/x86/vc1dsp_init.c b/libavcodec/x86/vc1dsp_init.c
index 02bea6b052..06740b3ec9 100644
--- a/libavcodec/x86/vc1dsp_init.c
+++ b/libavcodec/x86/vc1dsp_init.c
@@ -144,6 +144,9 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
         dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_vc1_chroma_mc8_nornd_ssse3;
         dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_vc1_chroma_mc8_nornd_ssse3;
 
+        MSPEL_FUNCS_SIZE(0, 1, 8, ssse3);
+        MSPEL_FUNCS_SIZE(0, 2, 8, ssse3);
+        MSPEL_FUNCS_SIZE(0, 3, 8, ssse3);
         MSPEL_FUNCS(1, 0, ssse3);
         MSPEL_FUNCS(2, 0, ssse3);
         MSPEL_FUNCS(3, 0, ssse3);
diff --git a/libavcodec/x86/vc1dsp_mc.asm b/libavcodec/x86/vc1dsp_mc.asm
index 2228254fd2..74510e218f 100644
--- a/libavcodec/x86/vc1dsp_mc.asm
+++ b/libavcodec/x86/vc1dsp_mc.asm
@@ -23,7 +23,12 @@
 
 SECTION_RODATA
 
+pb_m4_18: times 8 db -4, 18 ; mc01 (vertical): signed weights for the interleaved rows -1/+1
+pb_53_m3: times 8 db 53, -3 ; mc01 (vertical): signed weights for the interleaved rows 0/+2
+pb_m3_53: times 8 db -3, 53 ; mc03 (vertical): signed weights for the interleaved rows -1/+1
+pb_18_m4: times 8 db 18, -4 ; mc03 (vertical): signed weights for the interleaved rows 0/+2
 pb_m4_36: times 8 db -4, 36
+pb_36_m4: times 8 db 36, -4 ; mc02 (vertical): signed weights for the interleaved rows 0/+2
 pb_m4_53: times 8 db -4, 53
 pb_m3_18: times 8 db -3, 18
 
@@ -282,3 +287,67 @@ HOR_8B avg, 8
 
 HOR_8B put, 16
 HOR_8B avg, 16
+
+%macro VER_8B 2 ; %1 = put|avg, %2 = size (function-name suffix, MOV%2 load selector, row count)
+cglobal vc1_%1_mspel_mc01_%2, 4, 4, 6, dst, src, stride, rnd
+    mova              m1, [pb_m4_18]    ; weights applied to rows -1/+1
+    mova              m2, [pb_53_m3]    ; weights applied to rows 0/+2
+    add             rndd, 31            ; rounding bias = rnd + 31
+    jmp               vc1_%1_mspel_mc03_%2_after_prologue ; shared body is emitted once, inside mc03
+
+cglobal vc1_%1_mspel_mc02_%2, 4, 4, 6, dst, src, stride, rnd
+    mova              m1, [pb_m4_36]    ; weights applied to rows -1/+1
+    mova              m2, [pb_36_m4]    ; weights applied to rows 0/+2
+    lea             rndd, [4*rndd+28]   ; rounding bias = 4*rnd + 28 (lea leaves flags untouched)
+    jmp               vc1_%1_mspel_mc03_%2_after_prologue ; shared body is emitted once, inside mc03
+
+cglobal vc1_%1_mspel_mc03_%2, 4, 4, 6, dst, src, stride, rnd
+    mova              m1, [pb_m3_53]    ; weights applied to rows -1/+1
+    mova              m2, [pb_18_m4]    ; weights applied to rows 0/+2
+    add             rndd, 31            ; rounding bias = rnd + 31
+
+vc1_%1_mspel_mc03_%2_after_prologue:
+    neg          strideq                ; negate temporarily so [srcq+strideq] addresses the row above src
+    movd              m0, rndd          ; move bias to a vector reg; rndd is recycled as the loop counter below
+    WIN64_SPILL_XMM    8                ; body uses m6/m7 although cglobal declared 6 regs; presumably covers Win64 callee-saved xmm6/xmm7 (macro defined outside this view)
+    MOV%2             m3, [srcq+strideq] ; m3 = row -1 (stride is still negated here)
+    neg          strideq                ; restore the original stride
+    MOV%2             m4, [srcq]        ; m4 = row 0
+    MOV%2             m5, [srcq+strideq] ; m5 = row +1
+    SPLATW            m0, m0            ; presumably broadcasts the bias word to every word lane (macro defined outside this view)
+    lea             srcq, [srcq+2*strideq] ; srcq -> row +2, the first row loaded inside the loop
+%define hd  rndd
+    punpcklbw         m3, m5            ; m3 = bytes of rows -1 and +1 interleaved
+    mov               hd, %2            ; number of output rows to produce
+
+.loop:
+    MOV%2             m6, [srcq]        ; m6 = row +2 relative to the current output row
+    pmaddubsw         m3, m1            ; per word: w(-1)*row[-1] + w(+1)*row[+1]
+    punpcklbw         m4, m6            ; m4 = bytes of rows 0 and +2 interleaved
+    pmaddubsw         m7, m4, m2        ; per word: w(0)*row[0] + w(+2)*row[+2]
+    paddw             m3, m0            ; add the rounding bias
+    add             srcq, strideq       ; advance the source by one row
+    paddw             m7, m3            ; m7 = complete 4-tap sum + bias
+    mova              m3, m4            ; the rows 0/+2 pair is the rows -1/+1 pair of the next iteration
+%ifidn %1, avg
+    movq              m4, [dstq]        ; avg only: fetch the 8 existing dst bytes (m4 is reassigned below)
+%endif
+    psraw             m7, 6             ; arithmetic shift right by 6 (divide by 64)
+%ifnidn %1, avg
+    mova              m4, m5            ; rotate window: row +1 becomes row 0
+%endif
+    packuswb          m7, m7            ; saturate the words to unsigned bytes
+%ifidn %1, avg
+    pavgb             m7, m4            ; rounded average with the existing dst bytes
+    mova              m4, m5            ; rotate window: row +1 becomes row 0 (deferred, m4 held dst until here)
+%endif
+    movq          [dstq], m7            ; store 8 output bytes
+    add             dstq, strideq       ; dst advances by the same stride as src
+    mova              m5, m6            ; rotate window: row +2 becomes row +1
+    dec               hd                ; one row done
+    jnz            .loop                ; repeat until all %2 rows are written
+    RET
+%endmacro
+
+VER_8B put, 8
+VER_8B avg, 8

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to