vc1dsp_mmx: Remove purely vertical MMX mc functions

Andreas Rheinhardt via ffmpeg-cvslog Wed, 01 Jul 2026 12:30:35 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 4c6e8fdbf6408c29cc871dcd08767962ec9ca184
Author:     Andreas Rheinhardt <[email protected]>
AuthorDate: Tue May 5 12:31:28 2026 +0200
Commit:     Andreas Rheinhardt <[email protected]>
CommitDate: Wed Jul 1 20:00:46 2026 +0200

    avcodec/x86/vc1dsp_mmx: Remove purely vertical MMX mc functions
    
    They have been superseded by SSSE3. Notice that the functions removed
    occupied 3424B with GCC and 6176B with Clang here, whereas
    the SSSE3 functions replacing them occupy only 944B.
    
    Signed-off-by: Andreas Rheinhardt <[email protected]>
---
 libavcodec/x86/vc1dsp_mmx.c | 120 --------------------------------------------
 1 file changed, 120 deletions(-)

diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 7470fe23d8..b1a85150eb 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -74,64 +74,6 @@ void ff_vc1_avg_hor_16b_shift2_mmxext(uint8_t *dst, x86_reg stride,
      "punpcklwd %%mm7, %%mm7           \n\t"    \
      "punpckldq %%mm7, %%mm7           \n\t"
 
-/**
- * Purely vertical or horizontal 1/2 shift interpolation.
- * Sacrifice mm6 for *9 factor.
- */
-#define VC1_SHIFT2(OP, OPNAME)\
-static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
-                                     x86_reg stride, int rnd, x86_reg offset)\
-{\
-    rnd = 8-rnd;\
-    __asm__ volatile(\
-        "mov       $8, %%"FF_REG_c"        \n\t"\
-        LOAD_ROUNDER_MMX("%5")\
-        "movq      "MANGLE(ff_pw_9)", %%mm6\n\t"\
-        "1:                                \n\t"\
-        "movd      0(%0   ), %%mm3         \n\t"\
-        "movd      4(%0   ), %%mm4         \n\t"\
-        "movd      0(%0,%2), %%mm1         \n\t"\
-        "movd      4(%0,%2), %%mm2         \n\t"\
-        "add       %2, %0                  \n\t"\
-        "punpcklbw %%mm0, %%mm3            \n\t"\
-        "punpcklbw %%mm0, %%mm4            \n\t"\
-        "punpcklbw %%mm0, %%mm1            \n\t"\
-        "punpcklbw %%mm0, %%mm2            \n\t"\
-        "paddw     %%mm1, %%mm3            \n\t"\
-        "paddw     %%mm2, %%mm4            \n\t"\
-        "movd      0(%0,%3), %%mm1         \n\t"\
-        "movd      4(%0,%3), %%mm2         \n\t"\
-        "pmullw    %%mm6, %%mm3            \n\t" /* 0,9,9,0*/\
-        "pmullw    %%mm6, %%mm4            \n\t" /* 0,9,9,0*/\
-        "punpcklbw %%mm0, %%mm1            \n\t"\
-        "punpcklbw %%mm0, %%mm2            \n\t"\
-        "psubw     %%mm1, %%mm3            \n\t" /*-1,9,9,0*/\
-        "psubw     %%mm2, %%mm4            \n\t" /*-1,9,9,0*/\
-        "movd      0(%0,%2), %%mm1         \n\t"\
-        "movd      4(%0,%2), %%mm2         \n\t"\
-        "punpcklbw %%mm0, %%mm1            \n\t"\
-        "punpcklbw %%mm0, %%mm2            \n\t"\
-        "psubw     %%mm1, %%mm3            \n\t" /*-1,9,9,-1*/\
-        "psubw     %%mm2, %%mm4            \n\t" /*-1,9,9,-1*/\
-        NORMALIZE_MMX("$4")\
-        "packuswb  %%mm4, %%mm3            \n\t"\
-        OP((%1), %%mm3)\
-        "movq      %%mm3, (%1)             \n\t"\
-        "add       %6, %0                  \n\t"\
-        "add       %4, %1                  \n\t"\
-        "dec       %%"FF_REG_c"            \n\t"\
-        "jnz 1b                            \n\t"\
-        : "+r"(src),  "+r"(dst)\
-        : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
-          "g"(stride-offset)\
-          NAMED_CONSTRAINTS_ADD(ff_pw_9)\
-        : "%"FF_REG_c, "memory"\
-    );\
-}
-
-VC1_SHIFT2(OP_PUT, put_)
-VC1_SHIFT2(OP_AVG, avg_)
-
 /**
  * Core of the 1/4 and 3/4 shift bicubic interpolation.
  *
@@ -270,59 +212,18 @@ OPNAME ## vc1_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride,    \
     );                                                                  \
 }
 
-/**
- * Macro to build the 8 bits, any direction, version of vc1_put_shift[13].
- * Here, offset=src_stride. Parameters passed A1 to A4 must use
- * %3 (offset) and %4 (3*offset).
- *
- * @param  NAME   Either 1 or 3
- * @see MSPEL_FILTER13_CORE for information on A1->A4
- */
-#define MSPEL_FILTER13_8B(NAME, A1, A2, A3, A4, OP, OPNAME)             \
-static void                                                             \
-OPNAME ## vc1_## NAME ## _mmx(uint8_t *dst, const uint8_t *src,         \
-                        x86_reg stride, int rnd, x86_reg offset)      \
-{                                                                       \
-    int h = 8;                                                          \
-    src -= offset;                                                      \
-    rnd = 32-rnd;                                                       \
-    __asm__ volatile (                                                      \
-        LOAD_ROUNDER_MMX("%6")                                          \
-        "movq      "MANGLE(ff_pw_53)", %%mm5       \n\t"                \
-        "movq      "MANGLE(ff_pw_18)", %%mm6       \n\t"                \
-        ".p2align 3                \n\t"                                \
-        "1:                        \n\t"                                \
-        MSPEL_FILTER13_CORE(DO_UNPACK, "movd   1", A1, A2, A3, A4)      \
-        NORMALIZE_MMX("$6")                                             \
-        TRANSFER_DO_PACK(OP)                                            \
-        "add       %5, %1          \n\t"                                \
-        "add       %5, %2          \n\t"                                \
-        "decl      %0              \n\t"                                \
-        "jnz 1b                    \n\t"                                \
-        : "+r"(h), "+r" (src),  "+r" (dst)                              \
-        : "r"(offset), "r"(3*offset), "g"(stride), "m"(rnd)             \
-          NAMED_CONSTRAINTS_ADD(ff_pw_53,ff_pw_18,ff_pw_3)              \
-        : "memory"                                                      \
-    );                                                                  \
-}
-
 /** 1/4 shift bicubic interpolation */
-MSPEL_FILTER13_8B     (shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )", OP_PUT, put_)
-MSPEL_FILTER13_8B     (shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )", OP_AVG, avg_)
 MSPEL_FILTER13_VER_16B(shift1, "0(%1,%4  )", "0(%1,%3,2)", "0(%1,%3  )", "0(%1     )")
 MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_PUT, put_)
 MSPEL_FILTER13_HOR_16B(shift1, "2*3(%1)", "2*2(%1)", "2*1(%1)", "2*0(%1)", OP_AVG, avg_)
 
 /** 3/4 shift bicubic interpolation */
-MSPEL_FILTER13_8B     (shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )", OP_PUT, put_)
-MSPEL_FILTER13_8B     (shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )", OP_AVG, avg_)
 MSPEL_FILTER13_VER_16B(shift3, "0(%1     )", "0(%1,%3  )", "0(%1,%3,2)", "0(%1,%4  )")
 MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_PUT, put_)
 MSPEL_FILTER13_HOR_16B(shift3, "2*0(%1)", "2*1(%1)", "2*2(%1)", "2*3(%1)", OP_AVG, avg_)
 
 typedef void (*vc1_mspel_mc_filter_ver_16bits)(int16_t *dst, const uint8_t *src, x86_reg src_stride, int rnd, int64_t shift);
 typedef void (*vc1_mspel_mc_filter_hor_16bits)(uint8_t *dst, x86_reg dst_stride, const int16_t *src, int rnd);
-typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset);
 
 /**
  * Interpolate fractional pel values by applying proper vertical then
@@ -343,15 +244,12 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, ptrdiff_t strid
          { NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
     static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
          { NULL, OP ## vc1_hor_16b_shift1_mmx, ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR, OP ## vc1_hor_16b_shift3_mmx };\
-    static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
-         { NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
 \
     __asm__ volatile(\
         "pxor %%mm0, %%mm0         \n\t"\
         ::: "memory"\
     );\
 \
-        if (hmode) { /* Horizontal filter to apply, output to tmp */\
             static const int shift_value[] = { 0, 5, 1, 5 };\
             int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
             int              r;\
@@ -361,12 +259,6 @@ static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, ptrdiff_t strid
             vc1_put_shift_ver_16bits[vmode](tmp, src-1, stride, r, shift);\
 \
             vc1_put_shift_hor_16bits[hmode](dst, stride, tmp+1, 64-rnd);\
-            return;\
-        }\
-        else { /* No horizontal filter, output 8 lines to dst */\
-            vc1_put_shift_8bits[vmode](dst, src, stride, 1-rnd, stride);\
-            return;\
-        }\
 } \
 static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
                                   int stride, int hmode, int vmode, int rnd)\
@@ -412,10 +304,6 @@ static void avg_vc1_mspel_mc ## a ## b ## _16_mmxext(uint8_t *dst,      \
      avg_vc1_mspel_mc_16(dst, src, stride, a, b, rnd);                  \
 }
 
-DECLARE_FUNCTION(0, 1)
-DECLARE_FUNCTION(0, 2)
-DECLARE_FUNCTION(0, 3)
-
 DECLARE_FUNCTION(1, 1)
 DECLARE_FUNCTION(1, 2)
 DECLARE_FUNCTION(1, 3)
@@ -434,10 +322,6 @@ DECLARE_FUNCTION(3, 3)
 
 av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
-    FN_ASSIGN(put_, 0, 1, _mmx);
-    FN_ASSIGN(put_, 0, 2, _mmx);
-    FN_ASSIGN(put_, 0, 3, _mmx);
-
     FN_ASSIGN(put_, 1, 1, _mmx);
     FN_ASSIGN(put_, 1, 2, _mmx);
     FN_ASSIGN(put_, 1, 3, _mmx);
@@ -453,10 +337,6 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 
 av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
 {
-    FN_ASSIGN(avg_, 0, 1, _mmxext);
-    FN_ASSIGN(avg_, 0, 2, _mmxext);
-    FN_ASSIGN(avg_, 0, 3, _mmxext);
-
     FN_ASSIGN(avg_, 1, 1, _mmxext);
     FN_ASSIGN(avg_, 1, 2, _mmxext);
     FN_ASSIGN(avg_, 1, 3, _mmxext);

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 11/11: avcodec/x86/vc1dsp_mmx: Remove purely vertical MMX mc functions

Reply via email to