On 6/11/2016 5:32 PM, Martin Storsjö wrote: > --- > libavcodec/x86/h264_idct.asm | 5 +++++ > libavcodec/x86/h264_idct_10bit.asm | 4 ++++ > 2 files changed, 9 insertions(+) > > diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm > index 313791a..9abed3c 100644 > --- a/libavcodec/x86/h264_idct.asm > +++ b/libavcodec/x86/h264_idct.asm > @@ -82,6 +82,7 @@ SECTION .text > INIT_MMX mmx > ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride) > cglobal h264_idct_add_8, 3, 3, 0 > + movsxd r2, r2d > IDCT4_ADD r0, r1, r2 > RET > > @@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0 > INIT_MMX mmx > ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride) > cglobal h264_idct8_add_8, 3, 4, 0 > + movsxd r2, r2d > %assign pad 128+4-(stack_offset&7) > SUB rsp, pad > > @@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0 > INIT_XMM sse2 > ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride) > cglobal h264_idct8_add_8, 3, 4, 10 > + movsxd r2, r2d > IDCT8_ADD_SSE r0, r1, r2, r3 > RET > > @@ -310,6 +313,7 @@ INIT_MMX mmxext > ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride) > %if ARCH_X86_64 > cglobal h264_idct_dc_add_8, 3, 4, 0 > + movsxd r2, r2d > movsx r3, word [r1] > mov dword [r1], 0 > DC_ADD_MMXEXT_INIT r3, r2 > @@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0 > > ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int > stride) > cglobal h264_idct8_dc_add_8, 3, 4, 0 > + movsxd r2, r2d > movsx r3, word [r1] > mov dword [r1], 0 > DC_ADD_MMXEXT_INIT r3, r2 > diff --git a/libavcodec/x86/h264_idct_10bit.asm > b/libavcodec/x86/h264_idct_10bit.asm > index b7d5105..a5bfb34 100644 > --- a/libavcodec/x86/h264_idct_10bit.asm > +++ b/libavcodec/x86/h264_idct_10bit.asm > @@ -77,6 +77,7 @@ SECTION .text > > %macro IDCT_ADD_10 0 > cglobal h264_idct_add_10, 3,3 > + movsxd r2, r2d > IDCT4_ADD_10 r0, r1, r2 > RET > %endmacro > @@ -190,6 +191,7 @@ IDCT_ADD16_10 > > 
INIT_MMX mmxext > cglobal h264_idct_dc_add_10,3,3 > + movsxd r2, r2d > movd m0, [r1] > mov dword [r1], 0 > paddd m0, [pd_32] > @@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3 > > ;----------------------------------------------------------------------------- > %macro IDCT8_DC_ADD 0 > cglobal h264_idct8_dc_add_10,3,4,7 > + movsxd r2, r2d > movd m0, [r1] > mov dword[r1], 0 > paddd m0, [pd_32] > @@ -438,6 +441,7 @@ IDCT_ADD8 > > %macro IDCT8_ADD 0 > cglobal h264_idct8_add_10, 3,4,16 > + movsxd r2, r2d > %if UNIX64 == 0 > %assign pad 16-gprsize-(stack_offset&15) > sub rsp, pad >
Use the movsxdifnidn macro to avoid emitting a movsxd on x86_32. _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
