On 6/11/2016 5:32 PM, Martin Storsjö wrote:
> ---
>  libavcodec/x86/h264_idct.asm       | 5 +++++
>  libavcodec/x86/h264_idct_10bit.asm | 4 ++++
>  2 files changed, 9 insertions(+)
> 
> diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
> index 313791a..9abed3c 100644
> --- a/libavcodec/x86/h264_idct.asm
> +++ b/libavcodec/x86/h264_idct.asm
> @@ -82,6 +82,7 @@ SECTION .text
>  INIT_MMX mmx
>  ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
>  cglobal h264_idct_add_8, 3, 3, 0
> +    movsxd       r2, r2d
>      IDCT4_ADD    r0, r1, r2
>      RET
>  
> @@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
>  INIT_MMX mmx
>  ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
>  cglobal h264_idct8_add_8, 3, 4, 0
> +    movsxd      r2, r2d
>      %assign pad 128+4-(stack_offset&7)
>      SUB         rsp, pad
>  
> @@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
>  INIT_XMM sse2
>  ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
>  cglobal h264_idct8_add_8, 3, 4, 10
> +    movsxd    r2, r2d
>      IDCT8_ADD_SSE r0, r1, r2, r3
>      RET
>  
> @@ -310,6 +313,7 @@ INIT_MMX mmxext
>  ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
>  %if ARCH_X86_64
>  cglobal h264_idct_dc_add_8, 3, 4, 0
> +    movsxd       r2, r2d
>      movsx        r3, word [r1]
>      mov  dword [r1], 0
>      DC_ADD_MMXEXT_INIT r3, r2
> @@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
>  
>  ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
>  cglobal h264_idct8_dc_add_8, 3, 4, 0
> +    movsxd       r2, r2d
>      movsx        r3, word [r1]
>      mov  dword [r1], 0
>      DC_ADD_MMXEXT_INIT r3, r2
> diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
> index b7d5105..a5bfb34 100644
> --- a/libavcodec/x86/h264_idct_10bit.asm
> +++ b/libavcodec/x86/h264_idct_10bit.asm
> @@ -77,6 +77,7 @@ SECTION .text
>  
>  %macro IDCT_ADD_10 0
>  cglobal h264_idct_add_10, 3,3
> +    movsxd       r2, r2d
>      IDCT4_ADD_10 r0, r1, r2
>      RET
>  %endmacro
> @@ -190,6 +191,7 @@ IDCT_ADD16_10
>  
>  INIT_MMX mmxext
>  cglobal h264_idct_dc_add_10,3,3
> +    movsxd    r2, r2d
>      movd      m0, [r1]
>      mov dword [r1], 0
>      paddd     m0, [pd_32]
> @@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3
>  ;-----------------------------------------------------------------------------
>  %macro IDCT8_DC_ADD 0
>  cglobal h264_idct8_dc_add_10,3,4,7
> +    movsxd    r2, r2d
>      movd      m0, [r1]
>      mov dword[r1], 0
>      paddd     m0, [pd_32]
> @@ -438,6 +441,7 @@ IDCT_ADD8
>  
>  %macro IDCT8_ADD 0
>  cglobal h264_idct8_add_10, 3,4,16
> +    movsxd    r2, r2d
>  %if UNIX64 == 0
>      %assign pad 16-gprsize-(stack_offset&15)
>      sub  rsp, pad
> 

Use the movsxdifnidn macro instead of a bare movsxd, to avoid emitting a movsxd on x86_32.

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to