On 6/11/2016 5:55 PM, Martin Storsjö wrote:
> On Sat, 11 Jun 2016, James Almer wrote:
> 
>> On 6/11/2016 5:32 PM, Martin Storsjö wrote:
>>> ---
>>>  libavcodec/x86/h264_idct.asm       | 5 +++++
>>>  libavcodec/x86/h264_idct_10bit.asm | 4 ++++
>>>  2 files changed, 9 insertions(+)
>>>
>>> diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
>>> index 313791a..9abed3c 100644
>>> --- a/libavcodec/x86/h264_idct.asm
>>> +++ b/libavcodec/x86/h264_idct.asm
>>> @@ -82,6 +82,7 @@ SECTION .text
>>>  INIT_MMX mmx
>>>  ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
>>>  cglobal h264_idct_add_8, 3, 3, 0
>>> +    movsxd       r2, r2d
>>>      IDCT4_ADD    r0, r1, r2
>>>      RET
>>>
>>> @@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
>>>  INIT_MMX mmx
>>>  ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
>>>  cglobal h264_idct8_add_8, 3, 4, 0
>>> +    movsxd      r2, r2d
>>>      %assign pad 128+4-(stack_offset&7)
>>>      SUB         rsp, pad
>>>
>>> @@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
>>>  INIT_XMM sse2
>>>  ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
>>>  cglobal h264_idct8_add_8, 3, 4, 10
>>> +    movsxd    r2, r2d
>>>      IDCT8_ADD_SSE r0, r1, r2, r3
>>>      RET
>>>
>>> @@ -310,6 +313,7 @@ INIT_MMX mmxext
>>>  ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int 
>>> stride)
>>>  %if ARCH_X86_64
>>>  cglobal h264_idct_dc_add_8, 3, 4, 0
>>> +    movsxd       r2, r2d
>>>      movsx        r3, word [r1]
>>>      mov  dword [r1], 0
>>>      DC_ADD_MMXEXT_INIT r3, r2
>>> @@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
>>>
>>>  ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int 
>>> stride)
>>>  cglobal h264_idct8_dc_add_8, 3, 4, 0
>>> +    movsxd       r2, r2d
>>>      movsx        r3, word [r1]
>>>      mov  dword [r1], 0
>>>      DC_ADD_MMXEXT_INIT r3, r2
>>> diff --git a/libavcodec/x86/h264_idct_10bit.asm 
>>> b/libavcodec/x86/h264_idct_10bit.asm
>>> index b7d5105..a5bfb34 100644
>>> --- a/libavcodec/x86/h264_idct_10bit.asm
>>> +++ b/libavcodec/x86/h264_idct_10bit.asm
>>> @@ -77,6 +77,7 @@ SECTION .text
>>>
>>>  %macro IDCT_ADD_10 0
>>>  cglobal h264_idct_add_10, 3,3
>>> +    movsxd       r2, r2d
>>>      IDCT4_ADD_10 r0, r1, r2
>>>      RET
>>>  %endmacro
>>> @@ -190,6 +191,7 @@ IDCT_ADD16_10
>>>
>>>  INIT_MMX mmxext
>>>  cglobal h264_idct_dc_add_10,3,3
>>> +    movsxd    r2, r2d
>>>      movd      m0, [r1]
>>>      mov dword [r1], 0
>>>      paddd     m0, [pd_32]
>>> @@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3
>>>  
>>> ;-----------------------------------------------------------------------------
>>>  %macro IDCT8_DC_ADD 0
>>>  cglobal h264_idct8_dc_add_10,3,4,7
>>> +    movsxd    r2, r2d
>>>      movd      m0, [r1]
>>>      mov dword[r1], 0
>>>      paddd     m0, [pd_32]
>>> @@ -438,6 +441,7 @@ IDCT_ADD8
>>>
>>>  %macro IDCT8_ADD 0
>>>  cglobal h264_idct8_add_10, 3,4,16
>>> +    movsxd    r2, r2d
>>>  %if UNIX64 == 0
>>>      %assign pad 16-gprsize-(stack_offset&15)
>>>      sub  rsp, pad
>>>
>>
>> Use the movsxdifnidn macro, to avoid emitting a movsxd on x86_32.
> 
> Thanks, I can change that.
> 
> Some of the codepaths are within an %if ARCH_X86_64 (like 
> ff_h264_idct_dc_add_8_mmxext); is it still preferred to use that macro form 
> there, or an explicit movsxd?

I don't have a preference. movsxd would make it clear to whoever reads the
code that it's an x86_64 codepath whereas using movsxdifnidn everywhere is
more consistent in a way.

It's mostly a cosmetic choice, i guess.

> 
> // Martin
> _______________________________________________
> libav-devel mailing list
> [email protected]
> https://lists.libav.org/mailman/listinfo/libav-devel

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to