On 2016-06-11 23:55:46 +0300, Martin Storsjö wrote:
> On Sat, 11 Jun 2016, James Almer wrote:
> 
> >On 6/11/2016 5:32 PM, Martin Storsjö wrote:
> >>---
> >> libavcodec/x86/h264_idct.asm       | 5 +++++
> >> libavcodec/x86/h264_idct_10bit.asm | 4 ++++
> >> 2 files changed, 9 insertions(+)
> >>
> >>diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
> >>index 313791a..9abed3c 100644
> >>--- a/libavcodec/x86/h264_idct.asm
> >>+++ b/libavcodec/x86/h264_idct.asm
> >>@@ -82,6 +82,7 @@ SECTION .text
> >> INIT_MMX mmx
> >> ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
> >> cglobal h264_idct_add_8, 3, 3, 0
> >>+    movsxd       r2, r2d
> >>     IDCT4_ADD    r0, r1, r2
> >>     RET
> >>
> >>@@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0
> >> INIT_MMX mmx
> >> ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
> >> cglobal h264_idct8_add_8, 3, 4, 0
> >>+    movsxd      r2, r2d
> >>     %assign pad 128+4-(stack_offset&7)
> >>     SUB         rsp, pad
> >>
> >>@@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
> >> INIT_XMM sse2
> >> ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
> >> cglobal h264_idct8_add_8, 3, 4, 10
> >>+    movsxd    r2, r2d
> >>     IDCT8_ADD_SSE r0, r1, r2, r3
> >>     RET
> >>
> >>@@ -310,6 +313,7 @@ INIT_MMX mmxext
> >> ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int 
> >> stride)
> >> %if ARCH_X86_64
> >> cglobal h264_idct_dc_add_8, 3, 4, 0
> >>+    movsxd       r2, r2d
> >>     movsx        r3, word [r1]
> >>     mov  dword [r1], 0
> >>     DC_ADD_MMXEXT_INIT r3, r2
> >>@@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
> >>
> >> ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int 
> >> stride)
> >> cglobal h264_idct8_dc_add_8, 3, 4, 0
> >>+    movsxd       r2, r2d
> >>     movsx        r3, word [r1]
> >>     mov  dword [r1], 0
> >>     DC_ADD_MMXEXT_INIT r3, r2
> >>diff --git a/libavcodec/x86/h264_idct_10bit.asm 
> >>b/libavcodec/x86/h264_idct_10bit.asm
> >>index b7d5105..a5bfb34 100644
> >>--- a/libavcodec/x86/h264_idct_10bit.asm
> >>+++ b/libavcodec/x86/h264_idct_10bit.asm
> >>@@ -77,6 +77,7 @@ SECTION .text
> >>
> >> %macro IDCT_ADD_10 0
> >> cglobal h264_idct_add_10, 3,3
> >>+    movsxd       r2, r2d
> >>     IDCT4_ADD_10 r0, r1, r2
> >>     RET
> >> %endmacro
> >>@@ -190,6 +191,7 @@ IDCT_ADD16_10
> >>
> >> INIT_MMX mmxext
> >> cglobal h264_idct_dc_add_10,3,3
> >>+    movsxd    r2, r2d
> >>     movd      m0, [r1]
> >>     mov dword [r1], 0
> >>     paddd     m0, [pd_32]
> >>@@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3
> >> ;-----------------------------------------------------------------------------
> >> %macro IDCT8_DC_ADD 0
> >> cglobal h264_idct8_dc_add_10,3,4,7
> >>+    movsxd    r2, r2d
> >>     movd      m0, [r1]
> >>     mov dword[r1], 0
> >>     paddd     m0, [pd_32]
> >>@@ -438,6 +441,7 @@ IDCT_ADD8
> >>
> >> %macro IDCT8_ADD 0
> >> cglobal h264_idct8_add_10, 3,4,16
> >>+    movsxd    r2, r2d
> >> %if UNIX64 == 0
> >>     %assign pad 16-gprsize-(stack_offset&15)
> >>     sub  rsp, pad
> >>
> >
> >Use the movsxdifnidn macro, to avoid emitting a movsxd on x86_32.
> 
> Thanks, I can change that.
> 
> Some of the codepaths are within an %if ARCH_X86_64 (like
> ff_h264_idct_dc_add_8_mmxext); is it still preferred to use that macro form
> there, or an explicit movsxd?

I guess movsxdifnidn is a little easier to spot when the stride is 
changed to ptrdiff_t. patch ok s/movsxd/movsxdifnidn/ in the added lines

Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to