On 2016-06-11 23:55:46 +0300, Martin Storsjö wrote: > On Sat, 11 Jun 2016, James Almer wrote: > > >On 6/11/2016 5:32 PM, Martin Storsjö wrote: > >>--- > >> libavcodec/x86/h264_idct.asm | 5 +++++ > >> libavcodec/x86/h264_idct_10bit.asm | 4 ++++ > >> 2 files changed, 9 insertions(+) > >> > >>diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm > >>index 313791a..9abed3c 100644 > >>--- a/libavcodec/x86/h264_idct.asm > >>+++ b/libavcodec/x86/h264_idct.asm > >>@@ -82,6 +82,7 @@ SECTION .text > >> INIT_MMX mmx > >> ; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride) > >> cglobal h264_idct_add_8, 3, 3, 0 > >>+ movsxd r2, r2d > >> IDCT4_ADD r0, r1, r2 > >> RET > >> > >>@@ -204,6 +205,7 @@ cglobal h264_idct_add_8, 3, 3, 0 > >> INIT_MMX mmx > >> ; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride) > >> cglobal h264_idct8_add_8, 3, 4, 0 > >>+ movsxd r2, r2d > >> %assign pad 128+4-(stack_offset&7) > >> SUB rsp, pad > >> > >>@@ -272,6 +274,7 @@ cglobal h264_idct8_add_8, 3, 4, 0 > >> INIT_XMM sse2 > >> ; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride) > >> cglobal h264_idct8_add_8, 3, 4, 10 > >>+ movsxd r2, r2d > >> IDCT8_ADD_SSE r0, r1, r2, r3 > >> RET > >> > >>@@ -310,6 +313,7 @@ INIT_MMX mmxext > >> ; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int > >> stride) > >> %if ARCH_X86_64 > >> cglobal h264_idct_dc_add_8, 3, 4, 0 > >>+ movsxd r2, r2d > >> movsx r3, word [r1] > >> mov dword [r1], 0 > >> DC_ADD_MMXEXT_INIT r3, r2 > >>@@ -318,6 +322,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0 > >> > >> ; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int > >> stride) > >> cglobal h264_idct8_dc_add_8, 3, 4, 0 > >>+ movsxd r2, r2d > >> movsx r3, word [r1] > >> mov dword [r1], 0 > >> DC_ADD_MMXEXT_INIT r3, r2 > >>diff --git a/libavcodec/x86/h264_idct_10bit.asm > >>b/libavcodec/x86/h264_idct_10bit.asm > >>index b7d5105..a5bfb34 100644 > >>--- a/libavcodec/x86/h264_idct_10bit.asm > >>+++ b/libavcodec/x86/h264_idct_10bit.asm > >>@@ -77,6 +77,7 @@ SECTION .text > >> > >> %macro IDCT_ADD_10 0 > >> cglobal h264_idct_add_10, 3,3 > >>+ movsxd r2, r2d > >> IDCT4_ADD_10 r0, r1, r2 > >> RET > >> %endmacro > >>@@ -190,6 +191,7 @@ IDCT_ADD16_10 > >> > >> INIT_MMX mmxext > >> cglobal h264_idct_dc_add_10,3,3 > >>+ movsxd r2, r2d > >> movd m0, [r1] > >> mov dword [r1], 0 > >> paddd m0, [pd_32] > >>@@ -205,6 +207,7 @@ cglobal h264_idct_dc_add_10,3,3 > >> ;----------------------------------------------------------------------------- > >> %macro IDCT8_DC_ADD 0 > >> cglobal h264_idct8_dc_add_10,3,4,7 > >>+ movsxd r2, r2d > >> movd m0, [r1] > >> mov dword[r1], 0 > >> paddd m0, [pd_32] > >>@@ -438,6 +441,7 @@ IDCT_ADD8 > >> > >> %macro IDCT8_ADD 0 > >> cglobal h264_idct8_add_10, 3,4,16 > >>+ movsxd r2, r2d > >> %if UNIX64 == 0 > >> %assign pad 16-gprsize-(stack_offset&15) > >> sub rsp, pad > >> > > > >Use the movsxdifnidn macro, to avoid emitting a movsxd on x86_32. > > Thanks, I can change that. > > Some of the codepaths are within an %if ARCH_X86_64 (like > ff_h264_idct_dc_add_8_mmxext); is it still preferred to use that macro form > there, or an explicit movsxd?
I guess movsxdifnidn is a little easier to spot when the stride is changed to ptrdiff_t. patch ok s/movsxd/movsxdifnidn/ in the added lines Janne _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
