On Wed, Sep 17, 2014 at 10:55:52PM +0200, Vitor Sessak wrote: > On Wed, Sep 17, 2014 at 10:31 PM, Michael Niedermayer <michae...@gmx.at> > wrote: > > On Wed, Sep 17, 2014 at 09:21:08PM +0200, Vitor Sessak wrote: > >> Hi! > >> > >> I'm not sure we can count on registers been preserved across ASM > >> blocks. Moreover, this causes problems with utils that instrument the > >> code by inserting function calls between lines (like ThreadSanitizer). > >> > >> The attached patch fix one instance of this problem in swscale. > >> > >> -Vitor > > > >> swscale.c | 82 > >> +++++++++++++++++++++++++++++++------------------------------- > >> 1 file changed, 42 insertions(+), 40 deletions(-) > >> 7fe9f7a0c1a7c1ed843b891f5f810fbd94a2e03f > >> 0001-swscale-x86-do-not-expect-registers-to-be-preserved-.patch > >> From f1c59628b2baeb9994bed8947c0a2c228610bf4f Mon Sep 17 00:00:00 2001 > >> From: Vitor Sessak <vses...@google.com> > >> Date: Wed, 17 Sep 2014 21:10:16 +0200 > >> Subject: [PATCH] swscale/x86: do not expect registers to be preserved > >> across > >> inline ASM blocks > >> > >> --- > >> libswscale/x86/swscale.c | 82 > >> +++++++++++++++++++++++++----------------------- > >> 1 file changed, 42 insertions(+), 40 deletions(-) > >> > >> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c > >> index c4c0e28..38157c8 100644 > >> --- a/libswscale/x86/swscale.c > >> +++ b/libswscale/x86/swscale.c > >> @@ -205,36 +205,19 @@ static void yuv2yuvX_sse3(const int16_t *filter, int > >> filterSize, > >> yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, > >> offset); > >> return; > >> } > >> - if (offset) { > >> - __asm__ volatile("movq (%0), %%xmm3\n\t" > >> - "movdqa %%xmm3, %%xmm4\n\t" > >> - "psrlq $24, %%xmm3\n\t" > >> - "psllq $40, %%xmm4\n\t" > >> - "por %%xmm4, %%xmm3\n\t" > >> - :: "r"(dither) > >> - ); > >> - } else { > >> - __asm__ volatile("movq (%0), %%xmm3\n\t" > >> - :: "r"(dither) > >> - ); > >> - } > > > >> - filterSize--; > > > > not sure i missed it 
but it seems this is lost in the new code > > Good catch. Fixed! > > >> - __asm__ volatile( > >> - "pxor %%xmm0, %%xmm0\n\t" > >> - "punpcklbw %%xmm0, %%xmm3\n\t" > >> - "movd %0, %%xmm1\n\t" > >> - "punpcklwd %%xmm1, %%xmm1\n\t" > >> - "punpckldq %%xmm1, %%xmm1\n\t" > >> - "punpcklqdq %%xmm1, %%xmm1\n\t" > >> - "psllw $3, %%xmm1\n\t" > >> - "paddw %%xmm1, %%xmm3\n\t" > >> - "psraw $4, %%xmm3\n\t" > >> - ::"m"(filterSize) > >> - ); > >> - __asm__ volatile( > >> - "movdqa %%xmm3, %%xmm4\n\t" > >> - "movdqa %%xmm3, %%xmm7\n\t" > >> - "movl %3, %%ecx\n\t" > >> +#define MAIN_FUNCTION \ > >> + "pxor %%xmm0, %%xmm0 \n\t" \ > >> + "punpcklbw %%xmm0, %%xmm3 \n\t" \ > >> + "movd %4, %%xmm1 \n\t" \ > >> + "punpcklwd %%xmm1, %%xmm1 \n\t" \ > >> + "punpckldq %%xmm1, %%xmm1 \n\t" \ > >> + "punpcklqdq %%xmm1, %%xmm1 \n\t" \ > >> + "psllw $3, %%xmm1 \n\t" \ > >> + "paddw %%xmm1, %%xmm3 \n\t" \ > >> + "psraw $4, %%xmm3 \n\t" \ > >> + "movdqa %%xmm3, %%xmm4 \n\t" \ > >> + "movdqa %%xmm3, %%xmm7 \n\t" \ > >> + "movl %3, %%ecx \n\t" \ > >> "mov %0, %%"REG_d" \n\t"\ > >> "mov (%%"REG_d"), %%"REG_S" \n\t"\ > >> ".p2align 4 \n\t" /* > >> FIXME Unroll? 
*/\ > >> @@ -252,20 +235,39 @@ static void yuv2yuvX_sse3(const int16_t *filter, int > >> filterSize, > >> " jnz 1b \n\t"\ > >> "psraw $3, %%xmm3 \n\t"\ > >> "psraw $3, %%xmm4 \n\t"\ > >> - "packuswb %%xmm4, %%xmm3 \n\t" > >> - "movntdq %%xmm3, (%1, %%"REG_c")\n\t" > >> + "packuswb %%xmm4, %%xmm3 \n\t"\ > >> + "movntdq %%xmm3, (%1, %%"REG_c")\n\t"\ > >> "add $16, %%"REG_c" \n\t"\ > >> "cmp %2, %%"REG_c" \n\t"\ > >> - "movdqa %%xmm7, %%xmm3\n\t" > >> - "movdqa %%xmm7, %%xmm4\n\t" > >> + "movdqa %%xmm7, %%xmm3 \n\t" \ > >> + "movdqa %%xmm7, %%xmm4 \n\t" \ > >> "mov %0, %%"REG_d" \n\t"\ > >> "mov (%%"REG_d"), %%"REG_S" \n\t"\ > >> - "jb 1b \n\t"\ > >> - :: "g" (filter), > >> - "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset) > >> - : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , > >> "%xmm5" , "%xmm7" ,) > >> - "%"REG_d, "%"REG_S, "%"REG_c > >> - ); > >> + "jb 1b \n\t" > >> + > >> + if (offset) { > >> + __asm__ volatile( > >> + "movq %5, %%xmm3 \n\t" > >> + "movdqa %%xmm3, %%xmm4 \n\t" > >> + "psrlq $24, %%xmm3 \n\t" > >> + "psllq $40, %%xmm4 \n\t" > >> + "por %%xmm4, %%xmm3 \n\t" > >> + MAIN_FUNCTION > >> + :: "g" (filter), > >> + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), > >> + "m"(filterSize), "m"(((uint64_t *) dither)[0]) > >> + : "%"REG_d, "%"REG_S, "%"REG_c > >> + ); > > > > you lost the XMM_CLOBBERS() > > True. Restored it. > > >> + } else { > >> + __asm__ volatile( > >> + "movq %5, %%xmm3 \n\t" > >> + MAIN_FUNCTION > >> + :: "g" (filter), > >> + "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset), > >> + "m"(filterSize), "m"(((uint64_t *) dither)[0]) > >> + : "%"REG_d, "%"REG_S, "%"REG_c > >> + ); > > > > tabs > > Why the hell the default emacs profile is so bad :-( > > New patch attached. > > -Vitor
> swscale.c | 83 > ++++++++++++++++++++++++++++++++------------------------- > 1 file changed, 44 insertions(+), 39 deletions(-) > 89ceea8040113ac92b4b7420c1b5876275ee9c78 > 0001-swscale-x86-do-not-expect-registers-to-be-preserved-.patch > From 19c13aa203f5d5970a125fbeb30c3e36d7ab9899 Mon Sep 17 00:00:00 2001 > From: Vitor Sessak <vses...@google.com> > Date: Wed, 17 Sep 2014 21:10:16 +0200 > Subject: [PATCH] swscale/x86: do not expect registers to be preserved across > inline ASM blocks applied, hopefully it won't break with old GCC versions, I tested a few but not many thanks [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB Concerning the gods, I have no means of knowing whether they exist or not or of what sort they may be, because of the obscurity of the subject, and the brevity of human life -- Protagoras
signature.asc
Description: Digital signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel