On Wed, Sep 17, 2014 at 09:21:08PM +0200, Vitor Sessak wrote:
> Hi!
>
> I'm not sure we can count on registers being preserved across ASM
> blocks. Moreover, this causes problems with tools that instrument the
> code by inserting function calls between lines (like ThreadSanitizer).
>
> The attached patch fixes one instance of this problem in swscale.
>
> -Vitor
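For context, a minimal standalone sketch of the failure mode Vitor describes (this is an illustration, not code from swscale; the function names are hypothetical and it assumes GCC-style inline asm on an x86-64/SSE2 target). Nothing tells the compiler that %xmm3 is still live between two separate __asm__ statements, so any code it schedules in between, including ThreadSanitizer instrumentation calls, may clobber it:

#include <stdint.h>

/* Hypothetical illustration, not code from swscale. */
void broken(const uint64_t *dither)
{
    /* Statement 1 leaves the dither value in %xmm3 ... */
    __asm__ volatile("movq (%0), %%xmm3" :: "r"(dither));

    /* ... but the compiler (or a sanitizer) may emit arbitrary code here,
     * and nothing declares that %xmm3 must survive ... */

    /* ... so statement 2 may read garbage from %xmm3. */
    __asm__ volatile("pxor      %%xmm0, %%xmm0 \n\t"
                     "punpcklbw %%xmm0, %%xmm3 \n\t"
                     ::: "%xmm0", "%xmm3");
}

void fixed(const uint64_t *dither)
{
    /* Keeping the dependent instructions in one statement, with the
     * registers declared as clobbers, removes the hidden assumption. */
    __asm__ volatile("movq       (%0), %%xmm3 \n\t"
                     "pxor      %%xmm0, %%xmm0 \n\t"
                     "punpcklbw %%xmm0, %%xmm3 \n\t"
                     :: "r"(dither)
                     : "%xmm0", "%xmm3");
}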
>  swscale.c |   82 +++++++++++++++++++++++++++++++-------------------------------
>  1 file changed, 42 insertions(+), 40 deletions(-)
>  7fe9f7a0c1a7c1ed843b891f5f810fbd94a2e03f  0001-swscale-x86-do-not-expect-registers-to-be-preserved-.patch
> From f1c59628b2baeb9994bed8947c0a2c228610bf4f Mon Sep 17 00:00:00 2001
> From: Vitor Sessak <vses...@google.com>
> Date: Wed, 17 Sep 2014 21:10:16 +0200
> Subject: [PATCH] swscale/x86: do not expect registers to be preserved across inline ASM blocks
>
> ---
>  libswscale/x86/swscale.c | 82 +++++++++++++++++++++++++-----------------------
>  1 file changed, 42 insertions(+), 40 deletions(-)
>
> diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
> index c4c0e28..38157c8 100644
> --- a/libswscale/x86/swscale.c
> +++ b/libswscale/x86/swscale.c
> @@ -205,36 +205,19 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
>          yuv2yuvX_mmxext(filter, filterSize, src, dest, dstW, dither, offset);
>          return;
>      }
> -    if (offset) {
> -        __asm__ volatile("movq (%0), %%xmm3\n\t"
> -                         "movdqa %%xmm3, %%xmm4\n\t"
> -                         "psrlq $24, %%xmm3\n\t"
> -                         "psllq $40, %%xmm4\n\t"
> -                         "por %%xmm4, %%xmm3\n\t"
> -                         :: "r"(dither)
> -                         );
> -    } else {
> -        __asm__ volatile("movq (%0), %%xmm3\n\t"
> -                         :: "r"(dither)
> -                         );
> -    }
> -    filterSize--;

Not sure if I missed it, but it seems this is lost in the new code (see the sketch after the diff).

> -    __asm__ volatile(
> -        "pxor %%xmm0, %%xmm0\n\t"
> -        "punpcklbw %%xmm0, %%xmm3\n\t"
> -        "movd %0, %%xmm1\n\t"
> -        "punpcklwd %%xmm1, %%xmm1\n\t"
> -        "punpckldq %%xmm1, %%xmm1\n\t"
> -        "punpcklqdq %%xmm1, %%xmm1\n\t"
> -        "psllw $3, %%xmm1\n\t"
> -        "paddw %%xmm1, %%xmm3\n\t"
> -        "psraw $4, %%xmm3\n\t"
> -        ::"m"(filterSize)
> -        );
> -    __asm__ volatile(
> -        "movdqa %%xmm3, %%xmm4\n\t"
> -        "movdqa %%xmm3, %%xmm7\n\t"
> -        "movl %3, %%ecx\n\t"
> +#define MAIN_FUNCTION \
> +        "pxor       %%xmm0, %%xmm0 \n\t" \
> +        "punpcklbw  %%xmm0, %%xmm3 \n\t" \
> +        "movd           %4, %%xmm1 \n\t" \
> +        "punpcklwd  %%xmm1, %%xmm1 \n\t" \
> +        "punpckldq  %%xmm1, %%xmm1 \n\t" \
> +        "punpcklqdq %%xmm1, %%xmm1 \n\t" \
> +        "psllw          $3, %%xmm1 \n\t" \
> +        "paddw      %%xmm1, %%xmm3 \n\t" \
> +        "psraw          $4, %%xmm3 \n\t" \
> +        "movdqa     %%xmm3, %%xmm4 \n\t" \
> +        "movdqa     %%xmm3, %%xmm7 \n\t" \
> +        "movl           %3, %%ecx  \n\t" \
>          "mov %0, %%"REG_d"  \n\t"\
>          "mov (%%"REG_d"), %%"REG_S"  \n\t"\
>          ".p2align 4  \n\t" /* FIXME Unroll? */\
> @@ -252,20 +235,39 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
>          " jnz 1b  \n\t"\
>          "psraw $3, %%xmm3  \n\t"\
>          "psraw $3, %%xmm4  \n\t"\
> -        "packuswb %%xmm4, %%xmm3  \n\t"
> -        "movntdq %%xmm3, (%1, %%"REG_c")\n\t"
> +        "packuswb %%xmm4, %%xmm3  \n\t"\
> +        "movntdq %%xmm3, (%1, %%"REG_c")\n\t"\
>          "add $16, %%"REG_c"  \n\t"\
>          "cmp %2, %%"REG_c"  \n\t"\
> -        "movdqa %%xmm7, %%xmm3\n\t"
> -        "movdqa %%xmm7, %%xmm4\n\t"
> +        "movdqa %%xmm7, %%xmm3 \n\t" \
> +        "movdqa %%xmm7, %%xmm4 \n\t" \
>          "mov %0, %%"REG_d"  \n\t"\
>          "mov (%%"REG_d"), %%"REG_S"  \n\t"\
> -        "jb 1b  \n\t"\
> -        :: "g" (filter),
> -           "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset)
> -        : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
> -          "%"REG_d, "%"REG_S, "%"REG_c
> -        );
> +        "jb 1b  \n\t"
> +
> +    if (offset) {
> +        __asm__ volatile(
> +            "movq %5, %%xmm3 \n\t"
> +            "movdqa %%xmm3, %%xmm4 \n\t"
> +            "psrlq $24, %%xmm3 \n\t"
> +            "psllq $40, %%xmm4 \n\t"
> +            "por %%xmm4, %%xmm3 \n\t"
> +            MAIN_FUNCTION
> +            :: "g" (filter),
> +               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
> +               "m"(filterSize), "m"(((uint64_t *) dither)[0])
> +            : "%"REG_d, "%"REG_S, "%"REG_c
> +            );

You lost the XMM_CLOBBERS() here (also covered by the sketch after the diff).

> +    } else {
> +        __asm__ volatile(
> +            "movq %5, %%xmm3 \n\t"
> +            MAIN_FUNCTION
> +            :: "g" (filter),
> +               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
> +               "m"(filterSize), "m"(((uint64_t *) dither)[0])
> +            : "%"REG_d, "%"REG_S, "%"REG_c
> +            );

There are tabs here; tabs are not allowed in FFmpeg code.

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein
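A minimal sketch of how the tail of the patch could address both review points above: it keeps the filterSize decrement from the old code and carries the XMM_CLOBBERS() list over into the merged statements. This is an illustration, not the committed fix; it is a fragment inside yuv2yuvX_sse3(), with MAIN_FUNCTION and all operand strings taken from the patch, and REG_d/REG_S/REG_c, x86_reg and XMM_CLOBBERS coming from libavutil/x86/asm.h as in the existing file.

    filterSize--;   /* restore the decrement dropped by the patch */
    if (offset) {
        __asm__ volatile(
            "movq          %5, %%xmm3 \n\t"
            "movdqa    %%xmm3, %%xmm4 \n\t"
            "psrlq        $24, %%xmm3 \n\t"
            "psllq        $40, %%xmm4 \n\t"
            "por       %%xmm4, %%xmm3 \n\t"
            MAIN_FUNCTION
            :: "g" (filter),
               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
               "m" (filterSize), "m" (((uint64_t *) dither)[0])
            : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                           "%xmm4", "%xmm5", "%xmm7",)   /* carried over from the removed block */
              "%"REG_d, "%"REG_S, "%"REG_c
        );
    } else {
        __asm__ volatile(
            "movq          %5, %%xmm3 \n\t"
            MAIN_FUNCTION
            :: "g" (filter),
               "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
               "m" (filterSize), "m" (((uint64_t *) dither)[0])
            : XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                           "%xmm4", "%xmm5", "%xmm7",)
              "%"REG_d, "%"REG_S, "%"REG_c
        );
    }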