Thanks,
It built and appears to be working well for me. Have you submitted your
findings to the avifile folks? I would like to see this problem go away
for good.
Thanks again,
Ken
Gwenole Beauchesne wrote:
>On Fri, 19 Jul 2002, Goetz Waschk wrote:
>
>
>
>>the avifile bug still hasn't been fixed:
>>
>>
>[...]
>
>As you tell it, the avifile bug hasn't been fixed, and that doesn't look
>like a gcc bug to me.
>
>Look, their asm template requests 6 registers, including explicit
>requirement for %eax. The problem is you have 8 GPRs on those poor x86,
>%esp can't be used, %ebp doesn't get allocated in asm templates, and %ebx is
>used to hold a pointer to the GOT when compiling with -fPIC. i.e. you are
>left with only 5 registers for the asm template. How could gcc find the
>extra one?
>
>It compiles now with the attached patch. But (i) that still looks bogus,
>(ii) I haven't tested it. BTW, they should really learn not to use "g"
>constraints.
>
>Please test.
>
>Bye,
>Gwenole.
>
>
>------------------------------------------------------------------------
>
>--- avifile0.7-0.7.8/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c.asmfixes
>2002-05-27 10:29:39.000000000 +0200
>+++ avifile0.7-0.7.8/ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c 2002-07-21
>09:32:51.000000000 +0200
>@@ -83,16 +83,21 @@ static int RENAME(dct_quantize)(MpegEncC
> }
>
> if(s->out_format == FMT_H263){
>-
>- asm volatile(
>+ /* PROLOGUE */
>+ asm volatile(
> "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
> SPREADW(%%mm3)
> "pxor %%mm7, %%mm7 \n\t" // 0
> "pxor %%mm4, %%mm4 \n\t" // 0
>- "movq (%2), %%mm5 \n\t" // qmat[0]
>+ "movq (%1), %%mm5 \n\t" // qmat[0]
> "pxor %%mm6, %%mm6 \n\t"
>- "psubw (%3), %%mm6 \n\t" // -bias[0]
>+ "psubw (%2), %%mm6 \n\t" // -bias[0]
> "movl $-128, %%eax \n\t"
>+ : "+a" (last_non_zero_p1)
>+ : "r" (qmat), "r" (bias)
>+ );
>+ /* CORE */
>+ asm volatile(
> ".balign 16 \n\t"
> "1: \n\t"
> "pxor %%mm1, %%mm1 \n\t" // 0
>@@ -105,14 +110,19 @@ static int RENAME(dct_quantize)(MpegEncC
> "por %%mm0, %%mm4 \n\t"
> "pxor %%mm1, %%mm0 \n\t"
> "psubw %%mm1, %%mm0 \n\t" //
>out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
>- "movq %%mm0, (%5, %%eax) \n\t"
>+ "movq %%mm0, (%3, %%eax) \n\t"
> "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
>- "movq (%4, %%eax), %%mm1 \n\t"
>+ "movq (%2, %%eax), %%mm1 \n\t"
> "movq %%mm7, (%1, %%eax) \n\t" // 0
> "pandn %%mm1, %%mm0 \n\t"
> PMAXW(%%mm0, %%mm3)
> "addl $8, %%eax \n\t"
> " js 1b \n\t"
>+ : "+a" (last_non_zero_p1)
>+ : "r" (block+64), "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
>+ );
>+ /* EPILOGUE */
>+ asm volatile(
> "movq %%mm3, %%mm0 \n\t"
> "psrlq $32, %%mm3 \n\t"
> PMAXW(%%mm0, %%mm3)
>@@ -121,48 +131,46 @@ static int RENAME(dct_quantize)(MpegEncC
> PMAXW(%%mm0, %%mm3)
> "movd %%mm3, %%eax \n\t"
> "movzbl %%al, %%eax \n\t" // last_non_zero_p1
>- : "+a" (last_non_zero_p1)
>- : "r" (block+64), "r" (qmat), "r" (bias),
>- "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
>- );
>- // note the asm is split cuz gcc doesnt like that many operands ...
>- asm volatile(
>- "movd %1, %%mm1 \n\t" // max_qcoeff
>+ "movd %2, %%mm1 \n\t" // max_qcoeff
> SPREADW(%%mm1)
> "psubusw %%mm1, %%mm4 \n\t"
> "packuswb %%mm4, %%mm4 \n\t"
>- "movd %%mm4, %0 \n\t" // *overflow
>- : "=g" (*overflow)
>- : "g" (s->max_qcoeff)
>- );
>+ "movd %%mm4, %1 \n\t" // *overflow
>+ : "+a" (last_non_zero_p1), "=r" (*overflow)
>+ : "r" (s->max_qcoeff)
>+ );
> }else{ // FMT_H263
> asm volatile(
>- "movd %%eax, %%mm3 \n\t" // last_non_zero_p1
>+ "pushl %%ebp \n\t"
>+ "pushl %%ebx \n\t"
>+ "movl %0, %%ebp \n\t"
>+ "movl (%%ebp), %%ebx \n\t"
>+ "movd %%ebx, %%mm3 \n\t" // last_non_zero_p1
> SPREADW(%%mm3)
> "pxor %%mm7, %%mm7 \n\t" // 0
> "pxor %%mm4, %%mm4 \n\t" // 0
>- "movl $-128, %%eax \n\t"
>+ "movl $-128, %%ebx \n\t"
> ".balign 16 \n\t"
> "1: \n\t"
> "pxor %%mm1, %%mm1 \n\t" // 0
>- "movq (%1, %%eax), %%mm0 \n\t" // block[i]
>+ "movq (%1, %%ebx), %%mm0 \n\t" // block[i]
> "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00
> "pxor %%mm1, %%mm0 \n\t"
> "psubw %%mm1, %%mm0 \n\t" // ABS(block[i])
>- "movq (%3, %%eax), %%mm6 \n\t" // bias[0]
>+ "movq (%3, %%ebx), %%mm6 \n\t" // bias[0]
> "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0]
>- "movq (%2, %%eax), %%mm5 \n\t" // qmat[i]
>+ "movq (%2, %%ebx), %%mm5 \n\t" // qmat[i]
> "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] +
>bias[0]*qmat[0])>>16
> "por %%mm0, %%mm4 \n\t"
> "pxor %%mm1, %%mm0 \n\t"
> "psubw %%mm1, %%mm0 \n\t" //
>out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
>- "movq %%mm0, (%5, %%eax) \n\t"
>+ "movq %%mm0, (%5, %%ebx) \n\t"
> "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00
>- "movq (%4, %%eax), %%mm1 \n\t"
>- "movq %%mm7, (%1, %%eax) \n\t" // 0
>+ "movq (%4, %%ebx), %%mm1 \n\t"
>+ "movq %%mm7, (%1, %%ebx) \n\t" // 0
> "pandn %%mm1, %%mm0 \n\t"
> PMAXW(%%mm0, %%mm3)
>- "addl $8, %%eax \n\t"
>+ "addl $8, %%ebx \n\t"
> " js 1b \n\t"
> "movq %%mm3, %%mm0 \n\t"
> "psrlq $32, %%mm3 \n\t"
>@@ -170,10 +178,14 @@ static int RENAME(dct_quantize)(MpegEncC
> "movq %%mm3, %%mm0 \n\t"
> "psrlq $16, %%mm3 \n\t"
> PMAXW(%%mm0, %%mm3)
>- "movd %%mm3, %%eax \n\t"
>- "movzbl %%al, %%eax \n\t" // last_non_zero_p1
>- : "+a" (last_non_zero_p1)
>- : "r" (block+64), "r" (qmat+64), "r" (bias+64),
>+ "movd %%mm3, %%ebx \n\t"
>+ "movzbl %%bl, %%ebx \n\t" // last_non_zero_p1
>+ "movl %%ebx, (%%ebp) \n\t"
>+ "popl %%ebx \n\t"
>+ "popl %%ebp \n\t"
>+ :
>+ : "m" (last_non_zero_p1),
>+ "r" (block+64), "r" (qmat+64), "r" (bias+64),
> "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
> );
> // note the asm is split cuz gcc doesnt like that many operands ...
>@@ -183,8 +195,8 @@ static int RENAME(dct_quantize)(MpegEncC
> "psubusw %%mm1, %%mm4 \n\t"
> "packuswb %%mm4, %%mm4 \n\t"
> "movd %%mm4, %0 \n\t" // *overflow
>- : "=g" (*overflow)
>- : "g" (s->max_qcoeff)
>+ : "=r" (*overflow)
>+ : "r" (s->max_qcoeff)
> );
> }
>
>@@ -192,8 +204,7 @@ static int RENAME(dct_quantize)(MpegEncC
> // last_non_zero_p1=64;
> /* permute for IDCT */
> asm volatile(
>- "movl %0, %%eax \n\t"
>- "pushl %%ebp \n\t"
>+ "pushl %%ebp \n\t"
> "movl %%esp, " MANGLE(esp_temp) "\n\t"
> "1: \n\t"
> "movzbl (%1, %%eax), %%ebx \n\t"
>@@ -208,10 +219,10 @@ static int RENAME(dct_quantize)(MpegEncC
> " js 1b \n\t"
> "movl " MANGLE(esp_temp) ", %%esp\n\t"
> "popl %%ebp \n\t"
>- :
>- : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S"
>(temp_block), "D" (block)
>- : "%eax", "%ebx", "%ecx"
>- );
>+ :
>+ : "a" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S"
>(temp_block), "D" (block)
>+ : "%ebx", "%ecx"
>+ );
> /*
> for(i=0; i<last_non_zero_p1; i++)
> {
>
>
--
Kenton A. Groombridge
[EMAIL PROTECTED]
http://home.coastalnow.net/~kgroombr/