> From: "Mark Taylor" <[EMAIL PROTECTED]>
>
> The code enabled by
>
> #define USE_GNUC_ASM
>
> is currently broken. Takehiro was trying some improvements,
> (Takehiro, maybe you could revert CVS back to the original until
> these are working? )

I got Takehiro's code working (the fp stack was out of order) but it seems
to be slightly slower than the original. Here's the fixed code anyway - the
main loop in quantize_xrpow

  {
      /*
       * Main loop of quantize_xrpow: 576 values, four per iteration.
       * For each element k the loop computes
       *
       *     t     = xr[k] * istep
       *     ix[k] = (int)(t + adj43asm[(int)t])
       *
       * Both float->int conversions are done by fist/fistp, i.e. they use
       * the FPU's current rounding mode.  The first conversion is stored
       * to ix[k] and read back as the index into adj43asm.
       *
       * The template advances both pointers and decrements the counter,
       * so those three values are passed as read-write operands on local
       * copies.  Declaring them input-only (while modifying them) would
       * let the compiler assume the registers still hold xr, ix and 144
       * after the asm.  The caller's xr and ix are left untouched.
       *
       * Operand map:
       *   %0  xr_pos       (in/out)  read pointer into xr
       *   %1  ix_pos       (in/out)  write pointer into ix
       *   %2  groups_left  (in/out)  remaining groups of four
       *   %3  istep        (in, x87 top of stack; used as %%st(n))
       *   %4  adj43asm     (in)      base of the adjustment table
       *
       * Stack comments list st(0) first; digits 0-3 stand for the value
       * derived from element 0-3 of the current group, "i" for istep.
       *
       * NOTE(review): the loop needs four free x87 stack slots above
       * istep and nothing in the operand list reserves them -- confirm
       * the FP stack is otherwise empty at this point.
       */
      __typeof__(&(xr)[0]) xr_pos = xr;
      __typeof__(&(ix)[0]) ix_pos = ix;
      int groups_left = 576 / 4;

      __asm__ __volatile__(
        /* Numeric local label: stays unique if this asm is duplicated. */
        "\n\n1:\n\t"    // i

        /* Load four inputs and scale each by istep. */
        "fld" F8type " 0*" F8size "(%0)\n\t"  // 0 i
        "fmul %%st(1)\n\t"
        "fld" F8type " 1*" F8size "(%0)\n\t"  // 1 0 i
        "fmul %%st(2)\n\t"
        "fld" F8type " 2*" F8size "(%0)\n\t"  // 2 1 0 i
        "fmul %%st(3)\n\t"
        "fld" F8type " 3*" F8size "(%0)\n\t"  // 3 2 1 0 i
        "fmul %%st(4)\n\t"

        /* Store the first integer conversion (no pop) into ix[0..3]. */
        "fxch %%st(3)\n\t"    // 0 2 1 3 i
        "fistl (%1)\n\t"
        "fxch %%st(2)\n\t"    // 1 2 0 3 i
        "fistl 4(%1)\n\t"
        "fxch %%st(1)\n\t"    // 2 1 0 3 i
        "fistl 8(%1)\n\t"
        "fxch %%st(3)\n\t"    // 3 1 0 2 i
        "fistl 12(%1)\n\t"

        /*
         * Advance the pointers and count down.  ZF from dec is consumed
         * by the jnz at the bottom; only mov and x87 instructions sit in
         * between, and none of them writes ZF.
         */
        "addl $4*" F8size ", %0\n\t"
        "addl $16, %1\n\t"
        "dec %2\n\t"

        /* Add the table entry selected by each stored integer. */
        "movl -16(%1), %%eax\n\t"
        "movl -12(%1), %%ebx\n\t"
        "fxch %%st(2)\n\t"    // 0 1 3 2 i
        "fadd" F8type " (%4,%%eax," F8size ")\n\t"
        "fxch %%st(1)\n\t"    // 1 0 3 2 i
        "fadd" F8type " (%4,%%ebx," F8size ")\n\t"

        "movl -8(%1), %%eax\n\t"
        "movl -4(%1), %%ebx\n\t"
        "fxch %%st(3)\n\t"    // 2 0 3 1 i
        "fadd" F8type " (%4,%%eax," F8size ")\n\t"
        "fxch %%st(2)\n\t"    // 3 0 2 1 i
        "fadd" F8type " (%4,%%ebx," F8size ")\n\t"

        /* Final conversion: store and pop, leaving only istep. */
        "fxch %%st(1)\n\t"    // 0 3 2 1 i
        "fistpl -16(%1)\n\t"    // 3 2 1 i
        "fxch %%st(2)\n\t"    // 1 2 3 i
        "fistpl -12(%1)\n\t"    // 2 3 i
        "fistpl -8(%1)\n\t"    // 3 i
        "fistpl -4(%1)\n\t"    // i

        "jnz 1b\n\n"
        : "+&r" (xr_pos), "+&r" (ix_pos), "+&r" (groups_left)
        : "t" (istep), "r" (adj43asm)
        : "%eax", "%ebx", "memory", "cc"
      );
  }

-- Mat.



--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )

Reply via email to