> From: Mathew Hendry [mailto:[EMAIL PROTECTED]]
> 
> > From: Takehiro Tominaga [mailto:[EMAIL PROTECTED]]
> >
> > Before checking in the new quantize routine, look at this hint and make a
> > faster one :p
> > 
> > table making.
> >     adj43[0] = 0.0;
> >     for (i = 1; i < PRECALC_SIZE; i++)
> >     adj43[i] = i - 0.5 - pow(0.5 * (pow((double)(i - 1), 4.0/3.0)
> >                                     + pow((double)i, 
> > 4.0/3.0)), 0.75);
> 
> Better make that adj43asm[...], or it will break the non-asm versions.

Works quite nicely. Try this quantize_xrpow with adj43asm modified as above:

/*
Timings for the MSVC asm version, before (first row) and after (second row), testing "fools.wav"

        Func          Func+Child           Hit
        Time   %         Time      %      Count  Function
---------------------------------------------------------
     860.617   6.2      860.617   6.2    84439 _quantize_xrpow
     789.424   5.7      789.424   5.7    84439 _quantize_xrpow
*/

/*
 * quantize_xrpow: fill ix[] with 576 integers derived from the 576 values
 * in xr[].
 *
 * Each input is multiplied by istep = 2^(-0.1875 * quantizerStepSize) and
 * converted to an integer rx.  A correction selected by rx is then added to
 * the scaled value, and that sum is converted to the integer stored in ix.
 *
 *   xr        576 input values (read only through a moving pointer)
 *   ix        receives 576 integers
 *   cod_info  only quantizerStepSize is read here
 *
 * Two implementations are selected at compile time; both handle four values
 * per loop iteration (576 / 4 iterations):
 *   - without _MSC_VER: C, using the XRPOW_FTOI and QUANTFAC macros, which
 *     are defined outside this function;
 *   - with _MSC_VER: 32-bit x87 inline assembly that converts with fist /
 *     fistp and reads its corrections directly from adj43asm[].
 *
 * NOTE(review): the two paths give the same results only if XRPOW_FTOI
 * rounds like fist/fistp and QUANTFAC(rx) reads the same table as
 * adj43asm[rx] -- neither macro is visible here, confirm.
 * NOTE(review): the assembly path loads istep, xr[] and adj43asm[] as
 * 8-byte (qword) values, so it requires FLOAT8 and the table element type
 * to be 8-byte floating point -- confirm.
 */
void quantize_xrpow(FLOAT8 xr[576], int ix[576], gr_info *cod_info) {
  /* quantize on xr^(3/4) instead of xr */
  const FLOAT8 quantizerStepSize = cod_info->quantizerStepSize;
  /* scale factor applied to every input: 2^(-3/16 * quantizerStepSize) */
  const FLOAT8 istep = pow(2.0, quantizerStepSize * -0.1875);
  
#ifndef _MSC_VER
  {
      FLOAT8 x;     /* current input after scaling by istep */
      int j, rx;    /* j: iterations left; rx: first-pass integer of x */
      /* Loop body is unrolled four times, hence 576 / 4 iterations. */
      for (j = 576 / 4; j > 0; --j) {
          /* Per element: scale, convert to rx, add QUANTFAC(rx), convert
             again and store the result through the advancing ix pointer. */
          x = *xr++ * istep;
          XRPOW_FTOI(x, rx);
          XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
          x = *xr++ * istep;
          XRPOW_FTOI(x, rx);
          XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
          x = *xr++ * istep;
          XRPOW_FTOI(x, rx);
          XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
          x = *xr++ * istep;
          XRPOW_FTOI(x, rx);
          XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
      }
  }
#else
/* def _MSC_VER */
  {
      /* asm from Acy Stapp <[EMAIL PROTECTED]> */
      /*
       * Register roles for the whole loop:
       *   esi   read cursor into xr (advances 32 bytes per iteration)
       *   edx   write cursor into ix (advances 16 bytes per iteration)
       *   edi   base address of adj43asm
       *   ecx   iterations remaining
       *   eax, ebx   scratch: table indices reloaded from rx[]
       * istep is loaded once and stays at the bottom of the x87 stack until
       * the final fstp.
       *
       * The trailing "// a b c d" comments list the x87 stack from st(0)
       * downwards, naming each entry by its position (0..3) within the
       * current group of four inputs; the istep entry below them is not
       * shown.
       *
       * NOTE(review): ebx, esi and edi are overwritten without an explicit
       * save/restore; this relies on the compiler preserving them around
       * _asm blocks -- confirm against the MSVC inline assembler docs.
       */
      int rx[4];    /* first-pass integers for the four values in flight */
      _asm {
          fld qword ptr [istep]
          mov esi, dword ptr [xr]
          lea edi, dword ptr [adj43asm]
          mov edx, dword ptr [ix]
          mov ecx, 576/4
      } loop1: _asm {   /* loop1 is a C label placed between two _asm blocks */
          // Load four consecutive 8-byte inputs; istep is now st(4).
          fld qword ptr [esi]         // 0
          fld qword ptr [esi+8]       // 1 0
          fld qword ptr [esi+16]      // 2 1 0
          fld qword ptr [esi+24]      // 3 2 1 0

          // Multiply each of the four values by istep, rotating each one to
          // st(0) in turn with fxch.
          fxch st(3)                  // 0 2 1 3
          fmul st(0), st(4)
          fxch st(2)                  // 1 2 0 3
          fmul st(0), st(4)
          fxch st(1)                  // 2 1 0 3
          fmul st(0), st(4)
          fxch st(3)                  // 3 1 0 2
          fmul st(0), st(4)

          // Advance both cursors now; the stores at the end of the iteration
          // therefore use negative offsets from edx.
          add esi, 32
          add edx, 16

          // First pass: store each scaled value as an integer in rx[]
          // without popping it (fist converts using the current x87
          // rounding mode).
          fxch st(2)                  // 0 1 3 2
          fist dword ptr [rx]
          fxch st(1)                  // 1 0 3 2
          fist dword ptr [rx+4]
          fxch st(3)                  // 2 0 3 1
          fist dword ptr [rx+8]
          fxch st(2)                  // 3 0 2 1
          fist dword ptr [rx+12]

          // Sets ZF for the jnz at the bottom of the loop; the mov and x87
          // instructions in between do not write EFLAGS.
          dec ecx

          // Add the 8-byte table entry adj43asm[rx[k]] to value k.
          // NOTE(review): rx[k] is used as an index with no range check
          // here -- presumably the caller bounds the inputs; confirm.
          mov eax, dword ptr [rx]
          mov ebx, dword ptr [rx+4]
          fxch st(1)                  // 0 3 2 1
          fadd qword ptr [edi+eax*8]
          fxch st(3)                  // 1 3 2 0
          fadd qword ptr [edi+ebx*8]

          mov eax, dword ptr [rx+8]
          mov ebx, dword ptr [rx+12]
          fxch st(2)                  // 2 3 1 0
          fadd qword ptr [edi+eax*8]
          fxch st(1)                  // 3 2 1 0
          fadd qword ptr [edi+ebx*8]

          // Second pass: convert the corrected values to integers, store
          // them to the four output slots in input order, and pop them.
          fxch st(3)                  // 0 2 1 3
          fistp dword ptr [edx-16]    // 2 1 3
          fxch st(1)                  // 1 2 3
          fistp dword ptr [edx-12]    // 2 3
          fistp dword ptr [edx-8]     // 3
          fistp dword ptr [edx-4]

          // Repeat while ecx (decremented above) is non-zero.
          jnz loop1

          // Write the advanced cursors back to the xr and ix parameters,
          // then pop istep so the x87 stack is left as it was on entry.
          mov dword ptr [xr], esi
          mov dword ptr [ix], edx
          fstp st(0)
      }
  }
#endif
}

-- Mat.
--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )

Reply via email to