> From: Mathew Hendry [mailto:[EMAIL PROTECTED]]
>
> > From: Takehiro Tominaga [mailto:[EMAIL PROTECTED]]
> >
> > before check in new quantize routine, see this hint and make
> > faster one :p
> >
> > table making.
> > adj43[0] = 0.0;
> > for (i = 1; i < PRECALC_SIZE; i++)
> > adj43[i] = i - 0.5 - pow(0.5 * (pow((double)(i - 1), 4.0/3.0)
> > + pow((double)i,
> > 4.0/3.0)), 0.75);
>
> Better make that adj43asm[...], or it will break the non-asm versions.
Works quite nicely. Try this quantize_xrpow with a modified adj43asm table:
/*
Timings for the MSVC asm version, before and after, testing "fools.wav"
(first row = before, second row = after)

   Func            Func+Child       Hit
   Time      %     Time      %      Count   Function
---------------------------------------------------------
 860.617    6.2   860.617   6.2    84439   _quantize_xrpow
 789.424    5.7   789.424   5.7    84439   _quantize_xrpow
*/
/*
 * quantize_xrpow - scale 576 input values by a step-size factor and convert
 * them to integers with a table/macro-supplied rounding adjustment.
 *
 * xr       : 576 input values (per the comment below, already xr^(3/4)).
 * ix       : receives the 576 integer results.
 * cod_info : only cod_info->quantizerStepSize is read.
 *
 * For every element:  x  = xr[i] * istep
 *                     rx = (int) x                  (first conversion)
 *                     ix[i] = (int)(x + adjust(rx)) (second conversion)
 * where adjust() is QUANTFAC(rx) in the portable branch and adj43asm[rx]
 * in the MSVC branch.  Both branches process four elements per iteration.
 *
 * NOTE(review): XRPOW_FTOI, QUANTFAC and adj43asm are defined elsewhere;
 * their exact rounding behaviour and the table length are not visible here.
 * Neither branch range-checks rx before using it as a lookup index.
 */
void quantize_xrpow(FLOAT8 xr[576], int ix[576], gr_info *cod_info) {
/* quantize on xr^(3/4) instead of xr */
const FLOAT8 quantizerStepSize = cod_info->quantizerStepSize;
/* istep = 2^(-0.1875 * quantizerStepSize); every input is multiplied by it */
const FLOAT8 istep = pow(2.0, quantizerStepSize * -0.1875);
#ifndef _MSC_VER
{
/* Portable branch: loop body manually unrolled four times (576/4 passes). */
FLOAT8 x;
int j, rx;
for (j = 576 / 4; j > 0; --j) {
/* element 0: scale, convert once to get rx, then convert again with the adjustment */
x = *xr++ * istep;
XRPOW_FTOI(x, rx);
XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
/* element 1 */
x = *xr++ * istep;
XRPOW_FTOI(x, rx);
XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
/* element 2 */
x = *xr++ * istep;
XRPOW_FTOI(x, rx);
XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
/* element 3 */
x = *xr++ * istep;
XRPOW_FTOI(x, rx);
XRPOW_FTOI(x + QUANTFAC(rx), *ix++);
}
}
#else
/* def _MSC_VER */
{
/* asm from Acy Stapp <[EMAIL PROTECTED]> */
/*
 * MSVC inline x87 branch.  Register roles:
 *   esi = read pointer into xr      edi = base address of adj43asm
 *   edx = write pointer into ix     ecx = remaining iterations (576/4)
 *   eax/ebx = scratch table indices loaded from rx[]
 * istep is loaded once and stays at the bottom of the FPU stack (st(4)
 * while the four values are loaded) until the final fstp.
 *
 * The trailing "// a b c d" comments list which of the four inputs of the
 * current iteration sits in st(0), st(1), st(2), st(3), left to right.
 *
 * NOTE(review): all loads use qword ptr and the table is indexed with *8,
 * so this branch presumes FLOAT8 and adj43asm entries are 8-byte doubles
 * -- confirm against their declarations.
 * NOTE(review): ebx, esi and edi are modified here; MSVC is documented to
 * save/restore them around __asm blocks that use them -- confirm for the
 * compiler in use.
 */
int rx[4];
_asm {
fld qword ptr [istep]
mov esi, dword ptr [xr]
lea edi, dword ptr [adj43asm]
mov edx, dword ptr [ix]
mov ecx, 576/4
} loop1: _asm {
// load four consecutive inputs
fld qword ptr [esi] // 0
fld qword ptr [esi+8] // 1 0
fld qword ptr [esi+16] // 2 1 0
fld qword ptr [esi+24] // 3 2 1 0
// multiply each of the four by istep (st(4)), rotating with fxch
fxch st(3) // 0 2 1 3
fmul st(0), st(4)
fxch st(2) // 1 2 0 3
fmul st(0), st(4)
fxch st(1) // 2 1 0 3
fmul st(0), st(4)
fxch st(3) // 3 1 0 2
fmul st(0), st(4)
// advance both pointers now; the stores below use negative offsets from edx
add esi, 32
add edx, 16
// first conversion: fist stores the integer without popping, so the
// scaled values stay on the FPU stack for the adjustment step
fxch st(2) // 0 1 3 2
fist dword ptr [rx]
fxch st(1) // 1 0 3 2
fist dword ptr [rx+4]
fxch st(3) // 2 0 3 1
fist dword ptr [rx+8]
fxch st(2) // 3 0 2 1
fist dword ptr [rx+12]
// sets ZF for the jnz at the bottom; the mov and x87 instructions in
// between do not modify EFLAGS
dec ecx
// add adj43asm[rx[k]] to value k, using rx[k] as an 8-byte-scaled index
mov eax, dword ptr [rx]
mov ebx, dword ptr [rx+4]
fxch st(1) // 0 3 2 1
fadd qword ptr [edi+eax*8]
fxch st(3) // 1 3 2 0
fadd qword ptr [edi+ebx*8]
mov eax, dword ptr [rx+8]
mov ebx, dword ptr [rx+12]
fxch st(2) // 2 3 1 0
fadd qword ptr [edi+eax*8]
fxch st(1) // 3 2 1 0
fadd qword ptr [edi+ebx*8]
// second conversion: fistp stores each adjusted value to ix and pops it
fxch st(3) // 0 2 1 3
fistp dword ptr [edx-16] // 2 1 3
fxch st(1) // 1 2 3
fistp dword ptr [edx-12] // 2 3
fistp dword ptr [edx-8] // 3
fistp dword ptr [edx-4]
jnz loop1
// write the advanced pointers back to the xr/ix parameters, matching the
// post-increments done by the portable branch
mov dword ptr [xr], esi
mov dword ptr [ix], edx
// pop istep so the FPU stack is left as it was on entry to the block
fstp st(0)
}
}
#endif
}
-- Mat.
--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )