Hi all,
I did a couple of speedups; both are drop-in replacements:
window_subband (encode.c)
calc_noise1 (loop.c)
window_subband speedup of 20% - derived from Tord's bladeenc code. I added
pointers to make it even faster. (If I get really bored next week, I may think
about Zubars' combined window_subband/filter_subband routine.)
calc_noise1 speedup of 20% - the largest the pow43[] table ever needs to be is
8206 entries. This is derived from the check for the maximum value in
count_bits(). So just take up some more space by increasing PRECALC_SIZE from
1024 to 8206, and you can then drop the check that ix[] lies within
PRECALC_SIZE. (The same could be done for calc_noise2, but I got sick of
looking at code. The pow43 table could even be made global for loop.c.)
Future:
- mdct.c based on fft. (soonish)
- count_bits speedup. This requires a little bit of fiddling (the
first time I tried it I forgot about short blocks):
. Find the maximum xrpow value in each channel and its index
in the xrpow[] array. [The "tricky" bit is keeping a
check on the maximum value in each third of the xr[]
array if we have a short block.]
. You then have to keep track of which channel is currently
being processed before each quantize() call.
. Find the ix[] value for the maximum xrpow[] value.
. If this is over 8205 then we've chosen an unsuitable
quantizer value: force an exit, skip count_bits()
and choose a new step size.
. This eliminates the max-value check at the start of count_bits()
and, according to a quick test I did, will result
in a vast improvement in count_bits() - but
unfortunately less than a 2% overall effect.
. The problem is that there's a little bit of bookkeeping and
branching to do in the xrpow[] quantize, and I'm not sure
whether the speedup will outweigh the slowdowns.
Leaving it for a rainy day.
later
mike
www.cryogen.com/mikecheng
-----------------------------
/************************************************************************
*
* window_subband()
*
* PURPOSE: Overlapping window on PCM samples
*
* SEMANTICS:
* 32 16-bit pcm samples are scaled to fractional 2's complement and
* concatenated to the end of the window buffer #x#. The updated window
* buffer #x# is then windowed by the analysis window #c# to produce the
* windowed sample #z#
*
************************************************************************/
extern double enwindow[];
void window_subband(buffer, z, k)
short **buffer;
double z[HAN_SIZE];
int k;
{
typedef double FAR XX[2][HAN_SIZE];
static XX FAR *x;
double *xk;
int i;
static int off[2] = {0,0};
static char init = 0;
double t;
double *ep0,*ep1, *ep2, *ep3, *ep4, *ep5, *ep6, *ep7;
if (!init) {
x = (XX FAR *) mem_alloc(sizeof(XX),"x");
memset(x, 0, 2*HAN_SIZE);
init = 1;
}
xk=(*x)[k];
/* replace 32 oldest samples with 32 new samples */
for (i=0;i<32;i++)
xk[31-i+off[k]] = (double) *(*buffer)++/SCALE;
ep0=&enwindow[0];
ep1=&enwindow[64];
ep2=&enwindow[128];
ep3=&enwindow[192];
ep4=&enwindow[256];
ep5=&enwindow[320];
ep6=&enwindow[384];
ep7=&enwindow[448];
/* shift samples into proper window positions */
for( i = 0 ; i<64 ; i++ )
{
t = xk[(i+off[k])&512-1] * *ep0++;
t += xk[(i+64+off[k])&512-1] * *ep1++;
t += xk[(i+128+off[k])&512-1] * *ep2++;
t += xk[(i+192+off[k])&512-1] * *ep3++;
t += xk[(i+256+off[k])&512-1] * *ep4++;
t += xk[(i+320+off[k])&512-1] * *ep5++;
t += xk[(i+384+off[k])&512-1] * *ep6++;
t += xk[(i+448+off[k])&512-1] * *ep7++;
z[i] = t;
}
off[k] += 480; /*offset is modulo (HAN_SIZE-1)*/
off[k] &= HAN_SIZE-1;
}
/*************************************************************************/
/* calc_noise */
/*************************************************************************/
/* mt 5/99: Improved calc_noise for a single channel.
 *
 * For each scalefactor band this computes the mean of
 *     ( fabs(xr) - ix^(4/3) * step )^2
 * over the band, stores it in xfsf, and sets the matching distort entry
 * to 1 when that mean exceeds the corresponding l3_xmin value (0
 * otherwise). step is 2^(quantizerStepSize/4), additionally scaled by
 * 2^(-2*subblock_gain) for the short windows.
 *
 * Row 0 of xfsf/distort holds the long bands [0, sfb_lmax); rows 1..3
 * hold the three short windows for bands [sfb_smax, 12).
 *
 * Returns the number of bands flagged in distort.
 *
 * NOTE(review): noise is passed by value, so the excess accumulated into
 * it below is never visible to the caller - confirm whether a pointer
 * was intended.
 * NOTE(review): ix values index pow43[] without a range check; they must
 * lie in 0..PRECALC_SIZE-1.
 */
int calc_noise1( double xr[576], int ix[576], gr_info *cod_info,
                 double xfsf[4][CBLIMIT], int distort[4][CBLIMIT],
                 III_psy_xmin *l3_xmin,int gr, int ch, double noise)
{
#define PRECALC_SIZE 8206 /* 8191+15. should never be outside this. see count_bits() */
    static double pow43[PRECALC_SIZE];
    static int table_ready = 0;
    D192_3 *xr_short;
    I192_3 *ix_short;
    double base_step, step, width, acc, allowed;
    int band, win, lo, hi, k;
    int n_over = 0;

    noise = 0;

    /* Build the x^(4/3) lookup table on the first call only. */
    if (table_ready == 0) {
        table_ready++;
        for (k = 0; k < PRECALC_SIZE; k++)
            pow43[k] = pow((double)k, 4.0/3.0);
    }

    xr_short = (D192_3 *) xr;
    ix_short = (I192_3 *) ix;

    /* Same step value is the starting point for long and short blocks. */
    base_step = pow( 2.0, (cod_info->quantizerStepSize) * 0.25 );

    /* ---- long bands ---- */
    step = base_step;
    for (band = 0; band < cod_info->sfb_lmax; band++) {
        lo = scalefac_band_long[band];
        hi = scalefac_band_long[band + 1];
        width = hi - lo;

        acc = 0.0;
        for (k = lo; k < hi; k++) {
            double err = fabs(xr[k]) - pow43[ix[k]] * step;
            acc += err * err;
        }

        xfsf[0][band] = acc / width;
        allowed = l3_xmin->l[gr][ch][band];
        if (xfsf[0][band] > allowed) {
            distort[0][band] = 1;
            n_over++;
            noise += xfsf[0][band] - allowed;
        } else {
            distort[0][band] = 0;
        }
    }

    /* ---- short bands, one pass per window ---- */
    for (win = 0; win < 3; win++) {
        step = base_step; /* subblock_gain ? */
        if (cod_info->subblock_gain[win])
            step *= pow(2.0, -2.0*cod_info->subblock_gain[win]);

        for (band = cod_info->sfb_smax; band < 12; band++) {
            lo = scalefac_band_short[band];
            hi = scalefac_band_short[band + 1];
            width = hi - lo;

            acc = 0.0;
            for (k = lo; k < hi; k++) {
                double err = fabs((*xr_short)[k][win])
                             - pow43[(*ix_short)[k][win]] * step;
                acc += err * err;
            }

            xfsf[win + 1][band] = acc / width;
            allowed = l3_xmin->s[gr][ch][band][win];
            if (xfsf[win + 1][band] > allowed) {
                distort[win + 1][band] = 1;
                n_over++;
                noise += xfsf[win + 1][band] - allowed;
            } else {
                distort[win + 1][band] = 0;
            }
        }
    }

    return n_over;
}
--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )