[MP3 ENCODER] speedups...

mikecheng Fri, 2 Jul 1999 04:37:38 -0700
Hi all,
        I did a couple of speedups; both are drop-in replacements:
        window_subband  (encode.c)
        calc_noise1     (loop.c)

window_subband speedup of about 20% - derived from Tord's bladeenc code, with
pointers added to make it even faster.  (If I get really bored next week, I may
think about Zubars' combined windowsubband/filtersubband routine.)

calc_noise1 speedup of about 20% - the maximum size the pow43[] table needs to
be is 8206 (8191+15). This is derived by looking at the check for max size in
count_bits(). So just take up some more space and increase PRECALC_SIZE from
1024 to 8206, and you can then drop the check that ix[] lies within
PRECALC_SIZE.
     (   The same could be done for calc_noise2, but I got sick of looking at
code.  The pow43 table could even be made global for loop.c.)

Future:
        - mdct.c based on fft. (soonish)
        - count_bits speedup.  This requires a little bit of fiddling (the
first time i tried it I forgot about short blocks)
                . find the maximum xrpow value in each channel and its index
                        in the xrpow[] array. [the "tricky" bit is keeping a 
                        check on the maximum value in each third of the xr[]
                        array if we have a short block]
                . you then have to keep track of which channel is currently
                        being processed before each quantize() call.
                . find the ix[] value for the maximum xrpow[] value
                . if this is over 8205 then we've chosen an unsuitable
                        quantizer value. force an exit, skip the count_bits
                        and choose a new stepsize.
                . This eliminates the maxvalue check at the start of count_bits
                        and according to a little quick test I did, will result
                        in a vast improvement in count_bits() - but
                        unfortunately less than a 2% overall effect.
                . The problem is there's a little bit of bookkeeping and
                        branching to do in xrpow[] quantize and i'm not sure
                        whether the speedup will outweigh the slowdowns.
                        Leaving it for a rainy day.

later
mike
www.cryogen.com/mikecheng
-----------------------------

/************************************************************************
*
* window_subband()
*
* PURPOSE:  Overlapping window on PCM samples
*
* SEMANTICS:
* 32 16-bit pcm samples are scaled to fractional 2's complement and
* concatenated to the end of the window buffer #x#. The updated window
* buffer #x# is then windowed by the analysis window #c# to produce the
* windowed sample #z#
*
************************************************************************/

extern double enwindow[];

void window_subband(buffer, z, k)
short **buffer;
double z[HAN_SIZE];
int k;
{
    typedef double FAR XX[2][HAN_SIZE];
    static XX FAR *x;
    double *xk;
    int i;
    static int off[2] = {0,0};
    static char init = 0;
    double t;
    double *ep0,*ep1, *ep2, *ep3, *ep4, *ep5, *ep6, *ep7;
    if (!init) {
        x = (XX FAR *) mem_alloc(sizeof(XX),"x");
        memset(x, 0, 2*HAN_SIZE);
        init = 1;
    }
    xk=(*x)[k];

    /* replace 32 oldest samples with 32 new samples */
    for (i=0;i<32;i++) 
      xk[31-i+off[k]] = (double) *(*buffer)++/SCALE;

    ep0=&enwindow[0];
    ep1=&enwindow[64];
    ep2=&enwindow[128];
    ep3=&enwindow[192];
    ep4=&enwindow[256];
    ep5=&enwindow[320];
    ep6=&enwindow[384];
    ep7=&enwindow[448];
 
    /* shift samples into proper window positions */
    for( i = 0 ; i<64 ; i++ )
      {
        t =  xk[(i+off[k])&512-1] * *ep0++;
        t += xk[(i+64+off[k])&512-1] * *ep1++;
        t += xk[(i+128+off[k])&512-1] * *ep2++;
        t += xk[(i+192+off[k])&512-1] * *ep3++;
        t += xk[(i+256+off[k])&512-1] * *ep4++;
        t += xk[(i+320+off[k])&512-1] * *ep5++;
        t += xk[(i+384+off[k])&512-1] * *ep6++;
        t += xk[(i+448+off[k])&512-1] * *ep7++;
        z[i] = t;
      }
    
    off[k] += 480;              /*offset is modulo (HAN_SIZE-1)*/
    off[k] &= HAN_SIZE-1;

}


/*************************************************************************/
/*            calc_noise1                                                */
/*************************************************************************/
/*  mt 5/99:  Function: Improved calc_noise for a single channel
 *
 *  For every scalefactor band, computes the mean squared difference
 *  between |xr| and the value pow43[ix] * step, stores it in xfsf, and
 *  flags the band in distort when that value exceeds the matching entry
 *  of l3_xmin.
 *
 *    xfsf[0] / distort[0]        long bands   0 .. cod_info->sfb_lmax-1
 *    xfsf[w+1] / distort[w+1]    short bands  cod_info->sfb_smax .. 11,
 *                                one row per window w = 0..2
 *
 *  ix[] values index pow43[] directly without a range check, so every
 *  entry must lie in 0 .. PRECALC_SIZE-1.
 *
 *  Returns the number of bands flagged in distort.
 *
 *  NOTE: noise is received by value; the excess accumulated into it
 *  below is therefore not visible to the caller.
 */
int calc_noise1( double xr[576], int ix[576], gr_info *cod_info,
            double xfsf[4][CBLIMIT], int distort[4][CBLIMIT],
            III_psy_xmin *l3_xmin,int gr, int ch, double noise)

{
    #define PRECALC_SIZE 8206 /* 8191+15. should never be outside this. see
count_bits() */
    static double pow43[PRECALC_SIZE];
    static int init=0;

    int band, lo, hi, idx, win, n;
    int n_over = 0;
    double energy, width, err;
    double base_step, q_step;
    D192_3 *xr_short = (D192_3 *) xr;
    I192_3 *ix_short = (I192_3 *) ix;

    noise = 0;

    /* fill the x^(4/3) lookup table on the first call only */
    if (!init) {
        for (n = 0; n < PRECALC_SIZE; n++)
            pow43[n] = pow((double)n, 4.0/3.0);
        init = 1;
    }

    /* step derived from the quantizer setting; shared by all bands */
    base_step = pow( 2.0, (cod_info->quantizerStepSize) * 0.25 );

    /* ---- long blocks ---- */
    for (band = 0; band < cod_info->sfb_lmax; band++) {
        lo = scalefac_band_long[band];
        hi = scalefac_band_long[band + 1];
        width = hi - lo;

        energy = 0.0;
        for (idx = lo; idx < hi; idx++) {
            err = fabs(xr[idx]) - pow43[ix[idx]] * base_step;
            energy += err * err;
        }
        xfsf[0][band] = energy / width;

        if (xfsf[0][band] > l3_xmin->l[gr][ch][band]) {
            distort[0][band] = 1;
            n_over++;
            noise += xfsf[0][band] - l3_xmin->l[gr][ch][band];
        } else {
            distort[0][band] = 0;
        }
    }

    /* ---- short blocks: three windows ---- */
    for (win = 0; win < 3; win++) {
        /* a non-zero subblock gain scales the step for this window */
        q_step = base_step;
        if (cod_info->subblock_gain[win])
            q_step *= pow(2.0, -2.0*cod_info->subblock_gain[win]);

        for (band = cod_info->sfb_smax; band < 12; band++) {
            lo = scalefac_band_short[band];
            hi = scalefac_band_short[band + 1];
            width = hi - lo;

            energy = 0.0;
            for (idx = lo; idx < hi; idx++) {
                err = fabs((*xr_short)[idx][win])
                      - pow43[(*ix_short)[idx][win]] * q_step;
                energy += err * err;
            }
            xfsf[win + 1][band] = energy / width;

            if (xfsf[win + 1][band] > l3_xmin->s[gr][ch][band][win]) {
                distort[win + 1][band] = 1;
                n_over++;
                noise += xfsf[win + 1][band] - l3_xmin->s[gr][ch][band][win];
            } else {
                distort[win + 1][band] = 0;
            }
        }
    }

    return n_over;
}


 
--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )
[MP3 ENCODER] speedups...

Reply via email to