Hi all,
        find attached a small tweak to l3psycho_energy (just cut and paste into
l3psy.c). a 30% improvement in this routine.
        all i did was re-order the checks for chn type.

A few ideas/things about this routine
        - when does chn==2 or chn==3?  I guess from the code that it's for MID
and SIDE channels, but as far as i can see, nothing ever calls it with those
values.  Is this code for the future? or is it code from the past that's never
been removed?
        - tried combining window[] and scalefac into one array to reduce the
number of multiplications, but for some reason it actually slows things down.
(this combined window must be double (not FLOAT) to get the same results)
        - why is syncsize=1328 but savebuffer[1344]? The last
16 places in savebuffer appear never to be used. not a big point, but just
wondering if there's some logic there that i'm missing
        - it may be possible to do a bit of offset-modulo arithmetic for this
routine (similar to the window_subband routine).  The routine as it is, shifts
the whole array down by 576 samples and then shifts in 576 new samples.  You
could just shift 576 samples into the array over the oldest 576 samples and 
remember where the current "first element" is. Not sure offset-modulo would be
faster.
        - the window[] array is symmetric. using a window[] value more than
once may give a speed bonus.
        - /SQRT can be easily changed to *iSQRT. (but this is for chn==2,3. and
i'm not sure that they're ever used)

later
mike

---------------------

/*
 * L3psycho_energy
 *
 * Windows the most recent input samples and runs one long FFT (length 1024)
 * and three short FFTs (length 256) on them, storing the results that fft()
 * produces in energy/ax/bx (long block) and energy_s/ax_s/bx_s (short blocks).
 *
 *   buffer    new input samples; read only when chn < 2, in which case the
 *             first (syncsize - sync_flush) = 576 entries are consumed
 *   energy, ax, bx        outputs of the long-block fft() call
 *   energy_s, ax_s, bx_s  outputs of the three short-block fft() calls,
 *                         indexed by short block number 0..2
 *   chn       0 or 1: a real channel; its history in savebuffer[chn] is
 *                     shifted and refilled from buffer before windowing
 *             2:      (savebuffer[0] + savebuffer[1]) / SQRT2
 *             other:  (savebuffer[0] - savebuffer[1]) / SQRT2
 *             For chn >= 2 the history buffers are NOT updated, so the
 *             caller is expected to have already called this routine for
 *             channels 0 and 1 with the current samples.
 *             NOTE(review): presumably 2 = mid and 3 = side -- confirm.
 *   gr_out    only used (under HAVEGTK) to index pinfo->energy
 *   info      not used by this routine
 *
 * The routine keeps static state (sample history, window tables, scale
 * factor) that is initialised on the first call.
 */
void L3psycho_energy( short int *buffer, 
    FLOAT energy[HBLKSIZE],
    FLOAT ax[HBLKSIZE], FLOAT bx[HBLKSIZE],
    FLOAT energy_s[3][HBLKSIZE_s],
    FLOAT ax_s[3][HBLKSIZE_s], FLOAT bx_s[3][HBLKSIZE_s],
     int chn,int gr_out , layer * info)
{
  static short int savebuffer[2][1344];
  static int sync_flush,flush,syncsize;
  static FLOAT scalefac;
  static FLOAT window_s[BLKSIZE_s];
  static FLOAT window[BLKSIZE];
  static int firstcall=1;

#ifdef HAVEGTK
  /* energy from the previous call, per channel, for the analysis display */
  static FLOAT energy_save[4][HBLKSIZE];
#endif

  int i,j,k,sblock;
  
  FLOAT wsamp_r[BLKSIZE];
  FLOAT wsamp_rs[256];
  
  if(firstcall) {
    firstcall=0;
    memset(savebuffer, 0, sizeof(savebuffer));
    sync_flush=WINDELAY; flush=576; syncsize=WINDELAY+576;
    
    /* real channels are scaled by SQRT2 when force_ms is set */
    scalefac=1.0;
    if (force_ms) scalefac=SQRT2;
    
    /* calculate HANN window coefficients (sampled at half-sample offsets) */
    for(i=0;i<BLKSIZE;i++)  window[i]  =0.5*(1-cos(2.0*PI*(i+0.5)/BLKSIZE));
    for(i=0;i<BLKSIZE_s;i++)window_s[i]=0.5*(1-cos(2.0*PI*(i+0.5)/BLKSIZE_s));
  }

  /**********************************************************************
   *  compute FFTs
   **********************************************************************/
  if (chn<2) { /* the most common option */
    /* drop the oldest `flush` samples, then append the new ones */
    for ( j = 0; j < sync_flush; j++ )
      savebuffer[chn][j] = savebuffer[chn][j+flush];
    for ( j = sync_flush; j < syncsize; j++ )
      savebuffer[chn][j] = buffer[j-sync_flush];

    for ( j = 0; j < BLKSIZE; j++ ) 
      wsamp_r[j] = window[j] * savebuffer[chn][j] * scalefac;
  } else if (chn==2) {
    for ( j = 0; j < BLKSIZE; j++ )
      wsamp_r[j] = window[j] * (savebuffer[0][j] + savebuffer[1][j] )/SQRT2;
  } else { /* chn==3 */
    for ( j = 0; j < BLKSIZE; j++ )
      wsamp_r[j] = window[j] * (savebuffer[0][j] - savebuffer[1][j] )/SQRT2;
  }

  fft( wsamp_r, energy, ax, bx, 1024 );
  /* mt 7/99
    Note: fft_side() can be used to compute energy, ax & bx for the
    mid and side channels (chn=2,3) without calling additional FFTs. 
    But it requires wsamp_r to be saved from channels 0 and 1.  
    My tests show that the FFT is so fast that this gives no savings.
    Probably the extra memory hurts the cache performance.
  */

#ifdef HAVEGTK
  if(gtkflag) {
    /* report the energy saved by the previous call for this channel,
       then remember the current one for the next call */
    for (j=0; j<HBLKSIZE ; j++) {
      pinfo->energy[gr_out][chn][j]=energy_save[chn][j];
      energy_save[chn][j]=energy[j];
    }
  }
#endif

  /* three short blocks of 256 samples, starting 192 samples apart,
     the first one at offset 192 into the history buffer */
  for ( sblock = 0; sblock < 3; sblock++ ) {
    int shlen = 192;
    int shoff = 1;

    k = shlen * (shoff + sblock);
    if (chn<2) {
      for ( j = 0; j < 256; j++, k++ )
        wsamp_rs[j] = window_s[j]* savebuffer[chn][k]*scalefac;
    } else if (chn==2) {
      /* Same scaling as the long block.  The sum of two shorts is an int,
         so it must not be halved before the conversion to floating point:
         (a+b)/2 would truncate odd sums. */
      for ( j = 0; j < 256; j++, k++ )
        wsamp_rs[j] = window_s[j] * (savebuffer[0][k] + savebuffer[1][k])/SQRT2;
    } else { /* chn==3; mirrors the long-block branch so wsamp_rs is always filled */
      for ( j = 0; j < 256; j++, k++ )
        wsamp_rs[j] = window_s[j] * (savebuffer[0][k] - savebuffer[1][k])/SQRT2;
    }

    fft( wsamp_rs, energy_s[sblock], ax_s[sblock], bx_s[sblock], 256 );
  }
}

--
MP3 ENCODER mailing list ( http://geek.rcc.se/mp3encoder/ )

Reply via email to