Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Kostya Shishkov Tue, 30 Jul 2013 09:55:34 -0700

On Tue, Jul 30, 2013 at 06:47:37PM +0200, Vitor Sessak wrote:
> Hi
> 
> On Jul 30, 2013 12:36 PM, "Kostya Shishkov" <[email protected]>
> wrote:
> 
> > +#include "twinvq.h"
> > +#include "metasound_data.h"
> >
> >  /**
> >   * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
> > @@ -135,22 +58,21 @@ static void dequant(TwinContext *tctx, GetBitContext
> *gb, float *out,
> >          int bitstream_second_part = (i >=
> tctx->bits_main_spec_change[ftype]);
> >
> >          int bits = tctx->bits_main_spec[0][ftype][bitstream_second_part];
> > +        tmp0 = get_bits(gb, bits);
> >          if (bits == 7) {
> > -            if (get_bits1(gb))
> > +            if (tmp0 & 0x40)
> >                  sign0 = -1;
> > -            bits = 6;
> > +            tmp0 &= 0x3F;
> >          }
> > -        tmp0 = get_bits(gb, bits);
> >
> >          bits = tctx->bits_main_spec[1][ftype][bitstream_second_part];
> >
> > +        tmp1 = get_bits(gb, bits);
> >          if (bits == 7) {
> > -            if (get_bits1(gb))
> > +            if (tmp1 & 0x40)
> >                  sign1 = -1;
> > -
> > -            bits = 6;
> > +            tmp1 &= 0x3F;
> >          }
> > -        tmp1 = get_bits(gb, bits);
> >
> >          tab0 = cb0 + tmp0 * cb_len;
> >          tab1 = cb1 + tmp1 * cb_len;
> 
> Can't this modified version be used both for TwinVQ and Metasound?


Yes, I'm working on a new version that will read into some temporary structure
(like the binary ElenrilSound decoder does) and then feed bits from it to the
reconstruction functions - that should narrow the codec-specific bits down to
dec_bark_env(), decode_ppc() and mode selection.

> 
> > @@ -163,67 +85,24 @@ static void dequant(TwinContext *tctx, GetBitContext
> *gb, float *out,
> >      }
> >  }
> >
> > -/**
> > - * Evaluate a * b / 400 rounded to the nearest integer. When, for
> example,
> > - * a * b == 200 and the nearest integer is ill-defined, use a table to
> emulate
> > - * the following broken float-based implementation used by the binary
> decoder:
> > - *
> > - * @code
> > - * static int very_broken_op(int a, int b)
> > - * {
> > - *    static float test; // Ugh, force gcc to do the division first...
> > - *
> > - *    test = a / 400.0;
> > - *    return b * test + 0.5;
> > - * }
> > - * @endcode
> > - *
> > - * @note if this function is replaced by just ROUNDED_DIV(a * b, 400.0),
> the
> > - * stddev between the original file (before encoding with Yamaha
> encoder) and
> > - * the decoded output increases, which leads one to believe that the
> encoder
> > - * expects exactly this broken calculation.
> > - */
> > -static int very_broken_op(int a, int b)
> > -{
> > -    int x = a * b + 200;
> > -    int size;
> > -    const uint8_t *rtab;
> > -
> > -    if (x % 400 || b % 5)
> > -        return x / 400;
> > -
> > -    x /= 400;
> > -
> > -    size = tabs[b / 5].size;
> > -    rtab = tabs[b / 5].tab;
> > -    return x - rtab[size * av_log2(2 * (x - 1) / size) + (x - 1) % size];
> > -}
> > -
> > -/**
> > - * Sum to data a periodic peak of a given period, width and shape.
> > - *
> > - * @param period the period of the peak divised by 400.0
> > - */
> > -static void add_peak(int period, int width, const float *shape,
> > +static void add_peak(float period, int width, const float *shape,
> >                       float ppc_gain, float *speech, int len)
> >  {
> > -    int i, j;
> > -
> > +    int i, j, center;
> >      const float *shape_end = shape + len;
> > -    int center;
> >
> >      // First peak centered around zero
> >      for (i = 0; i < width / 2; i++)
> >          speech[i] += ppc_gain * *shape++;
> >
> >      for (i = 1; i < ROUNDED_DIV(len, width); i++) {
> > -        center = very_broken_op(period, i);
> > +        center = (int)(i * period + 0.5);
> >          for (j = -width / 2; j < (width + 1) / 2; j++)
> >              speech[j + center] += ppc_gain * *shape++;
> >      }
> >
> >      // For the last block, be careful not to go beyond the end of the
> buffer
> > -    center = very_broken_op(period, i);
> > +    center = (int)(i * period + 0.5);
> >      for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
> >          speech[j + center] += ppc_gain * *shape++;
> >  }
> > @@ -231,26 +110,42 @@ static void add_peak(int period, int width, const
> float *shape,
> >  static void decode_ppc(TwinContext *tctx, int period_coef, const float
> *shape,
> >                         float ppc_gain, float *speech)
> >  {
> > -    const ModeTab *mtab = tctx->mtab;
> > -    int isampf          = tctx->avctx->sample_rate / 1000;
> > -    int ibps            = tctx->avctx->bit_rate / (1000 *
> tctx->avctx->channels);
> > -    int min_period      = ROUNDED_DIV(40 * 2 * mtab->size, isampf);
> > -    int max_period      = ROUNDED_DIV(40 * 2 * mtab->size * 6, isampf);
> > -    int period_range    = max_period - min_period;
> > -
> > -    // This is actually the period multiplied by 400. It is just
> linearly coded
> > -    // between its maximum and minimum value.
> > -    int period = min_period +
> > -                 ROUNDED_DIV(period_coef * period_range,
> > -                             (1 << mtab->ppc_period_bit) - 1);
> > +    const MetasoundModeTab *mtab = tctx->mtab;
> > +    int isampf       = tctx->avctx->sample_rate / 1000;
> > +    int ibps         = tctx->avctx->bit_rate / (1000 *
> tctx->avctx->channels);
> >      int width;
> >
> > -    if (isampf == 22 && ibps == 32) {
> > -        // For some unknown reason, NTT decided to code this case
> differently...
> > -        width = ROUNDED_DIV((period + 800) * mtab->peak_per2wid,
> > -                            400 * mtab->size);
> > -    } else
> > -        width = period * mtab->peak_per2wid / (400 * mtab->size);
> > +    float ratio = (float)mtab->size / isampf;
> > +    float min_period, max_period, period_range, period;
> > +    float some_mult;
> > +
> > +    if (tctx->avctx->channels == 1) {
> > +        min_period = log2(ratio * 0.2);
> > +        max_period = min_period + log2(6);
> > +    } else {
> > +        min_period = (int)(ratio * 0.2 * 400     + 0.5) / 400.0;
> > +        max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
> > +    }
> > +    period_range = max_period - min_period;
> > +    period       = min_period + period_coef * period_range /
> > +                   ((1 << mtab->ppc_period_bit) - 1);
> > +    if (tctx->avctx->channels == 1)
> > +        period = powf(2.0, period);
> > +    else
> > +        period = (int)(period * 400 + 0.5) / 400.0;
> > +
> > +    switch (isampf) {
> > +    case  8: some_mult = 2.0; break;
> > +    case 11: some_mult = 3.0; break;
> > +    case 16: some_mult = 3.0; break;
> > +    case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
> > +    case 44: some_mult = 8.0; break;
> > +    default: some_mult = 4.0;
> > +    }
> > +
> > +    width = (int)(some_mult / (mtab->size / period) *
> mtab->ppc_shape_len);
> > +    if (isampf == 22 && ibps == 32)
> > +        width = (int)((2.0 / period + 1) * width + 0.5);
> >
> >      add_peak(period, width, shape, ppc_gain, speech,
> mtab->ppc_shape_len);
> >  }
> 
> The way the peaks are added makes the decoder pretty sensitive to floating
> point rounding errors. I would volunteer to do the calculation of
> "center" with only fixed-point math, but I imagine the decoder will still
> be tweaked to be closer to the binary one and I don't want to do it twice.

I'm pretty sure it's not that bad.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Reply via email to