Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Vitor Sessak Tue, 30 Jul 2013 09:48:11 -0700

Hi

On Jul 30, 2013 12:36 PM, "Kostya Shishkov" <[email protected]>
wrote:


> +#include "twinvq.h"
> +#include "metasound_data.h"
>
>  /**
>   * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
> @@ -135,22 +58,21 @@ static void dequant(TwinContext *tctx, GetBitContext
*gb, float *out,
>          int bitstream_second_part = (i >=
tctx->bits_main_spec_change[ftype]);
>
>          int bits = tctx->bits_main_spec[0][ftype][bitstream_second_part];
> +        tmp0 = get_bits(gb, bits);
>          if (bits == 7) {
> -            if (get_bits1(gb))
> +            if (tmp0 & 0x40)
>                  sign0 = -1;
> -            bits = 6;
> +            tmp0 &= 0x3F;
>          }
> -        tmp0 = get_bits(gb, bits);
>
>          bits = tctx->bits_main_spec[1][ftype][bitstream_second_part];
>
> +        tmp1 = get_bits(gb, bits);
>          if (bits == 7) {
> -            if (get_bits1(gb))
> +            if (tmp1 & 0x40)
>                  sign1 = -1;
> -
> -            bits = 6;
> +            tmp1 &= 0x3F;
>          }
> -        tmp1 = get_bits(gb, bits);
>
>          tab0 = cb0 + tmp0 * cb_len;
>          tab1 = cb1 + tmp1 * cb_len;

Can't this modified version be used for both TwinVQ and MetaSound?

> @@ -163,67 +85,24 @@ static void dequant(TwinContext *tctx, GetBitContext
*gb, float *out,
>      }
>  }
>
> -/**
> - * Evaluate a * b / 400 rounded to the nearest integer. When, for
example,
> - * a * b == 200 and the nearest integer is ill-defined, use a table to
emulate
> - * the following broken float-based implementation used by the binary
decoder:
> - *
> - * @code
> - * static int very_broken_op(int a, int b)
> - * {
> - *    static float test; // Ugh, force gcc to do the division first...
> - *
> - *    test = a / 400.0;
> - *    return b * test + 0.5;
> - * }
> - * @endcode
> - *
> - * @note if this function is replaced by just ROUNDED_DIV(a * b, 400.0),
the
> - * stddev between the original file (before encoding with Yamaha
encoder) and
> - * the decoded output increases, which leads one to believe that the
encoder
> - * expects exactly this broken calculation.
> - */
> -static int very_broken_op(int a, int b)
> -{
> -    int x = a * b + 200;
> -    int size;
> -    const uint8_t *rtab;
> -
> -    if (x % 400 || b % 5)
> -        return x / 400;
> -
> -    x /= 400;
> -
> -    size = tabs[b / 5].size;
> -    rtab = tabs[b / 5].tab;
> -    return x - rtab[size * av_log2(2 * (x - 1) / size) + (x - 1) % size];
> -}
> -
> -/**
> - * Sum to data a periodic peak of a given period, width and shape.
> - *
> - * @param period the period of the peak divised by 400.0
> - */
> -static void add_peak(int period, int width, const float *shape,
> +static void add_peak(float period, int width, const float *shape,
>                       float ppc_gain, float *speech, int len)
>  {
> -    int i, j;
> -
> +    int i, j, center;
>      const float *shape_end = shape + len;
> -    int center;
>
>      // First peak centered around zero
>      for (i = 0; i < width / 2; i++)
>          speech[i] += ppc_gain * *shape++;
>
>      for (i = 1; i < ROUNDED_DIV(len, width); i++) {
> -        center = very_broken_op(period, i);
> +        center = (int)(i * period + 0.5);
>          for (j = -width / 2; j < (width + 1) / 2; j++)
>              speech[j + center] += ppc_gain * *shape++;
>      }
>
>      // For the last block, be careful not to go beyond the end of the
buffer
> -    center = very_broken_op(period, i);
> +    center = (int)(i * period + 0.5);
>      for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
>          speech[j + center] += ppc_gain * *shape++;
>  }
> @@ -231,26 +110,42 @@ static void add_peak(int period, int width, const
float *shape,
>  static void decode_ppc(TwinContext *tctx, int period_coef, const float
*shape,
>                         float ppc_gain, float *speech)
>  {
> -    const ModeTab *mtab = tctx->mtab;
> -    int isampf          = tctx->avctx->sample_rate / 1000;
> -    int ibps            = tctx->avctx->bit_rate / (1000 *
tctx->avctx->channels);
> -    int min_period      = ROUNDED_DIV(40 * 2 * mtab->size, isampf);
> -    int max_period      = ROUNDED_DIV(40 * 2 * mtab->size * 6, isampf);
> -    int period_range    = max_period - min_period;
> -
> -    // This is actually the period multiplied by 400. It is just
linearly coded
> -    // between its maximum and minimum value.
> -    int period = min_period +
> -                 ROUNDED_DIV(period_coef * period_range,
> -                             (1 << mtab->ppc_period_bit) - 1);
> +    const MetasoundModeTab *mtab = tctx->mtab;
> +    int isampf       = tctx->avctx->sample_rate / 1000;
> +    int ibps         = tctx->avctx->bit_rate / (1000 *
tctx->avctx->channels);
>      int width;
>
> -    if (isampf == 22 && ibps == 32) {
> -        // For some unknown reason, NTT decided to code this case
differently...
> -        width = ROUNDED_DIV((period + 800) * mtab->peak_per2wid,
> -                            400 * mtab->size);
> -    } else
> -        width = period * mtab->peak_per2wid / (400 * mtab->size);
> +    float ratio = (float)mtab->size / isampf;
> +    float min_period, max_period, period_range, period;
> +    float some_mult;
> +
> +    if (tctx->avctx->channels == 1) {
> +        min_period = log2(ratio * 0.2);
> +        max_period = min_period + log2(6);
> +    } else {
> +        min_period = (int)(ratio * 0.2 * 400     + 0.5) / 400.0;
> +        max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
> +    }
> +    period_range = max_period - min_period;
> +    period       = min_period + period_coef * period_range /
> +                   ((1 << mtab->ppc_period_bit) - 1);
> +    if (tctx->avctx->channels == 1)
> +        period = powf(2.0, period);
> +    else
> +        period = (int)(period * 400 + 0.5) / 400.0;
> +
> +    switch (isampf) {
> +    case  8: some_mult = 2.0; break;
> +    case 11: some_mult = 3.0; break;
> +    case 16: some_mult = 3.0; break;
> +    case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
> +    case 44: some_mult = 8.0; break;
> +    default: some_mult = 4.0;
> +    }
> +
> +    width = (int)(some_mult / (mtab->size / period) *
mtab->ppc_shape_len);
> +    if (isampf == 22 && ibps == 32)
> +        width = (int)((2.0 / period + 1) * width + 0.5);
>
>      add_peak(period, width, shape, ppc_gain, speech,
mtab->ppc_shape_len);
>  }

The way the peaks are added makes the decoder pretty sensitive to floating
point rounding errors. I would volunteer to make the calculation of
"center" use only fixed-point math, but I imagine the decoder will still
be tweaked to be closer to the binary one and I don't want to do it twice.

-Vitor
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Reply via email to