On Tue, Jul 30, 2013 at 06:47:37PM +0200, Vitor Sessak wrote:
> Hi
>
> On Jul 30, 2013 12:36 PM, "Kostya Shishkov" <[email protected]>
> wrote:
>
> > +#include "twinvq.h"
> > +#include "metasound_data.h"
> >
> > /**
> > * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
> > @@ -135,22 +58,21 @@ static void dequant(TwinContext *tctx, GetBitContext
> *gb, float *out,
> > int bitstream_second_part = (i >=
> tctx->bits_main_spec_change[ftype]);
> >
> > int bits = tctx->bits_main_spec[0][ftype][bitstream_second_part];
> > + tmp0 = get_bits(gb, bits);
> > if (bits == 7) {
> > - if (get_bits1(gb))
> > + if (tmp0 & 0x40)
> > sign0 = -1;
> > - bits = 6;
> > + tmp0 &= 0x3F;
> > }
> > - tmp0 = get_bits(gb, bits);
> >
> > bits = tctx->bits_main_spec[1][ftype][bitstream_second_part];
> >
> > + tmp1 = get_bits(gb, bits);
> > if (bits == 7) {
> > - if (get_bits1(gb))
> > + if (tmp1 & 0x40)
> > sign1 = -1;
> > -
> > - bits = 6;
> > + tmp1 &= 0x3F;
> > }
> > - tmp1 = get_bits(gb, bits);
> >
> > tab0 = cb0 + tmp0 * cb_len;
> > tab1 = cb1 + tmp1 * cb_len;
>
> Can't this modified version be used both for TwinVQ and Metasound?
Yes, I'm working on a new version that will read into some temporary structure
(like the binary ElenrilSound decoder does) and then feed bits from it to the
reconstruction functions - that should narrow the codec-specific bits down to
dec_bark_env(), decode_ppc() and mode selection.
>
> > @@ -163,67 +85,24 @@ static void dequant(TwinContext *tctx, GetBitContext
> *gb, float *out,
> > }
> > }
> >
> > -/**
> > - * Evaluate a * b / 400 rounded to the nearest integer. When, for
> example,
> > - * a * b == 200 and the nearest integer is ill-defined, use a table to
> emulate
> > - * the following broken float-based implementation used by the binary
> decoder:
> > - *
> > - * @code
> > - * static int very_broken_op(int a, int b)
> > - * {
> > - * static float test; // Ugh, force gcc to do the division first...
> > - *
> > - * test = a / 400.0;
> > - * return b * test + 0.5;
> > - * }
> > - * @endcode
> > - *
> > - * @note if this function is replaced by just ROUNDED_DIV(a * b, 400.0),
> the
> > - * stddev between the original file (before encoding with Yamaha
> encoder) and
> > - * the decoded output increases, which leads one to believe that the
> encoder
> > - * expects exactly this broken calculation.
> > - */
> > -static int very_broken_op(int a, int b)
> > -{
> > - int x = a * b + 200;
> > - int size;
> > - const uint8_t *rtab;
> > -
> > - if (x % 400 || b % 5)
> > - return x / 400;
> > -
> > - x /= 400;
> > -
> > - size = tabs[b / 5].size;
> > - rtab = tabs[b / 5].tab;
> > - return x - rtab[size * av_log2(2 * (x - 1) / size) + (x - 1) % size];
> > -}
> > -
> > -/**
> > - * Sum to data a periodic peak of a given period, width and shape.
> > - *
> > - * @param period the period of the peak divised by 400.0
> > - */
> > -static void add_peak(int period, int width, const float *shape,
> > +static void add_peak(float period, int width, const float *shape,
> > float ppc_gain, float *speech, int len)
> > {
> > - int i, j;
> > -
> > + int i, j, center;
> > const float *shape_end = shape + len;
> > - int center;
> >
> > // First peak centered around zero
> > for (i = 0; i < width / 2; i++)
> > speech[i] += ppc_gain * *shape++;
> >
> > for (i = 1; i < ROUNDED_DIV(len, width); i++) {
> > - center = very_broken_op(period, i);
> > + center = (int)(i * period + 0.5);
> > for (j = -width / 2; j < (width + 1) / 2; j++)
> > speech[j + center] += ppc_gain * *shape++;
> > }
> >
> > // For the last block, be careful not to go beyond the end of the
> buffer
> > - center = very_broken_op(period, i);
> > + center = (int)(i * period + 0.5);
> > for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
> > speech[j + center] += ppc_gain * *shape++;
> > }
> > @@ -231,26 +110,42 @@ static void add_peak(int period, int width, const
> float *shape,
> > static void decode_ppc(TwinContext *tctx, int period_coef, const float
> *shape,
> > float ppc_gain, float *speech)
> > {
> > - const ModeTab *mtab = tctx->mtab;
> > - int isampf = tctx->avctx->sample_rate / 1000;
> > - int ibps = tctx->avctx->bit_rate / (1000 *
> tctx->avctx->channels);
> > - int min_period = ROUNDED_DIV(40 * 2 * mtab->size, isampf);
> > - int max_period = ROUNDED_DIV(40 * 2 * mtab->size * 6, isampf);
> > - int period_range = max_period - min_period;
> > -
> > - // This is actually the period multiplied by 400. It is just
> linearly coded
> > - // between its maximum and minimum value.
> > - int period = min_period +
> > - ROUNDED_DIV(period_coef * period_range,
> > - (1 << mtab->ppc_period_bit) - 1);
> > + const MetasoundModeTab *mtab = tctx->mtab;
> > + int isampf = tctx->avctx->sample_rate / 1000;
> > + int ibps = tctx->avctx->bit_rate / (1000 *
> tctx->avctx->channels);
> > int width;
> >
> > - if (isampf == 22 && ibps == 32) {
> > - // For some unknown reason, NTT decided to code this case
> differently...
> > - width = ROUNDED_DIV((period + 800) * mtab->peak_per2wid,
> > - 400 * mtab->size);
> > - } else
> > - width = period * mtab->peak_per2wid / (400 * mtab->size);
> > + float ratio = (float)mtab->size / isampf;
> > + float min_period, max_period, period_range, period;
> > + float some_mult;
> > +
> > + if (tctx->avctx->channels == 1) {
> > + min_period = log2(ratio * 0.2);
> > + max_period = min_period + log2(6);
> > + } else {
> > + min_period = (int)(ratio * 0.2 * 400 + 0.5) / 400.0;
> > + max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
> > + }
> > + period_range = max_period - min_period;
> > + period = min_period + period_coef * period_range /
> > + ((1 << mtab->ppc_period_bit) - 1);
> > + if (tctx->avctx->channels == 1)
> > + period = powf(2.0, period);
> > + else
> > + period = (int)(period * 400 + 0.5) / 400.0;
> > +
> > + switch (isampf) {
> > + case 8: some_mult = 2.0; break;
> > + case 11: some_mult = 3.0; break;
> > + case 16: some_mult = 3.0; break;
> > + case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
> > + case 44: some_mult = 8.0; break;
> > + default: some_mult = 4.0;
> > + }
> > +
> > + width = (int)(some_mult / (mtab->size / period) *
> mtab->ppc_shape_len);
> > + if (isampf == 22 && ibps == 32)
> > + width = (int)((2.0 / period + 1) * width + 0.5);
> >
> > add_peak(period, width, shape, ppc_gain, speech,
> mtab->ppc_shape_len);
> > }
>
> The way the peaks are added makes the decoder pretty sensitive to floating
> point rounding errors. I would volunteer to make the calculation of
> "center" with only fixed-point math, but I imagine the decoder will still
> be tweaked to be closer to the binary one and I don't want to do it twice.
I'm pretty sure it's not that bad.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel