Hi
On Jul 30, 2013 12:36 PM, "Kostya Shishkov" <[email protected]>
wrote:
> +#include "twinvq.h"
> +#include "metasound_data.h"
>
> /**
> * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
> @@ -135,22 +58,21 @@ static void dequant(TwinContext *tctx, GetBitContext
*gb, float *out,
> int bitstream_second_part = (i >=
tctx->bits_main_spec_change[ftype]);
>
> int bits = tctx->bits_main_spec[0][ftype][bitstream_second_part];
> + tmp0 = get_bits(gb, bits);
> if (bits == 7) {
> - if (get_bits1(gb))
> + if (tmp0 & 0x40)
> sign0 = -1;
> - bits = 6;
> + tmp0 &= 0x3F;
> }
> - tmp0 = get_bits(gb, bits);
>
> bits = tctx->bits_main_spec[1][ftype][bitstream_second_part];
>
> + tmp1 = get_bits(gb, bits);
> if (bits == 7) {
> - if (get_bits1(gb))
> + if (tmp1 & 0x40)
> sign1 = -1;
> -
> - bits = 6;
> + tmp1 &= 0x3F;
> }
> - tmp1 = get_bits(gb, bits);
>
> tab0 = cb0 + tmp0 * cb_len;
> tab1 = cb1 + tmp1 * cb_len;
Can't this modified version be used for both TwinVQ and Metasound?
> @@ -163,67 +85,24 @@ static void dequant(TwinContext *tctx, GetBitContext
*gb, float *out,
> }
> }
>
> -/**
> - * Evaluate a * b / 400 rounded to the nearest integer. When, for
example,
> - * a * b == 200 and the nearest integer is ill-defined, use a table to
emulate
> - * the following broken float-based implementation used by the binary
decoder:
> - *
> - * @code
> - * static int very_broken_op(int a, int b)
> - * {
> - * static float test; // Ugh, force gcc to do the division first...
> - *
> - * test = a / 400.0;
> - * return b * test + 0.5;
> - * }
> - * @endcode
> - *
> - * @note if this function is replaced by just ROUNDED_DIV(a * b, 400.0),
the
> - * stddev between the original file (before encoding with Yamaha
encoder) and
> - * the decoded output increases, which leads one to believe that the
encoder
> - * expects exactly this broken calculation.
> - */
> -static int very_broken_op(int a, int b)
> -{
> - int x = a * b + 200;
> - int size;
> - const uint8_t *rtab;
> -
> - if (x % 400 || b % 5)
> - return x / 400;
> -
> - x /= 400;
> -
> - size = tabs[b / 5].size;
> - rtab = tabs[b / 5].tab;
> - return x - rtab[size * av_log2(2 * (x - 1) / size) + (x - 1) % size];
> -}
> -
> -/**
> - * Sum to data a periodic peak of a given period, width and shape.
> - *
> - * @param period the period of the peak divised by 400.0
> - */
> -static void add_peak(int period, int width, const float *shape,
> +static void add_peak(float period, int width, const float *shape,
> float ppc_gain, float *speech, int len)
> {
> - int i, j;
> -
> + int i, j, center;
> const float *shape_end = shape + len;
> - int center;
>
> // First peak centered around zero
> for (i = 0; i < width / 2; i++)
> speech[i] += ppc_gain * *shape++;
>
> for (i = 1; i < ROUNDED_DIV(len, width); i++) {
> - center = very_broken_op(period, i);
> + center = (int)(i * period + 0.5);
> for (j = -width / 2; j < (width + 1) / 2; j++)
> speech[j + center] += ppc_gain * *shape++;
> }
>
> // For the last block, be careful not to go beyond the end of the
buffer
> - center = very_broken_op(period, i);
> + center = (int)(i * period + 0.5);
> for (j = -width / 2; j < (width + 1) / 2 && shape < shape_end; j++)
> speech[j + center] += ppc_gain * *shape++;
> }
> @@ -231,26 +110,42 @@ static void add_peak(int period, int width, const
float *shape,
> static void decode_ppc(TwinContext *tctx, int period_coef, const float
*shape,
> float ppc_gain, float *speech)
> {
> - const ModeTab *mtab = tctx->mtab;
> - int isampf = tctx->avctx->sample_rate / 1000;
> - int ibps = tctx->avctx->bit_rate / (1000 *
tctx->avctx->channels);
> - int min_period = ROUNDED_DIV(40 * 2 * mtab->size, isampf);
> - int max_period = ROUNDED_DIV(40 * 2 * mtab->size * 6, isampf);
> - int period_range = max_period - min_period;
> -
> - // This is actually the period multiplied by 400. It is just
linearly coded
> - // between its maximum and minimum value.
> - int period = min_period +
> - ROUNDED_DIV(period_coef * period_range,
> - (1 << mtab->ppc_period_bit) - 1);
> + const MetasoundModeTab *mtab = tctx->mtab;
> + int isampf = tctx->avctx->sample_rate / 1000;
> + int ibps = tctx->avctx->bit_rate / (1000 *
tctx->avctx->channels);
> int width;
>
> - if (isampf == 22 && ibps == 32) {
> - // For some unknown reason, NTT decided to code this case
differently...
> - width = ROUNDED_DIV((period + 800) * mtab->peak_per2wid,
> - 400 * mtab->size);
> - } else
> - width = period * mtab->peak_per2wid / (400 * mtab->size);
> + float ratio = (float)mtab->size / isampf;
> + float min_period, max_period, period_range, period;
> + float some_mult;
> +
> + if (tctx->avctx->channels == 1) {
> + min_period = log2(ratio * 0.2);
> + max_period = min_period + log2(6);
> + } else {
> + min_period = (int)(ratio * 0.2 * 400 + 0.5) / 400.0;
> + max_period = (int)(ratio * 0.2 * 400 * 6 + 0.5) / 400.0;
> + }
> + period_range = max_period - min_period;
> + period = min_period + period_coef * period_range /
> + ((1 << mtab->ppc_period_bit) - 1);
> + if (tctx->avctx->channels == 1)
> + period = powf(2.0, period);
> + else
> + period = (int)(period * 400 + 0.5) / 400.0;
> +
> + switch (isampf) {
> + case 8: some_mult = 2.0; break;
> + case 11: some_mult = 3.0; break;
> + case 16: some_mult = 3.0; break;
> + case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
> + case 44: some_mult = 8.0; break;
> + default: some_mult = 4.0;
> + }
> +
> + width = (int)(some_mult / (mtab->size / period) *
mtab->ppc_shape_len);
> + if (isampf == 22 && ibps == 32)
> + width = (int)((2.0 / period + 1) * width + 0.5);
>
> add_peak(period, width, shape, ppc_gain, speech,
mtab->ppc_shape_len);
> }
The way the peaks are added makes the decoder quite sensitive to
floating-point rounding errors. I would volunteer to compute "center"
using only fixed-point math, but I imagine the decoder will still be tweaked
to be closer to the binary one, and I don't want to do it twice.
-Vitor
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel