+ speech[j+center] += ppc_gain * *shape++;
+ }
+
+ // For the last block, be careful not to go beyond the end of the buffer
+ center = (int)(i * period + 0.5);
+ for (j = -width/2; j < (width - 1)/2 + 1 && shape < shape_end; j++)
+ speech[j+center] += ppc_gain * *shape++;
+}
+
+ /**
+  * Derive the peak period and peak width for one channel, then add the
+  * periodic peak component to the spectrum through add_peak().
+  *
+  * @param tctx        decoder context; supplies the mode table and the
+  *                    stream's sample rate, bit rate and channel count
+  * @param period_coef quantized period index, scaled against
+  *                    (1 << mtab->ppc_period_bit) - 1
+  * @param shape       peak shape, forwarded to add_peak() together with
+  *                    mtab->ppc_shape_len
+  * @param ppc_gain    gain forwarded to add_peak()
+  * @param speech      buffer forwarded to add_peak()
+  */
+ static void decode_ppc(TwinContext *tctx, int period_coef, const float *shape,
+                        float ppc_gain, float *speech)
+ {
+     const MetasoundModeTab *mtab = tctx->mtab;
+     const int mono     = tctx->avctx->channels == 1;
+     const int khz      = tctx->avctx->sample_rate / 1000;
+     const int kbps_ch  = tctx->avctx->bit_rate / (1000 * tctx->avctx->channels);
+     const int coef_max = (1 << mtab->ppc_period_bit) - 1;
+     const float ratio  = (float)mtab->size / khz;
+     float lo, hi, span, period;
+     float width_mult;
+     int width;
+
+     /* Mono streams interpolate the period in the log2 domain; all others
+      * interpolate linearly on a grid of 1/400. */
+     if (mono) {
+         lo = log2(ratio * 0.2);
+         hi = lo + log2(6);
+     } else {
+         lo = (int)(    ratio * 0.2 * 400 + 0.5) / 400.0;
+         hi = (int)(6 * ratio * 0.2 * 400 + 0.5) / 400.0;
+     }
+     span   = hi - lo;
+     period = lo + period_coef * span / coef_max;
+
+     if (mono)
+         period = powf(2.0, period);
+     else
+         period = (int)(period * 400 + 0.5) / 400.0;
+
+     /* Width multiplier chosen from the sample rate in kHz (and, at 22 kHz,
+      * from the per-channel bit rate). */
+     switch (khz) {
+     case  8:
+         width_mult = 2.0;
+         break;
+     case 11:
+     case 16:
+         width_mult = 3.0;
+         break;
+     case 22:
+         width_mult = kbps_ch == 32 ? 2.0 : 4.0;
+         break;
+     case 44:
+         width_mult = 8.0;
+         break;
+     default:
+         width_mult = 4.0;
+         break;
+     }
+
+     width = (int)(width_mult / (mtab->size / period) * mtab->ppc_shape_len);
+     if (khz == 22 && kbps_ch == 32)
+         width = (int)((2.0 / period + 1) * width + 0.5);
+
+     add_peak(period, width, shape, ppc_gain, speech, mtab->ppc_shape_len);
+ }
+
+ /**
+  * Read the quantized gains from the bitstream and dequantize them.
+  *
+  * For FT_LONG frames one gain per channel is written to out[channel].
+  * For every other frame type one base gain per channel is read, followed
+  * by `sub` sub-gains; the products are written to out[channel * sub + j].
+  *
+  * @param tctx  decoder context (channel count and mode table)
+  * @param gb    bitstream reader, advanced by the bits consumed
+  * @param ftype frame type of the current frame
+  * @param out   destination for the dequantized gains
+  */
+ static void dec_gain(TwinContext *tctx, GetBitContext *gb,
+                      enum FrameType ftype, float *out)
+ {
+     /* The sub-block count is read straight from tctx->mtab, so this function
+      * does not have to name the mode-table type itself. */
+     int sub        = tctx->mtab->fmode[ftype].sub;
+     float step     = AMP_MAX / ((1 << GAIN_BITS) - 1);
+     float sub_step = SUB_AMP_MAX / ((1 << SUB_GAIN_BITS) - 1);
+     int i, j;
+
+     if (ftype == FT_LONG) {
+         for (i = 0; i < tctx->avctx->channels; i++)
+             out[i] = (1./(1<<13)) *
+                      mulawinv(step * 0.5 + step * get_bits(gb, GAIN_BITS),
+                               AMP_MAX, MULAW_MU);
+     } else {
+         for (i = 0; i < tctx->avctx->channels; i++) {
+             /* Base gain of the channel; each sub-block scales it further. */
+             float val = (1./(1<<23)) *
+                         mulawinv(step * 0.5 + step * get_bits(gb, GAIN_BITS),
+                                  AMP_MAX, MULAW_MU);
+
+             for (j = 0; j < sub; j++) {
+                 out[i*sub + j] =
+                     val*mulawinv(sub_step* 0.5 +
+                                  sub_step* get_bits(gb, SUB_GAIN_BITS),
+                                  SUB_AMP_MAX, MULAW_MU);
+             }
+         }
+     }
+ }
+
+ /**
+  * Expand the Bark-scale envelope of one sub-block into per-coefficient
+  * scale factors.
+  *
+  * Each codebook value is optionally combined with the value stored in the
+  * history for the same position, clamped to at least 0.1, multiplied by
+  * `gain` and replicated over bark_tab[] consecutive entries of `out`.
+  * The history is then overwritten with the current codebook value.
+  *
+  * @param tctx     decoder context (mode table, channel count, history)
+  * @param in       codebook indices, bark_n_coef entries
+  * @param use_hist non-zero to mix in the stored history
+  * @param ch       channel whose history is used
+  * @param out      destination buffer
+  * @param gain     gain applied to every envelope value
+  * @param ftype    frame type selecting the mode-table entry
+  */
+ static void dec_bark_env(TwinContext *tctx, const uint8_t *in, int use_hist,
+                          int ch, float *out, float gain, enum FrameType ftype)
+ {
+     const MetasoundModeTab *mtab = tctx->mtab;
+     const int mono   = tctx->avctx->channels == 1;
+     float *history   = tctx->bark_hist[ftype][ch];
+     float weight     = ((const float []) {0.4, 0.35, 0.28})[ftype];
+     int bark_n_coef  = mtab->fmode[ftype].bark_n_coef;
+     int fw_cb_len    = mtab->fmode[ftype].bark_env_size / bark_n_coef;
+     int pos = 0;
+     int row, col;
+
+     /* Mono streams use a fixed history weight regardless of frame type. */
+     if (mono)
+         weight = 0.5;
+
+     for (row = 0; row < fw_cb_len; row++) {
+         for (col = 0; col < bark_n_coef; col++) {
+             float cb_val =
+                 mtab->fmode[ftype].bark_cb[fw_cb_len*in[col] + row] * (1./2048);
+             float env;
+
+             if (!use_hist)
+                 env = cb_val + 1.;
+             else if (mono)
+                 env = cb_val + weight*history[pos] + 1.;
+             else
+                 env = (1. - weight) * cb_val + weight*history[pos] + 1.;
+
+             history[pos] = cb_val;
+             if (env < 0.1)
+                 env = 0.1;
+
+             memset_float(out, env * gain, mtab->fmode[ftype].bark_tab[pos]);
+             out += mtab->fmode[ftype].bark_tab[pos];
+             pos++;
+         }
+     }
+ }
+
+ /**
+  * Parse the spectral payload of one frame and rebuild the spectrum of
+  * every channel in `out`.
+  *
+  * The bitstream is consumed in this order: main vector-quantized
+  * coefficients, Bark envelope indices, Bark history flags, gains, LSP
+  * indices, then (FT_LONG only) the peak shape, followed per channel by the
+  * peak period and peak gain indices.
+  *
+  * @param tctx  decoder context
+  * @param gb    bitstream reader positioned at the spectral payload
+  * @param out   spectrum buffer; channel c starts at out + mtab->size * c
+  * @param ftype frame type of the current frame
+  */
+ static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
+                                      float *out, enum FrameType ftype)
+ {
+     const MetasoundModeTab *mtab = tctx->mtab;
+     const int channels   = tctx->avctx->channels;
+     const int sub        = mtab->fmode[ftype].sub;
+     const int block_size = mtab->size / sub;
+     float   gain[CHANNELS_MAX*SUBBLOCKS_MAX];
+     float   ppc_shape[PPC_SHAPE_LEN_MAX * CHANNELS_MAX * 4];
+     uint8_t bark1[CHANNELS_MAX][SUBBLOCKS_MAX][BARK_N_COEF_MAX];
+     uint8_t bark_use_hist[CHANNELS_MAX][SUBBLOCKS_MAX];
+     uint8_t lpc_idx1[CHANNELS_MAX];
+     uint8_t lpc_idx2[CHANNELS_MAX][LSP_SPLIT_MAX];
+     uint8_t lpc_hist_idx[CHANNELS_MAX];
+     int ch, sb, n;
+
+     dequant(tctx, gb, out, ftype,
+             mtab->fmode[ftype].cb0, mtab->fmode[ftype].cb1,
+             mtab->fmode[ftype].cb_len_read);
+
+     /* All Bark envelope indices come first ... */
+     for (ch = 0; ch < channels; ch++)
+         for (sb = 0; sb < sub; sb++)
+             for (n = 0; n < mtab->fmode[ftype].bark_n_coef; n++)
+                 bark1[ch][sb][n] =
+                     get_bits(gb, mtab->fmode[ftype].bark_n_bit);
+
+     /* ... followed by one history flag per channel and sub-block. */
+     for (ch = 0; ch < channels; ch++)
+         for (sb = 0; sb < sub; sb++)
+             bark_use_hist[ch][sb] = get_bits1(gb);
+
+     dec_gain(tctx, gb, ftype, gain);
+
+     for (ch = 0; ch < channels; ch++) {
+         lpc_hist_idx[ch] = get_bits(gb, mtab->lsp_bit0);
+         lpc_idx1[ch]     = get_bits(gb, mtab->lsp_bit1);
+
+         for (n = 0; n < mtab->lsp_split; n++)
+             lpc_idx2[ch][n] = get_bits(gb, mtab->lsp_bit2);
+     }
+
+     /* The peak shape is only transmitted with long frames. */
+     if (ftype == FT_LONG) {
+         int cb_len_p = (tctx->n_div[3] + mtab->ppc_shape_len*channels - 1)/
+                        tctx->n_div[3];
+         dequant(tctx, gb, ppc_shape, FT_PPC, mtab->ppc_shape_cb,
+                 mtab->ppc_shape_cb + cb_len_p*PPC_SHAPE_CB_SIZE, cb_len_p);
+     }
+
+     for (ch = 0; ch < channels; ch++) {
+         float *chunk = out + mtab->size * ch;
+         float lsp[LSP_COEFS_MAX];
+
+         /* Scale each sub-block by its Bark envelope. */
+         for (sb = 0; sb < sub; sb++) {
+             float *blk = chunk + block_size*sb;
+
+             dec_bark_env(tctx, bark1[ch][sb], bark_use_hist[ch][sb], ch,
+                          tctx->tmp_buf, gain[sub*ch + sb], ftype);
+
+             tctx->fdsp.vector_fmul(blk, blk, tctx->tmp_buf, block_size);
+         }
+
+         if (ftype == FT_LONG) {
+             float pgain_base = channels == 2 ? 25000. : 20000.;
+             float pgain_step = pgain_base / ((1 << mtab->pgain_bit) - 1);
+             int p_coef = get_bits(gb, mtab->ppc_period_bit);
+             int g_coef = get_bits(gb, mtab->pgain_bit);
+             float peak_gain = 1./8192*
+                 mulawinv(pgain_step*g_coef+ pgain_step/2, pgain_base,
+                          mtab->peak_per2wid);
+
+             decode_ppc(tctx, p_coef, ppc_shape + ch*mtab->ppc_shape_len,
+                        peak_gain, chunk);
+         }
+
+         twinvq_decode_lsp(tctx, lpc_idx1[ch], lpc_idx2[ch], lpc_hist_idx[ch],
+                           lsp, tctx->lsp_hist[ch]);
+
+         twinvq_dec_lpc_spectrum_inv(tctx, lsp, ftype, tctx->tmp_buf);
+
+         /* The same LPC envelope is applied to every sub-block. */
+         for (sb = 0; sb < sub; sb++)
+             tctx->fdsp.vector_fmul(chunk + block_size*sb,
+                                    chunk + block_size*sb,
+                                    tctx->tmp_buf, block_size);
+     }
+ }
+
+ /**
+  * Decode one MetaSound packet.
+  *
+  * The first two packets are decoded without an output buffer and produce
+  * no frame (*got_frame_ptr is set to 0); from the third packet on a frame
+  * of mtab->size samples is returned.
+  *
+  * @param avctx         codec context
+  * @param data          AVFrame that receives the decoded samples
+  * @param got_frame_ptr set to 1 when `data` holds a frame, 0 otherwise
+  * @param avpkt         input packet
+  * @return the packet size on success, a negative AVERROR code on failure
+  */
+ static int metasound_decode_frame(AVCodecContext * avctx, void *data,
+                                   int *got_frame_ptr, AVPacket *avpkt)
+ {
+     AVFrame *frame     = data;
+     const uint8_t *buf = avpkt->data;
+     int buf_size       = avpkt->size;
+     TwinContext *tctx  = avctx->priv_data;
+     GetBitContext gb;
+     const MetasoundModeTab *mtab = tctx->mtab;
+     float **out = NULL;
+     enum FrameType ftype;
+     int window_type, ret;
+     static const enum FrameType wtype_to_ftype_table[] = {
+         FT_LONG,   FT_LONG, FT_SHORT, FT_LONG,
+         FT_MEDIUM, FT_LONG, FT_LONG,  FT_MEDIUM, FT_MEDIUM
+     };
+
+     if (buf_size*8 < avctx->bit_rate*mtab->size/avctx->sample_rate) {
+         av_log(avctx, AV_LOG_ERROR,
+                "Frame too small (%d bytes). Truncated file?\n", buf_size);
+         return AVERROR(EINVAL);
+     }
+
+     /* get output buffer */
+     if (tctx->discarded_packets >= 2) {
+         frame->nb_samples = mtab->size;
+         if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
+             av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+             return ret;
+         }
+         out = (float **)frame->extended_data;
+     }
+
+     init_get_bits(&gb, buf, buf_size * 8);
+     window_type = get_bits(&gb, WINDOW_TYPE_BITS);
+
+     /* wtype_to_ftype_table[] has nine entries, so 8 is the last valid index. */
+     if (window_type > 8) {
+         av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
+         return AVERROR_INVALIDDATA;
+     }
+
+     ftype = wtype_to_ftype_table[window_type];
+     /* Two bits are read and discarded for every frame type except FT_SHORT. */
+     if (ftype != FT_SHORT)
+         get_bits(&gb, 2);
+
+     read_and_decode_spectrum(tctx, &gb, tctx->spectrum, ftype);
+
+     /* `out` is still NULL while the first two packets are being discarded. */
+     twinvq_imdct_output(tctx, ftype, window_type, out);
+
+     FFSWAP(float*, tctx->curr_frame, tctx->prev_frame);
+
+     if (tctx->discarded_packets < 2) {
+         tctx->discarded_packets++;
+         *got_frame_ptr = 0;
+         return buf_size;
+     }
+
+     *got_frame_ptr = 1;
+
+     return buf_size;
+ }
+
+ typedef struct MSProps { /* stream parameters selected by an extradata tag; initialized positionally, so field order matters */
+ uint32_t tag; /* tag value compared against AV_RL32(extradata + 12) in metasound_decode_init() */
+ int bit_rate; /* total bit rate in kbit/s; metasound_decode_init() multiplies it by 1000 */
+ int channels; /* channel count copied to avctx->channels */
+ int sample_rate; /* sample rate copied to avctx->sample_rate */
+ } MSProps;
+
+ /**
+  * Known extradata tags and the stream parameters each one implies.
+  * The list is terminated by an entry whose tag is 0.
+  */
+ static const MSProps codec_props[] = {
+     { .tag = MKTAG('V','X','0','3'), .bit_rate =  6, .channels = 1, .sample_rate =  8000 },
+     { .tag = MKTAG('V','X','0','4'), .bit_rate = 12, .channels = 2, .sample_rate =  8000 },
+
+     { .tag = MKTAG('V','O','X','i'), .bit_rate =  8, .channels = 1, .sample_rate =  8000 },
+     { .tag = MKTAG('V','O','X','j'), .bit_rate = 10, .channels = 1, .sample_rate = 11025 },
+     { .tag = MKTAG('V','O','X','k'), .bit_rate = 16, .channels = 1, .sample_rate = 16000 },
+     { .tag = MKTAG('V','O','X','L'), .bit_rate = 24, .channels = 1, .sample_rate = 22050 },
+     { .tag = MKTAG('V','O','X','q'), .bit_rate = 32, .channels = 1, .sample_rate = 44100 },
+     { .tag = MKTAG('V','O','X','r'), .bit_rate = 40, .channels = 1, .sample_rate = 44100 },
+     { .tag = MKTAG('V','O','X','s'), .bit_rate = 48, .channels = 1, .sample_rate = 44100 },
+     { .tag = MKTAG('V','O','X','t'), .bit_rate = 16, .channels = 2, .sample_rate =  8000 },
+     { .tag = MKTAG('V','O','X','u'), .bit_rate = 20, .channels = 2, .sample_rate = 11025 },
+     { .tag = MKTAG('V','O','X','v'), .bit_rate = 32, .channels = 2, .sample_rate = 16000 },
+     { .tag = MKTAG('V','O','X','w'), .bit_rate = 48, .channels = 2, .sample_rate = 22050 },
+     { .tag = MKTAG('V','O','X','x'), .bit_rate = 64, .channels = 2, .sample_rate = 44100 },
+     { .tag = MKTAG('V','O','X','y'), .bit_rate = 80, .channels = 2, .sample_rate = 44100 },
+     { .tag = MKTAG('V','O','X','z'), .bit_rate = 96, .channels = 2, .sample_rate = 44100 },
+
+     /* terminator: remaining fields are zero-initialized */
+     { .tag = 0 }
+ };
+
+ /**
+  * Initialize the MetaSound decoder.
+  *
+  * The tag stored at byte offset 12 of the extradata is looked up in
+  * codec_props[] to obtain sample rate, channel count and bit rate; these
+  * select one of the supported mode tables.
+  *
+  * @param avctx codec context; sample_fmt, sample_rate, channels, bit_rate
+  *              and channel_layout are set here
+  * @return 0 on success, a negative AVERROR code on failure
+  */
+ static av_cold int metasound_decode_init(AVCodecContext *avctx)
+ {
+     int ret;
+     TwinContext *tctx = avctx->priv_data;
+     int isampf, ibps;
+     uint32_t tag;
+     const MSProps *props = codec_props;
+
+     tctx->avctx       = avctx;
+     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+     /* The tag occupies bytes 12..15, hence the 16-byte minimum. */
+     if (!avctx->extradata || avctx->extradata_size < 16) {
+         av_log(avctx, AV_LOG_ERROR, "Missing or incomplete extradata\n");
+         return AVERROR_INVALIDDATA;
+     }
+
+     tag = AV_RL32(avctx->extradata + 12);
+
+     /* Walk the table until the tag matches or the zero terminator is hit. */
+     while (props->tag && props->tag != tag)
+         props++;
+     if (!props->tag) {
+         av_log(avctx, AV_LOG_ERROR, "Could not find tag %08X\n", tag);
+         return AVERROR_INVALIDDATA;
+     }
+     avctx->sample_rate = props->sample_rate;
+     avctx->channels    = props->channels;
+     avctx->bit_rate    = props->bit_rate * 1000;
+     isampf             = avctx->sample_rate / 1000;
+
+     if (avctx->channels <= 0 || avctx->channels > CHANNELS_MAX) {
+         av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
+                avctx->channels);
+         return AVERROR_INVALIDDATA;
+     }
+     avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO :
+                                                    AV_CH_LAYOUT_STEREO;
+
+     ibps = avctx->bit_rate / (1000 * avctx->channels);
+
+     /* Key layout: channels << 16 | sample rate in kHz << 8 | kbit/s per channel. */
+     switch ((avctx->channels << 16) + (isampf << 8) + ibps) {
+     case (1 << 16) + ( 8 << 8) +  8: tctx->mtab = &metasound_mode0808;  break;
+     case (1 << 16) + (16 << 8) + 16: tctx->mtab = &metasound_mode1616;  break;
+     case (1 << 16) + (44 << 8) + 32: tctx->mtab = &metasound_mode4432;  break;
+     case (2 << 16) + (44 << 8) + 48: tctx->mtab = &metasound_mode4448s; break;
+     default:
+         av_log(avctx, AV_LOG_ERROR,
+                "This version does not support %d kHz - %d kbit/s/ch mode.\n",
+                isampf, ibps);
+         return AVERROR(ENOSYS);
+     }
+
+     avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
+     if ((ret = twinvq_init_mdct_win(tctx))) {
+         av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
+         twinvq_decode_close(avctx);
+         return ret;
+     }
+     twinvq_init_bitstream_params(tctx, 1);
+
+     /* NOTE(review): the count passed here is the number of elements in the
+      * outermost dimension of bark_hist only, not the total number of floats.
+      * Left unchanged because altering it could change decoder output;
+      * confirm whether the whole history was meant to be filled. */
+     memset_float(tctx->bark_hist[0][0], 0.1, FF_ARRAY_ELEMS(tctx->bark_hist));
+
+     return 0;
+ }
+
+ /** Codec registration entry for the MetaSound decoder. */
+ AVCodec ff_voxware_decoder = {
+     /* identification */
+     .name           = "metasound",
+     .long_name      = NULL_IF_CONFIG_SMALL("VoxWare MetaSound"),
+     .type           = AVMEDIA_TYPE_AUDIO,
+     .id             = AV_CODEC_ID_VOXWARE,
+
+     /* private state and callbacks */
+     .priv_data_size = sizeof(TwinContext),
+     .init           = metasound_decode_init,
+     .decode         = metasound_decode_frame,
+     .close          = twinvq_decode_close,
+
+     /* capabilities and output format */
+     .capabilities   = CODEC_CAP_DR1,
+     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                       AV_SAMPLE_FMT_NONE },
+ };