Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Martin Storsjö Fri, 19 Jul 2013 01:43:53 -0700

On Fri, 19 Jul 2013, Kostya Shishkov wrote:

---
It's good enough for the only file I care about but maybe it's useful for the
others too.


The source can benefit from heavy diegoing (because it's mostly TwinVQ decoder
bits that were tweaked to be close to the reference decoder, or the ones that
had to be duplicated because of the different bitstream reader).

And eternal shame on Anton for not writing this decoder.
---
Changelog                         |    1 +
configure                         |    1 +
doc/general.texi                  |    2 +
libavcodec/Makefile               |    2 +
libavcodec/allcodecs.c            |    1 +
libavcodec/metasound.c            |  480 ++++++++++++++++
libavcodec/metasound_cb0808.c     |  856 +++++++++++++++++++++++++++++
libavcodec/metasound_cb1616.c     |  620 +++++++++++++++++++++
libavcodec/metasound_cb4432.c     | 1096 +++++++++++++++++++++++++++++++++++++
libavcodec/metasound_cb4448s.c    |  706 ++++++++++++++++++++++++
libavcodec/metasound_data.c       |  113 ++++
libavcodec/metasound_data.h       |   51 ++
libavcodec/metasound_data_fcb.c   |  590 ++++++++++++++++++++
libavcodec/metasound_data_lsp.c   |  475 ++++++++++++++++
libavcodec/metasound_data_shape.c |  540 ++++++++++++++++++
libavcodec/version.h              |    2 +-
16 files changed, 5535 insertions(+), 1 deletion(-)
create mode 100644 libavcodec/metasound.c
create mode 100644 libavcodec/metasound_cb0808.c
create mode 100644 libavcodec/metasound_cb1616.c
create mode 100644 libavcodec/metasound_cb4432.c
create mode 100644 libavcodec/metasound_cb4448s.c
create mode 100644 libavcodec/metasound_data.c
create mode 100644 libavcodec/metasound_data.h
create mode 100644 libavcodec/metasound_data_fcb.c
create mode 100644 libavcodec/metasound_data_lsp.c
create mode 100644 libavcodec/metasound_data_shape.c

diff --git a/Changelog b/Changelog
index 7a2a7c8..052ece2 100644
--- a/Changelog
+++ b/Changelog
@@ -25,6 +25,7 @@ version 10:
- support for WavPack muxing (raw and in Matroska)
- Go2Webinar decoder
- WavPack encoding through libwavpack
+- incomplete VoxWare MetaSound decoder


version 9:
diff --git a/configure b/configure
index d15e3f0..6537ffe 100755
--- a/configure
+++ b/configure
@@ -1600,6 +1600,7 @@ lagarith_decoder_select="dsputil"
ljpeg_encoder_select="aandcttables mpegvideoenc"
loco_decoder_select="golomb"
mdec_decoder_select="dsputil error_resilience mpegvideo"
+metasound_decoder_select="mdct lsp sinewin"
mimic_decoder_select="dsputil hpeldsp"
mjpeg_decoder_select="dsputil hpeldsp"
mjpegb_decoder_select="dsputil hpeldsp"
diff --git a/doc/general.texi b/doc/general.texi
index 2f0e2b9..a204f88 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -818,6 +818,8 @@ following image formats are supported:
@item TwinVQ (VQF flavor)    @tab     @tab  X
@item Vorbis                 @tab  E  @tab  X
    @tab A native but very primitive encoder exists.
+@item VoxWare MetaSound      @tab     @tab  X
+    @tab imperfect and incomplete support
@item WavPack                @tab  E  @tab  X
    @tab supported through external library libwavpack
@item Westwood Audio (SND1)  @tab     @tab  X
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 42ab007..9dc1a94 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -378,6 +378,8 @@ OBJS-$(CONFIG_VORBIS_DECODER)          += vorbisdec.o 
vorbisdsp.o vorbis.o \
                                          vorbis_data.o xiph.o
OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbisenc.o vorbis.o \
                                          vorbis_data.o
+OBJS-$(CONFIG_VOXWARE_DECODER)         += metasound.o metasound_data.o \
+                                          twinvq_common.o
OBJS-$(CONFIG_VP3_DECODER)             += vp3.o
OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56dsp.o \
                                          vp56rac.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 6bfc042..5896191 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -330,6 +330,7 @@ void avcodec_register_all(void)
    REGISTER_DECODER(TWINVQ,            twinvq);
    REGISTER_DECODER(VMDAUDIO,          vmdaudio);
    REGISTER_ENCDEC (VORBIS,            vorbis);
+    REGISTER_DECODER(VOXWARE,           voxware);
    REGISTER_DECODER(WAVPACK,           wavpack);
    REGISTER_DECODER(WMALOSSLESS,       wmalossless);
    REGISTER_DECODER(WMAPRO,            wmapro);
diff --git a/libavcodec/metasound.c b/libavcodec/metasound.c
new file mode 100644
index 0000000..b2c86f1
--- /dev/null
+++ b/libavcodec/metasound.c
@@ -0,0 +1,480 @@
+/*
+ * VoxWare MetaSound decoder
+ * Copyright (c) 2013 Konstantin Shishkov
+ * based on
+ * TwinVQ decoder
+ * Copyright (c) 2009 Vitor Sessak
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <math.h>
+#include <stdint.h>
+
+#define BITSTREAM_READER_LE
+#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
+#include "avcodec.h"
+#include "get_bits.h"
+#include "fft.h"
+#include "internal.h"
+#include "lsp.h"
+#include "sinewin.h"
+
+#include "twinvq_common.h"
+#include "metasound_data.h"
+
+/**
+ * Inverse quantization. Read CB coefficients for cb1 and cb2 from the
+ * bitstream, sum the corresponding vectors and write the result to *out
+ * after permutation.
+ */
+static void dequant(TwinContext *tctx, GetBitContext *gb, float *out,
+                    enum FrameType ftype,
+                    const int16_t *cb0, const int16_t *cb1, int cb_len)
+{
+    int pos = 0;
+    int i, j;
+
+    for (i = 0; i < tctx->n_div[ftype]; i++) {
+        int tmp0, tmp1;
+        int sign0 = 1;
+        int sign1 = 1;
+        const int16_t *tab0, *tab1;
+        int length = tctx->length[ftype][i >= tctx->length_change[ftype]];
+        int bitstream_second_part = (i >= tctx->bits_main_spec_change[ftype]);


The parentheses are pretty redundant here

+
+        int bits = tctx->bits_main_spec[0][ftype][bitstream_second_part];
+        tmp0 = get_bits(gb, bits);
+        if (bits == 7) {
+            if (tmp0 & 0x40)
+                sign0 = -1;
+            tmp0 &= 0x3F;
+        }
+
+        bits = tctx->bits_main_spec[1][ftype][bitstream_second_part];
+
+        tmp1 = get_bits(gb, bits);
+        if (bits == 7) {
+            if (tmp1 & 0x40)
+                sign1 = -1;
+            tmp1 &= 0x3F;
+        }
+
+        tab0 = cb0 + tmp0*cb_len;
+        tab1 = cb1 + tmp1*cb_len;
+
+        for (j = 0; j < length; j++)
+            out[tctx->permut[ftype][pos+j]] = sign0*tab0[j] + sign1*tab1[j];


You could add some spaces around the + here

+
+        pos += length;
+    }
+
+}
+
+/**
+ * Sum to data a periodic peak of a given period, width and shape.
+ *
+ * @param period the period of the peak divised by 400.0
+ */
+static void add_peak(float period, int width, const float *shape,
+                     float ppc_gain, float *speech, int len)
+{
+    int i, j;
+
+    const float *shape_end = shape + len;
+    int center;
+
+    // First peak centered around zero
+    for (i = 0; i < width/2; i++)
+        speech[i] += ppc_gain * *shape++;
+
+    for (i = 1; i < ROUNDED_DIV(len,width) ; i++) {


Spaces after the comma

+        center = (int)(i * period + 0.5);
+        for (j = width/-2; j < (width-1)/2+1; j++)

What's the point in /-2, wouldn't it be more straightforward as -width/2?Or is there some corner case that depends on having it written in thisform?

The rest of this function could also use some spaces around operators aswell - just as you mentioned.

+            speech[j+center] += ppc_gain * *shape++;
+    }
+
+    // For the last block, be careful not to go beyond the end of the buffer
+    center = (int)(i * period + 0.5);
+    for (j = -width/2; j < (width - 1)/2 + 1 && shape < shape_end; j++)
+        speech[j+center] += ppc_gain * *shape++;
+}
+
+static void decode_ppc(TwinContext *tctx, int period_coef, const float *shape,
+                       float ppc_gain, float *speech)
+{
+    const MetasoundModeTab *mtab = tctx->mtab;
+    int isampf = tctx->avctx->sample_rate/1000;
+    int ibps = tctx->avctx->bit_rate/(1000 * tctx->avctx->channels);
+    int width;
+
+    float ratio = (float)mtab->size / isampf;
+    float min_period, max_period, period_range, period;
+    float some_mult;
+
+    if (tctx->avctx->channels == 1) {
+        min_period = log2(ratio * 0.2);
+        max_period = min_period + log2(6);
+    } else {
+        min_period = (int)(    ratio * 0.2 * 400 + 0.5) / 400.0;
+        max_period = (int)(6 * ratio * 0.2 * 400 + 0.5) / 400.0;
+    }
+    period_range = max_period - min_period;
+    period       = min_period + period_coef * period_range / ((1 << 
mtab->ppc_period_bit) - 1);
+    if (tctx->avctx->channels == 1)
+        period = powf(2.0, period);
+    else
+        period = (int)(period * 400 + 0.5) / 400.0;
+
+    switch (isampf) {
+    case  8: some_mult = 2.0; break;
+    case 11: some_mult = 3.0; break;
+    case 16: some_mult = 3.0; break;
+    case 22: some_mult = ibps == 32 ? 2.0 : 4.0; break;
+    case 44: some_mult = 8.0; break;
+    default: some_mult = 4.0;
+    }
+
+    width = (int)(some_mult / (mtab->size / period) * mtab->ppc_shape_len);
+    if (isampf == 22 && ibps == 32)
+        width = (int)((2.0 / period + 1) * width + 0.5);
+
+    add_peak(period, width, shape, ppc_gain, speech, mtab->ppc_shape_len);
+}
+
+static void dec_gain(TwinContext *tctx, GetBitContext *gb, enum FrameType 
ftype,
+                     float *out)
+{
+    const ModeTab *mtab = tctx->mtab;
+    int i, j;
+    int sub = mtab->fmode[ftype].sub;
+    float step     = AMP_MAX     / ((1 <<     GAIN_BITS) - 1);
+    float sub_step = SUB_AMP_MAX / ((1 << SUB_GAIN_BITS) - 1);
+
+    if (ftype == FT_LONG) {
+        for (i = 0; i < tctx->avctx->channels; i++)
+            out[i] = (1./(1<<13)) *
+                mulawinv(step * 0.5 + step * get_bits(gb, GAIN_BITS),
+                         AMP_MAX, MULAW_MU);
+    } else {
+        for (i = 0; i < tctx->avctx->channels; i++) {
+            float val = (1./(1<<23)) *
+                mulawinv(step * 0.5 + step * get_bits(gb, GAIN_BITS),
+                         AMP_MAX, MULAW_MU);
+
+            for (j = 0; j < sub; j++) {
+                out[i*sub + j] =
+                    val*mulawinv(sub_step* 0.5 +
+                                 sub_step* get_bits(gb, SUB_GAIN_BITS),
+                                 SUB_AMP_MAX, MULAW_MU);
+            }
+        }
+    }
+}
+
+static void dec_bark_env(TwinContext *tctx, const uint8_t *in, int use_hist,
+                         int ch, float *out, float gain, enum FrameType ftype)
+{
+    const MetasoundModeTab *mtab = tctx->mtab;
+    int i,j;
+    float *hist = tctx->bark_hist[ftype][ch];
+    float val = ((const float []) {0.4, 0.35, 0.28})[ftype];
+    int bark_n_coef  = mtab->fmode[ftype].bark_n_coef;
+    int fw_cb_len = mtab->fmode[ftype].bark_env_size / bark_n_coef;
+    int idx = 0;
+
+    if (tctx->avctx->channels == 1)
+        val = 0.5;
+    for (i = 0; i < fw_cb_len; i++)
+        for (j = 0; j < bark_n_coef; j++, idx++) {
+            float tmp2 =
+                mtab->fmode[ftype].bark_cb[fw_cb_len*in[j] + i] * (1./2048);
+            float st;
+
+            if (tctx->avctx->channels == 1)
+                st = use_hist ?
+                    tmp2 + val*hist[idx] + 1. : tmp2 + 1.;
+            else
+                st = use_hist ?
+                    (1. - val) * tmp2 + val*hist[idx] + 1. : tmp2 + 1.;
+
+            hist[idx] = tmp2;
+            if (st < 0.1) st = 0.1;
+
+            memset_float(out, st * gain, mtab->fmode[ftype].bark_tab[idx]);
+            out += mtab->fmode[ftype].bark_tab[idx];
+        }
+
+}
+
+static void read_and_decode_spectrum(TwinContext *tctx, GetBitContext *gb,
+                                     float *out, enum FrameType ftype)
+{
+    const MetasoundModeTab *mtab = tctx->mtab;
+    int channels = tctx->avctx->channels;
+    int sub = mtab->fmode[ftype].sub;
+    int block_size = mtab->size / sub;
+    float gain[CHANNELS_MAX*SUBBLOCKS_MAX];
+    float ppc_shape[PPC_SHAPE_LEN_MAX * CHANNELS_MAX * 4];
+    uint8_t bark1[CHANNELS_MAX][SUBBLOCKS_MAX][BARK_N_COEF_MAX];
+    uint8_t bark_use_hist[CHANNELS_MAX][SUBBLOCKS_MAX];
+
+    uint8_t lpc_idx1[CHANNELS_MAX];
+    uint8_t lpc_idx2[CHANNELS_MAX][LSP_SPLIT_MAX];
+    uint8_t lpc_hist_idx[CHANNELS_MAX];
+
+    int i, j, k;
+
+    dequant(tctx, gb, out, ftype,
+            mtab->fmode[ftype].cb0, mtab->fmode[ftype].cb1,
+            mtab->fmode[ftype].cb_len_read);
+
+    for (i = 0; i < channels; i++)
+        for (j = 0; j < sub; j++)
+            for (k = 0; k < mtab->fmode[ftype].bark_n_coef; k++)
+                bark1[i][j][k] =
+                    get_bits(gb, mtab->fmode[ftype].bark_n_bit);
+
+    for (i = 0; i < channels; i++)
+        for (j = 0; j < sub; j++)
+            bark_use_hist[i][j] = get_bits1(gb);
+
+    dec_gain(tctx, gb, ftype, gain);
+
+    for (i = 0; i < channels; i++) {
+        lpc_hist_idx[i] = get_bits(gb, tctx->mtab->lsp_bit0);
+        lpc_idx1    [i] = get_bits(gb, tctx->mtab->lsp_bit1);
+
+        for (j = 0; j < tctx->mtab->lsp_split; j++)
+            lpc_idx2[i][j] = get_bits(gb, tctx->mtab->lsp_bit2);
+    }
+
+    if (ftype == FT_LONG) {
+        int cb_len_p = (tctx->n_div[3] + mtab->ppc_shape_len*channels - 1)/
+            tctx->n_div[3];
+        dequant(tctx, gb, ppc_shape, FT_PPC, mtab->ppc_shape_cb,
+                mtab->ppc_shape_cb + cb_len_p*PPC_SHAPE_CB_SIZE, cb_len_p);
+    }
+
+    for (i = 0; i < channels; i++) {
+        float *chunk = out + mtab->size * i;
+        float lsp[LSP_COEFS_MAX];
+
+        for (j = 0; j < sub; j++) {
+            dec_bark_env(tctx, bark1[i][j], bark_use_hist[i][j], i,
+                         tctx->tmp_buf, gain[sub*i+j], ftype);
+
+            tctx->fdsp.vector_fmul(chunk + block_size*j, chunk + block_size*j,
+                                   tctx->tmp_buf, block_size);
+
+        }
+
+        if (ftype == FT_LONG) {
+            float pgain_base = tctx->avctx->channels == 2 ? 25000. : 20000.;
+            float pgain_step = pgain_base / ((1 << mtab->pgain_bit) - 1);
+            int p_coef = get_bits(gb, tctx->mtab->ppc_period_bit);
+            int g_coef = get_bits(gb, tctx->mtab->pgain_bit);
+            float v = 1./8192*
+                mulawinv(pgain_step*g_coef+ pgain_step/2, pgain_base, 
mtab->peak_per2wid);
+
+            decode_ppc(tctx, p_coef, ppc_shape + i*mtab->ppc_shape_len, v,
+                       chunk);
+        }
+
+        twinvq_decode_lsp(tctx, lpc_idx1[i], lpc_idx2[i], lpc_hist_idx[i],
+                          lsp, tctx->lsp_hist[i]);
+
+        twinvq_dec_lpc_spectrum_inv(tctx, lsp, ftype, tctx->tmp_buf);
+
+        for (j = 0; j < mtab->fmode[ftype].sub; j++) {
+            tctx->fdsp.vector_fmul(chunk, chunk, tctx->tmp_buf, block_size);
+            chunk += block_size;
+        }
+    }
+}
+
+static int metasound_decode_frame(AVCodecContext * avctx, void *data,
+                             int *got_frame_ptr, AVPacket *avpkt)
+{
+    AVFrame *frame     = data;
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
+    TwinContext *tctx = avctx->priv_data;
+    GetBitContext gb;
+    const MetasoundModeTab *mtab = tctx->mtab;
+    float **out = NULL;
+    enum FrameType ftype;
+    int window_type, ret;
+    static const enum FrameType wtype_to_ftype_table[] = {
+        FT_LONG,   FT_LONG, FT_SHORT, FT_LONG,
+        FT_MEDIUM, FT_LONG, FT_LONG,  FT_MEDIUM, FT_MEDIUM
+    };
+
+    if (buf_size*8 < avctx->bit_rate*mtab->size/avctx->sample_rate) {
+        av_log(avctx, AV_LOG_ERROR,
+               "Frame too small (%d bytes). Truncated file?\n", buf_size);
+        return AVERROR(EINVAL);
+    }
+
+    /* get output buffer */
+    if (tctx->discarded_packets >= 2) {
+        frame->nb_samples = mtab->size;
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) {
+            av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+            return ret;
+        }
+        out = (float **)frame->extended_data;
+    }
+
+    init_get_bits(&gb, buf, buf_size * 8);
+    window_type = get_bits(&gb, WINDOW_TYPE_BITS);
+
+    if (window_type > 8) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid window type, broken sample?\n");
+        return -1;
+    }
+
+    ftype = wtype_to_ftype_table[window_type];
+    if (ftype != FT_SHORT)
+        get_bits(&gb, 2);
+
+    read_and_decode_spectrum(tctx, &gb, tctx->spectrum, ftype);
+
+    twinvq_imdct_output(tctx, ftype, window_type, out);
+
+    FFSWAP(float*, tctx->curr_frame, tctx->prev_frame);
+
+    if (tctx->discarded_packets < 2) {
+        tctx->discarded_packets++;
+        *got_frame_ptr = 0;
+        return buf_size;
+    }
+
+    *got_frame_ptr = 1;
+
+    return buf_size;
+}
+
+typedef struct MSProps {
+    uint32_t tag;
+    int      bit_rate;
+    int      channels;
+    int      sample_rate;
+} MSProps;
+
+static const MSProps codec_props[] = {
+    { MKTAG('V','X','0','3'),  6, 1,  8000 },
+    { MKTAG('V','X','0','4'), 12, 2,  8000 },
+
+    { MKTAG('V','O','X','i'),  8, 1,  8000 },
+    { MKTAG('V','O','X','j'), 10, 1, 11025 },
+    { MKTAG('V','O','X','k'), 16, 1, 16000 },
+    { MKTAG('V','O','X','L'), 24, 1, 22050 },
+    { MKTAG('V','O','X','q'), 32, 1, 44100 },
+    { MKTAG('V','O','X','r'), 40, 1, 44100 },
+    { MKTAG('V','O','X','s'), 48, 1, 44100 },
+    { MKTAG('V','O','X','t'), 16, 2,  8000 },
+    { MKTAG('V','O','X','u'), 20, 2, 11025 },
+    { MKTAG('V','O','X','v'), 32, 2, 16000 },
+    { MKTAG('V','O','X','w'), 48, 2, 22050 },
+    { MKTAG('V','O','X','x'), 64, 2, 44100 },
+    { MKTAG('V','O','X','y'), 80, 2, 44100 },
+    { MKTAG('V','O','X','z'), 96, 2, 44100 },
+
+    { 0, 0, 0, 0 }
+};
+
+static av_cold int metasound_decode_init(AVCodecContext *avctx)
+{
+    int ret;
+    TwinContext *tctx = avctx->priv_data;
+    int isampf, ibps;
+    uint32_t tag;
+    const MSProps *props = codec_props;
+
+    tctx->avctx       = avctx;
+    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+
+    if (!avctx->extradata || avctx->extradata_size < 16) {
+        av_log(avctx, AV_LOG_ERROR, "Missing or incomplete extradata\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    tag = AV_RL32(avctx->extradata + 12);
+
+    for (;;) {
+        if (!props->tag) {
+            av_log(avctx, AV_LOG_ERROR, "Could not find tag %08X\n", tag);
+            return AVERROR_INVALIDDATA;
+        }
+        if (props->tag == tag) {
+            avctx->sample_rate = props->sample_rate;
+            avctx->channels    = props->channels;
+            avctx->bit_rate    = props->bit_rate * 1000;
+            isampf             = avctx->sample_rate / 1000;
+            break;
+        }
+        props++;
+    }
+
+    if (avctx->channels <= 0 || avctx->channels > CHANNELS_MAX) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %i\n",
+               avctx->channels);
+        return -1;
+    }
+    avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO :
+                                                   AV_CH_LAYOUT_STEREO;
+
+    ibps = avctx->bit_rate / (1000 * avctx->channels);
+
+    switch ((avctx->channels << 16) + (isampf << 8) + ibps) {
+    case (1 << 16) + ( 8 << 8) +  8: tctx->mtab = &metasound_mode0808;  break;
+    case (1 << 16) + (16 << 8) + 16: tctx->mtab = &metasound_mode1616;  break;
+    case (1 << 16) + (44 << 8) + 32: tctx->mtab = &metasound_mode4432;  break;
+    case (2 << 16) + (44 << 8) + 48: tctx->mtab = &metasound_mode4448s; break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "This version does not support %d kHz - %d 
kbit/s/ch mode.\n", isampf, ibps);
+        return -1;
+    }
+
+    avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);
+    if ((ret = twinvq_init_mdct_win(tctx))) {
+        av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n");
+        twinvq_decode_close(avctx);
+        return ret;
+    }
+    twinvq_init_bitstream_params(tctx, 1);
+
+    memset_float(tctx->bark_hist[0][0], 0.1, FF_ARRAY_ELEMS(tctx->bark_hist));
+
+    return 0;
+}
+
+AVCodec ff_voxware_decoder = {
+    .name           = "metasound",
+    .type           = AVMEDIA_TYPE_AUDIO,
+    .id             = AV_CODEC_ID_VOXWARE,
+    .priv_data_size = sizeof(TwinContext),
+    .init           = metasound_decode_init,
+    .close          = twinvq_decode_close,
+    .decode         = metasound_decode_frame,
+    .capabilities   = CODEC_CAP_DR1,
+    .long_name      = NULL_IF_CONFIG_SMALL("VoxWare MetaSound"),
+    .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                      AV_SAMPLE_FMT_NONE },
+};

The tables could perhaps use a little prettyprinting, but OTOH I sincerelydoubt anybody will ever try to read them manually anyway.


Other than this, it looks pretty good to me.

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] VoxWare MetaSound decoder

Reply via email to