Re: [libav-devel] [PATCH] lavc: add a native Opus decoder.

Kostya Shishkov Mon, 21 Apr 2014 06:29:29 -0700

On Sun, Apr 20, 2014 at 09:38:35PM +0200, Anton Khirnov wrote:
> Initial implementation by Andrew D'Addesio <[email protected]> during
> GSoC 2012.
> 
> Completion by Anton Khirnov <[email protected]>, sponsored by the
> Mozilla Corporation.
> ---
>  Changelog                |    1 +
>  libavcodec/Makefile      |    4 +
>  libavcodec/allcodecs.c   |    2 +
>  libavcodec/opus.c        |  428 +++++++++
>  libavcodec/opus.h        |  424 +++++++++
>  libavcodec/opus_celt.c   | 2185 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/opus_imdct.c  |  264 ++++++
>  libavcodec/opus_parser.c |   74 ++
>  libavcodec/opus_silk.c   | 1592 +++++++++++++++++++++++++++++++++
>  libavcodec/opusdec.c     |  673 ++++++++++++++
>  libavcodec/version.h     |    2 +-
>  tests/Makefile           |    1 +
>  tests/fate/opus.mak      |   21 +
>  13 files changed, 5670 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/opus.c
>  create mode 100644 libavcodec/opus.h
>  create mode 100644 libavcodec/opus_celt.c
>  create mode 100644 libavcodec/opus_imdct.c
>  create mode 100644 libavcodec/opus_parser.c
>  create mode 100644 libavcodec/opus_silk.c
>  create mode 100644 libavcodec/opusdec.c
>  create mode 100644 tests/fate/opus.mak
> 
[SILK part]
> diff --git a/libavcodec/opus_silk.c b/libavcodec/opus_silk.c
> new file mode 100644
> index 0000000..20db273
> --- /dev/null
> +++ b/libavcodec/opus_silk.c
> @@ -0,0 +1,1592 @@
> +/*
> + * Copyright (c) 2012 Andrew D'Addesio
> + * Copyright (c) 2013-2014 Mozilla Corporation
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +/**
> + * @file
> + * Opus SILK decoder
> + */
> +
> +#include <stdint.h>
> +
> +#include "opus.h"
> +
> +typedef struct SilkFrame {
> +    int coded;
> +    int log_gain;
> +    int16_t nlsf[16];
> +    float    lpc[16];
> +
> +    float output     [2 * SILK_HISTORY];
> +    float lpc_history[2 * SILK_HISTORY];
> +    float residual   [2 * SILK_HISTORY];
> +    int primarylag;
> +
> +    int prev_voiced;
> +} SilkFrame;
> +
> +struct SilkContext {
> +    AVCodecContext *avctx;
> +    int output_channels;
> +
> +    int midonly;
> +    int subframes;
> +    int sflength;
> +    int flength;
> +    int nlsf_interp_factor;
> +
> +    enum OpusBandwidth bandwidth;
> +
> +    SilkFrame frame[2];
> +    float prev_stereo_weights[2];
> +    float stereo_weights[2];
> +
> +    int prev_coded_channels;
> +};
> +
[...]
> +
> +static inline void silk_stabilize_lsf(int16_t nlsf[16], int order, const 
> uint16_t min_delta[17])
> +{
> +    int pass;
> +    for (pass = 0; 1; pass++) {
> +        int i, k, min_diff = 0;
> +        for (i = 0; i < order+1; i++) {
> +            int low  = i != 0     ? nlsf[i-1] : 0;
> +            int high = i != order ? nlsf[i]   : 32768;
> +            int diff = (high - low) - (min_delta[i]);
> +
> +            if (diff < min_diff) {
> +                min_diff = diff;
> +                k = i;
> +
> +                if (pass == 20)
> +                    break;
> +            }
> +        }
> +        if (min_diff == 0) /* no issues; stabilized */
> +            return;
> +
> +        if (pass != 20) {
> +            /* wiggle one or two LSFs */
> +            if (k == 0) {
> +                /* repel away from lower bound */
> +                nlsf[0] = min_delta[0];
> +            } else if (k == order) {
> +                /* repel away from higher bound */
> +                nlsf[order-1] = 32768 - min_delta[order];
> +            } else {
> +                /* repel away from current position */
> +                int min_center = 0, max_center = 32768, center_val;
> +
> +                /* lower extent */
> +                for (i = 0; i < k; i++)
> +                    min_center += min_delta[i];
> +                min_center += min_delta[k] >> 1;
> +
> +                /* upper extent */
> +                for (i = order; i > k; i--)
> +                    max_center -= min_delta[k];
> +                max_center -= min_delta[k] >> 1;
> +
> +                /* move apart */
> +                center_val = nlsf[k-1] + nlsf[k];
> +                center_val = (center_val>>1) + (center_val&1); // rounded 
> divide by 2
> +                if (center_val < min_center)      center_val = min_center;
> +                else if (center_val > max_center) center_val = max_center;
> +                nlsf[k-1] = center_val - (min_delta[k]>>1);
> +                nlsf[k] = nlsf[k-1] + min_delta[k];
> +            }
> +        } else {
> +            /* resort to the fall-back method, the standard method for LSF 
> stabilization */
> +
> +            /* sort; as the LSFs should be nearly sorted, use insertion sort 
> */
> +            for (i = 1; i < order; i++) {
> +                int j, value = nlsf[i];
> +                for (j = i-1; j >= 0 && nlsf[j] > value; j--)
> +                    nlsf[j+1] = nlsf[j];
> +                nlsf[j+1] = value;
> +            }
> +
> +            /* push forwards to increase distance */
> +            if (nlsf[0] < min_delta[0])
> +                nlsf[0] = min_delta[0];
> +            for (i = 1; i < order; i++)
> +                if (nlsf[i] < nlsf[i-1] + min_delta[i])
> +                    nlsf[i] = nlsf[i-1] + min_delta[i];
> +
> +            /* push backwards to increase distance */
> +            if (nlsf[order-1] > 32768 - min_delta[order])
> +                nlsf[order-1] = 32768 - min_delta[order];
> +            for (i = order-2; i >= 0; i--)
> +                if (nlsf[i] > nlsf[i+1] - min_delta[i+1])
> +                    nlsf[i] = nlsf[i+1] - min_delta[i+1];
> +
> +            return;
> +        }
> +    }


it's rather obvious from the code that the retry loop should run up to 20
times and in case there's no convergence a tail should be executed

> +}
> +
> +static inline int silk_is_lpc_stable(const int16_t lpc[16], int order)
> +{
> +    int k, j, DC_resp = 0;
> +    int32_t lpc32[2][16];       // Q24
> +    int totalinvgain = 1 << 30; // 1.0 in Q30
> +    int32_t *row = lpc32[0], *prevrow;
> +
> +    /* initialize the first row for the Levinson recursion */
> +    for (k = 0; k < order; k++) {
> +        DC_resp += lpc[k];
> +        row[k] = lpc[k] << 12;
> +    }
> +
> +    if (DC_resp >= 4096)
> +        return 0;
> +
> +    /* check if prediction gain pushes any coefficients too far */
> +    for (k = order - 1; 1; k--) {
> +        int rc;      // Q31; reflection coefficient
> +        int gaindiv; // Q30; inverse of the gain (the divisor)
> +        int gain;    // gain for this reflection coefficient
> +        int fbits;   // fractional bits used for the gain
> +        int error;   // Q29; estimate of the error of our partial estimate 
> of 1/gaindiv
> +
> +        if (FFABS(row[k]) > 16773022)
> +            return 0;
> +
> +        rc      = -(row[k] << 7);
> +        gaindiv = (1<<30) - MULH(rc, rc);
> +
> +        totalinvgain = MULH(totalinvgain, gaindiv) << 2;
> +        if (k == 0)
> +            return (totalinvgain >= 107374);
> +
> +        /* approximate 1.0/gaindiv */
> +        fbits = ilog(gaindiv);
> +        gain  = ((1<<29) - 1) / (gaindiv >> (fbits+1-16)); // Q<fbits-16>
> +        error = (1<<29) - MULL(gaindiv << (15+16-fbits), gain, 16);
> +        gain  = ((gain << 16) + (error*gain >> 13));

Diego will cry when he sees this

> +
> +        /* switch to the next row of the LPC coefficients */
> +        prevrow = row;
> +        row = lpc32[k & 1];
> +
> +        for (j = 0; j < k; j++) {
> +            int x = prevrow[j] - ROUND_MULL(prevrow[k-j-1], rc, 31);
> +            row[j] = ROUND_MULL(x, gain, fbits);
> +        }
> +    }
> +}
> +
> +static void silk_lsp2poly(const int32_t lsp[16], int32_t pol[16], int 
> half_order)
> +{
> +    int i, j;
> +
> +    pol[0] = 65536; // 1.0 in Q16
> +    pol[1] = -lsp[0];
> +
> +    for (i = 1; i < half_order; i++) {
> +        pol[i+1] = (pol[i-1] << 1) - ROUND_MULL(lsp[2*i], pol[i], 16);
> +        for (j = i; j > 1; j--)
> +            pol[j] += pol[j-2] - ROUND_MULL(lsp[2*i], pol[j-1], 16);
> +
> +        pol[1] -= lsp[2*i];
> +    }
> +}

I somewhat wonder if one of the existing functions can be used instead

> +
> +static void silk_lsf2lpc(const int16_t nlsf[16], float lpcf[16], int order)
> +{
> +    int i, k;
> +    int32_t lsp[16];     // Q17; 2*cos(LSF)
> +    int32_t p[9], q[9];  // Q16
> +    int32_t lpc32[16];   // Q17
> +    int16_t lpc[16];     // Q12
> +
> +    /* convert the LSFs to LSPs, i.e. 2*cos(LSF) */
> +    for (k = 0; k < order; k++) {
> +        int index = nlsf[k] >> 8;
> +        int offset = nlsf[k] & 255;
> +        int k2 = (order == 10) ? silk_lsf_ordering_nbmb[k] : 
> silk_lsf_ordering_wb[k];
> +
> +        /* interpolate and round */
> +        lsp[k2]  = silk_cosine[index] << 8;
> +        lsp[k2] += (silk_cosine[index+1] - silk_cosine[index])*offset;
> +        lsp[k2]  = (lsp[k2] + 4) >> 3;
> +    }
> +
> +    silk_lsp2poly(lsp  , p, order>>1);
> +    silk_lsp2poly(lsp+1, q, order>>1);
> +
> +    /* reconstruct A(z) */
> +    for (k = 0; k < order>>1; k++) {
> +        lpc32[k]         = -p[k+1] - p[k] - q[k+1] + q[k];
> +        lpc32[order-k-1] = -p[k+1] - p[k] + q[k+1] - q[k];
> +    }
> +
> +    /* limit the range of the LPC coefficients to each fit within an int16_t 
> */
> +    for (i = 0; i < 10; i++) {
> +        int j;
> +        unsigned int maxabs = 0;
> +        for (j = 0, k = 0; j < order; j++) {
> +            unsigned int x = FFABS(lpc32[k]);
> +            if (x > maxabs) {
> +                maxabs = x; // Q17
> +                k      = j;
> +            }
> +        }
> +
> +        maxabs = (maxabs + 16) >> 5; // convert to Q12
> +
> +        if (maxabs > 32767) {
> +            /* perform bandwidth expansion */
> +            unsigned int chirp, chirp_base; // Q16
> +            maxabs = FFMIN(maxabs, 163838); // anything above this overflows 
> chirp's numerator
> +            chirp_base = chirp = 65470 - ((maxabs - 32767) << 14) / ((maxabs 
> * (k+1)) >> 2);
> +
> +            for (k = 0; k < order; k++) {
> +                lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16);
> +                chirp    = (chirp_base * chirp + 32768) >> 16;
> +            }
> +        } else break;
> +    }
> +
> +    if (i == 10) {
> +        /* time's up: just clamp */
> +        for (k = 0; k < order; k++) {
> +            int x = (lpc32[k] + 16) >> 5;
> +            lpc[k] = av_clip_int16(x);
> +            lpc32[k] = lpc[k] << 5; // shortcut mandated by the spec; drops 
> lower 5 bits
> +        }
> +    } else {
> +        for (k = 0; k < order; k++)
> +            lpc[k] = (lpc32[k] + 16) >> 5;
> +    }
> +
> +    /* if the prediction gain causes the LPC filter to become unstable,
> +       apply further bandwidth expansion on the Q17 coefficients */
> +    for (i = 1; i <= 16 && !silk_is_lpc_stable(lpc, order); i++) {
> +        unsigned int chirp, chirp_base;
> +        chirp_base = chirp = 65536 - (1 << i);
> +
> +        for (k = 0; k < order; k++) {
> +            lpc32[k] = ROUND_MULL(lpc32[k], chirp, 16);
> +            lpc[k]   = (lpc32[k] + 16) >> 5;
> +            chirp    = (chirp_base * chirp + 32768) >> 16;
> +        }
> +    }
> +
> +    for (i = 0; i < order; i++)
> +        lpcf[i] = (float)lpc[i] / 4096.0f;
> +}
> +
> +static inline void silk_decode_lpc(SilkContext *s, SilkFrame *frame,
> +                                   OpusRangeCoder *rc,
> +                                   float lpc_leadin[16], float lpc[16],
> +                                   int *lpc_order, int *has_lpc_leadin, int 
> voiced)
> +{
> +    int i;
> +    int order;                   // order of the LP polynomial; 10 for NB/MB 
> and 16 for WB
> +    int8_t  lsf_i1, lsf_i2[16];  // stage-1 and stage-2 codebook indices
> +    int16_t lsf_res[16];         // residual as a Q10 value
> +    int16_t nlsf[16];            // Q15
> +
> +    *lpc_order = order = (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND) ? 10 : 16;

you use s->bandwidth != OPUS_BANDWIDTH_WIDEBAND so often here that it deserves
a local flag

> +
> +    /* obtain LSF stage-1 and stage-2 indices */
> +    lsf_i1 = opus_rc_getsymbol(rc, silk_model_lsf_s1[s->bandwidth ==
> +                                       OPUS_BANDWIDTH_WIDEBAND][voiced]);
> +    for (i = 0; i < order; i++) {
> +        int index = (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND)
> +                    ? silk_lsf_s2_model_sel_nbmb[lsf_i1][i]
> +                    : silk_lsf_s2_model_sel_wb[lsf_i1][i];
> +        lsf_i2[i] = opus_rc_getsymbol(rc, silk_model_lsf_s2[index]) - 4;
> +        if (lsf_i2[i] == -4)     lsf_i2[i] -= opus_rc_getsymbol(rc, 
> silk_model_lsf_s2_ext);
> +        else if (lsf_i2[i] == 4) lsf_i2[i] += opus_rc_getsymbol(rc, 
> silk_model_lsf_s2_ext);
> +    }
> +
> +    /* reverse the backwards-prediction step */
> +    for (i = order - 1; i >= 0; i--) {
> +        int qstep = (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND) ? 11796 : 9830;
> +
> +        lsf_res[i] = lsf_i2[i] << 10;
> +        if (lsf_i2[i] < 0)      lsf_res[i] += 102;
> +        else if (lsf_i2[i] > 0) lsf_res[i] -= 102;
> +        lsf_res[i] = (lsf_res[i] * qstep) >> 16;
> +
> +        if (i+1 < order) {
> +            int weight = (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND)
> +                         ? 
> silk_lsf_pred_weights_nbmb[silk_lsf_weight_sel_nbmb[lsf_i1][i]][i]
> +                         : 
> silk_lsf_pred_weights_wb[silk_lsf_weight_sel_wb[lsf_i1][i]][i];
> +            lsf_res[i] += (lsf_res[i+1] * weight) >> 8;
> +        }
> +    }
> +
> +    /* reconstruct the NLSF coefficients from the supplied indices */
> +    for (i = 0; i < order; i++) {
> +        const uint8_t * codebook = (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND)
> +                                   ? silk_lsf_codebook_nbmb[lsf_i1]
> +                                   : silk_lsf_codebook_wb[lsf_i1];
> +        int cur, prev, next, weight_sq, weight, ipart, fpart, y, value;
> +
> +        /* find the weight of the residual */
> +        /* TODO: precompute */
> +        cur = codebook[i];
> +        prev = i ? codebook[i-1] : 0;
> +        next = i+1 < order ? codebook[i+1] : 256;
> +        weight_sq = (1024/(cur - prev) + 1024/(next - cur)) << 16;
> +
> +        /* approximate square-root with mandated fixed-point arithmetic */
> +        ipart = ilog(weight_sq);
> +        fpart = (weight_sq >> (ipart-8)) & 127;
> +        y = ((ipart&1) ? 32768 : 46214) >> ((32-ipart)>>1);
> +        weight = y + ((213*fpart*y) >> 16);
> +
> +        value = (cur << 7) + (lsf_res[i] << 14) / weight;
> +        nlsf[i] = av_clip(value, 0, 32767);
> +    }
> +
> +    /* stabilize the NLSF coefficients */
> +    silk_stabilize_lsf(nlsf, order, (s->bandwidth != OPUS_BANDWIDTH_WIDEBAND)
> +                                    ? silk_lsf_min_spacing_nbmb : 
> silk_lsf_min_spacing_wb);
> +
> +    /* produce an interpolation for the first 2 subframes, */
> +    /* and then convert both sets of NLSFs to LPC coefficients */
> +    *has_lpc_leadin = 0;
> +    if (s->subframes == 4) {
> +        int offset = opus_rc_getsymbol(rc, 
> silk_model_lsf_interpolation_offset);
> +        if (offset != 4 && frame->coded) {
> +            *has_lpc_leadin = 1;
> +            if (offset != 0) {
> +                int16_t nlsf_leadin[16];
> +                for (i = 0; i < order; i++)
> +                    nlsf_leadin[i] = frame->nlsf[i] +
> +                        ((nlsf[i] - frame->nlsf[i]) * offset >> 2);
> +                silk_lsf2lpc(nlsf_leadin, lpc_leadin, order);
> +            } else  /* avoid re-computation for a (roughly) 1-in-4 
> occurrence */
> +                memcpy(lpc_leadin, frame->lpc, 16*sizeof(float));
> +        } else
> +            offset = 4;
> +        s->nlsf_interp_factor = offset;
> +
> +        silk_lsf2lpc(nlsf, lpc, order);
> +    } else {
> +        s->nlsf_interp_factor = 4;
> +        silk_lsf2lpc(nlsf, lpc, order);
> +    }
> +
> +    memcpy(frame->nlsf, nlsf, order * sizeof(nlsf[0]));
> +    memcpy(frame->lpc,  lpc,  order * sizeof(lpc[0]));
> +}
> +
> +static inline void silk_count_children(OpusRangeCoder *rc, int model, 
> int32_t total,
> +                                       int32_t child[2])
> +{
> +    if (total != 0) {
> +        child[0] = opus_rc_getsymbol(rc,
> +                       silk_model_pulse_location[model] + (((total-1 + 5) * 
> (total-1)) >> 1));
> +        child[1] = total - child[0];
> +    } else {
> +        child[0] = 0;
> +        child[1] = 0;
> +    }
> +}
> +
> +static inline void silk_decode_excitation(SilkContext *s, OpusRangeCoder *rc,
> +                                          float excitationf[320],
> +                                          int qoffset_high, int active, int 
> voiced)
> +{
> +    int i;
> +    uint32_t seed;
> +    int shellblocks;
> +    int ratelevel;
> +    uint8_t pulsecount[20];     // total pulses in each shell block
> +    uint8_t lsbcount[20] = {0}; // raw lsbits defined for each pulse in each 
> shell block
> +    int32_t excitation[320];    // Q23
> +
> +    /* excitation parameters */
> +    seed = opus_rc_getsymbol(rc, silk_model_lcg_seed);
> +    shellblocks = silk_shell_blocks[s->bandwidth][s->subframes >> 2];
> +    ratelevel = opus_rc_getsymbol(rc, silk_model_exc_rate[voiced]);
> +
> +    for (i = 0; i < shellblocks; i++) {
> +        pulsecount[i] = opus_rc_getsymbol(rc, 
> silk_model_pulse_count[ratelevel]);
> +        if (pulsecount[i] == 17) {
> +            while (pulsecount[i] == 17 && ++lsbcount[i] != 10)
> +                pulsecount[i] = opus_rc_getsymbol(rc, 
> silk_model_pulse_count[9]);
> +            if (lsbcount[i] == 10)
> +                pulsecount[i] = opus_rc_getsymbol(rc, 
> silk_model_pulse_count[10]);
> +        }
> +    }
> +
> +    /* decode pulse locations using PVQ */
> +    for (i = 0; i < shellblocks; i++) {
> +        if (pulsecount[i] != 0) {
> +            int a, b, c, d;
> +            int32_t * location = excitation + 16*i;
> +            int32_t branch[4][2];
> +            branch[0][0] = pulsecount[i];
> +
> +            /* unrolled tail recursion */
> +            for (a = 0; a < 1; a++) {
> +                silk_count_children(rc, 0, branch[0][a], branch[1]);
> +                for (b = 0; b < 2; b++) {
> +                    silk_count_children(rc, 1, branch[1][b], branch[2]);
> +                    for (c = 0; c < 2; c++) {
> +                        silk_count_children(rc, 2, branch[2][c], branch[3]);
> +                        for (d = 0; d < 2; d++) {
> +                            silk_count_children(rc, 3, branch[3][d], 
> location);
> +                            location += 2;
> +                        }
> +                    }
> +                }
> +            }
> +        } else
> +            memset(excitation + 16*i, 0, 16*sizeof(int32_t));
> +    }
> +
> +    /* decode least significant bits */
> +    for (i = 0; i < shellblocks << 4; i++) {
> +        int bit;
> +        for (bit = 0; bit < lsbcount[i >> 4]; bit++)
> +            excitation[i] = (excitation[i] << 1) |
> +                            opus_rc_getsymbol(rc, silk_model_excitation_lsb);
> +    }
> +
> +    /* decode signs */
> +    for (i = 0; i < shellblocks << 4; i++) {
> +        if (excitation[i] != 0) {
> +            int sign = opus_rc_getsymbol(rc, 
> silk_model_excitation_sign[active+
> +                                         
> voiced][qoffset_high][FFMIN(pulsecount[i >> 4], 6)]);
> +            if (sign == 0)
> +                excitation[i] *= -1;
> +        }
> +    }
> +
> +    /* assemble the excitation */
> +    for (i = 0; i < shellblocks << 4; i++) {
> +        int value = excitation[i];
> +        excitation[i] = (value << 8) | 
> silk_quant_offset[voiced][qoffset_high];
> +        if (value < 0)      excitation[i] += 20;
> +        else if (value > 0) excitation[i] -= 20;
> +
> +        /* invert samples pseudorandomly */
> +        seed = 196314165*seed + 907633515;
> +        if (seed & 0x80000000)
> +            excitation[i] *= -1;
> +        seed += value;
> +
> +        excitationf[i] = (float)excitation[i] / 8388608.0f;

useless cast

> +    }
> +}
> +
> +static void silk_decode_frame(SilkContext *s, OpusRangeCoder *rc,
> +                              int frame_num, int channel, int 
> coded_channels, int active, int active1)
> +{
> +    /* per frame */
> +    int voiced;       // combines with active to indicate inactive, active, 
> or active+voiced
> +    int qoffset_high;
> +    int order;                             // order of the LPC coefficients
> +    float lpc_leadin[16], lpc_body[16];
> +    int has_lpc_leadin;
> +    float ltpscale;
> +
> +    /* per subframe */
> +    struct {
> +        float gain;
> +        int pitchlag;
> +        float ltptaps[5];
> +    } sf[4];
> +
> +    SilkFrame * const frame = s->frame + channel;
> +
> +    int i;
> +
> +    /* obtain stereo weights */
> +    if (coded_channels == 2 && channel == 0) {
> +        int n, wi[2], ws[2], w[2];
> +        n     = opus_rc_getsymbol(rc, silk_model_stereo_s1);
> +        wi[0] = opus_rc_getsymbol(rc, silk_model_stereo_s2) + 3 * (n / 5);
> +        ws[0] = opus_rc_getsymbol(rc, silk_model_stereo_s3);
> +        wi[1] = opus_rc_getsymbol(rc, silk_model_stereo_s2) + 3 * (n % 5);
> +        ws[1] = opus_rc_getsymbol(rc, silk_model_stereo_s3);
> +
> +        for (i = 0; i < 2; i++)
> +            w[i] = silk_stereo_weights[wi[i]] +
> +                   (((silk_stereo_weights[wi[i]+1] - 
> silk_stereo_weights[wi[i]]) * 6554) >> 16)
> +                    * (ws[i]*2 + 1);
> +
> +        s->stereo_weights[0] = (w[0] - w[1]) / 8192.0;
> +        s->stereo_weights[1] = w[1]          / 8192.0;
> +
> +        /* and read the mid-only flag */
> +        s->midonly = active1 ? 0 : opus_rc_getsymbol(rc, 
> silk_model_mid_only);
> +    }
> +
> +    /* obtain frame type */
> +    if (!active) {
> +        qoffset_high = opus_rc_getsymbol(rc, silk_model_frame_type_inactive);
> +        voiced = 0;
> +    } else {
> +        int type = opus_rc_getsymbol(rc, silk_model_frame_type_active);
> +        qoffset_high = type & 1;
> +        voiced = type >> 1;
> +    }
> +
> +    /* obtain subframe quantization gains */
> +    for (i = 0; i < s->subframes; i++) {
> +        int log_gain;     //Q7
> +        int ipart, fpart, lingain;
> +
> +        if (i == 0 && (frame_num == 0 || !frame->coded)) {
> +            /* gain is coded absolute */
> +            int x = opus_rc_getsymbol(rc, silk_model_gain_highbits[active + 
> voiced]);
> +            log_gain = (x<<3) | opus_rc_getsymbol(rc, 
> silk_model_gain_lowbits);
> +
> +            if (frame->coded)
> +                log_gain = FFMAX(log_gain, frame->log_gain - 16);
> +        } else {
> +            /* gain is coded relative */
> +            int delta_gain = opus_rc_getsymbol(rc, silk_model_gain_delta);
> +            log_gain = av_clip(FFMAX((delta_gain<<1) - 16,
> +                                     frame->log_gain + delta_gain - 4), 0, 
> 63);
> +        }
> +
> +        frame->log_gain = log_gain;
> +
> +        /* approximate 2**(x/128) with a Q7 (i.e. non-integer) input */
> +        log_gain = (log_gain * 0x1D1C71 >> 16) + 2090;
> +        ipart = log_gain >> 7;
> +        fpart = log_gain & 127;
> +        lingain = (1<<ipart) + ((-174 * fpart * (128-fpart) >>16) + fpart) * 
> ((1<<ipart) >> 7);
> +        sf[i].gain = (float)lingain / 65536.0f;
> +    }
> +
> +    /* obtain LPC filter coefficients */
> +    silk_decode_lpc(s, frame, rc, lpc_leadin, lpc_body, &order, 
> &has_lpc_leadin, voiced);
> +
> +    /* obtain pitch lags, if this is a voiced frame */
> +    if (voiced) {
> +        int lag_absolute = (!frame_num || !frame->prev_voiced);
> +        int primarylag;         // primary pitch lag for the entire SILK 
> frame
> +        int ltpfilter;
> +        const int8_t * offsets;
> +
> +        if (!lag_absolute) {
> +            int delta = opus_rc_getsymbol(rc, silk_model_pitch_delta);
> +            if (delta)
> +                primarylag = frame->primarylag + delta - 9;
> +            else
> +                lag_absolute = 1;
> +        }
> +
> +        if (lag_absolute) {
> +            /* primary lag is coded absolute */
> +            int highbits, lowbits;
> +            const uint16_t *model[] = {
> +                silk_model_pitch_lowbits_nb, silk_model_pitch_lowbits_mb,
> +                silk_model_pitch_lowbits_wb
> +            };
> +            highbits = opus_rc_getsymbol(rc, silk_model_pitch_highbits);
> +            lowbits  = opus_rc_getsymbol(rc, model[s->bandwidth]);
> +
> +            primarylag = silk_pitch_min_lag[s->bandwidth] +
> +                         highbits*silk_pitch_scale[s->bandwidth] + lowbits;
> +        }
> +        frame->primarylag = primarylag;
> +
> +        if (s->subframes == 2)
> +            offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
> +                     ? silk_pitch_offset_nb10ms[opus_rc_getsymbol(rc,
> +                                                
> silk_model_pitch_contour_nb10ms)]
> +                     : silk_pitch_offset_mbwb10ms[opus_rc_getsymbol(rc,
> +                                                
> silk_model_pitch_contour_mbwb10ms)];
> +        else
> +            offsets = (s->bandwidth == OPUS_BANDWIDTH_NARROWBAND)
> +                     ? silk_pitch_offset_nb20ms[opus_rc_getsymbol(rc,
> +                                                
> silk_model_pitch_contour_nb20ms)]
> +                     : silk_pitch_offset_mbwb20ms[opus_rc_getsymbol(rc,
> +                                                
> silk_model_pitch_contour_mbwb20ms)];
> +
> +        for (i = 0; i < s->subframes; i++)
> +            sf[i].pitchlag = av_clip(primarylag + offsets[i],
> +                                     silk_pitch_min_lag[s->bandwidth],
> +                                     silk_pitch_max_lag[s->bandwidth]);
> +
> +        /* obtain LTP filter coefficients */
> +        ltpfilter = opus_rc_getsymbol(rc, silk_model_ltp_filter);
> +        for (i = 0; i < s->subframes; i++) {
> +            int index, j;
> +            const uint16_t *filter_sel[] = {
> +                silk_model_ltp_filter0_sel, silk_model_ltp_filter1_sel,
> +                silk_model_ltp_filter2_sel
> +            };
> +            const int8_t (*filter_taps[])[5] = {
> +                silk_ltp_filter0_taps, silk_ltp_filter1_taps, 
> silk_ltp_filter2_taps
> +            };
> +            index = opus_rc_getsymbol(rc, filter_sel[ltpfilter]);
> +            for (j = 0; j < 5; j++)
> +                sf[i].ltptaps[j] = (float)filter_taps[ltpfilter][index][j] / 
> 128.0f;
> +        }
> +    }
> +
> +    /* obtain LTP scale factor */
> +    if (voiced && frame_num == 0)
> +        ltpscale = silk_ltp_scale_factor[opus_rc_getsymbol(rc,
> +                                         silk_model_ltp_scale_index)] / 
> 16384.0f;
> +    else ltpscale = 15565.0f/16384.0f;
> +
> +    /* generate the excitation signal for the entire frame */
> +    silk_decode_excitation(s, rc, frame->residual + SILK_HISTORY, 
> qoffset_high,
> +                           active, voiced);
> +
> +    /* skip synthesising the side channel if we want mono-only */
> +    if (s->output_channels == channel)
> +        return;
> +
> +    /* generate the output signal */
> +    for (i = 0; i < s->subframes; i++) {
> +        const float * lpc_coeff = (i < 2 && has_lpc_leadin) ? lpc_leadin : 
> lpc_body;
> +        float *dst    = frame->output      + SILK_HISTORY + i * s->sflength;
> +        float *resptr = frame->residual    + SILK_HISTORY + i * s->sflength;
> +        float *lpc    = frame->lpc_history + SILK_HISTORY + i * s->sflength;
> +        int j, k;
> +
> +        if (voiced) {
> +            int out_end;
> +            float scale;
> +
> +            if (i < 2 || s->nlsf_interp_factor == 4) {
> +                out_end = -i * s->sflength;
> +                scale   = ltpscale;
> +            } else {
> +                out_end = -(i - 2) * s->sflength;
> +                scale   = 1.0f;
> +            }
> +
> +            /* when the LPC coefficients change, a re-whitening filter is 
> used */
> +            /* to produce a residual that accounts for the change */
> +            for (j = - sf[i].pitchlag - order - 2; j < out_end; j++) {
> +                resptr[j] = dst[j];
> +                for (k = 0; k < order; k++)
> +                    resptr[j] -= lpc_coeff[k] * dst[j - k - 1];
> +                resptr[j] = av_clipf(resptr[j], -1.0f, 1.0f) * scale / 
> sf[i].gain;
> +            }
> +
> +            for (j = out_end; j < 0; j++) {
> +                resptr[j] = lpc[j];
> +                for (k = 0; k < order; k++)
> +                    resptr[j] -= lpc_coeff[k] * lpc[j - k - 1];
> +                resptr[j] = av_clipf(resptr[j], -1.0f, 1.0f) / sf[i].gain;
> +            }
> +
> +            /* LTP synthesis */
> +            for (j = 0; j < s->sflength; j++) {
> +                for (k = 0; k < 5; k++)
> +                    resptr[j] += sf[i].ltptaps[k] * resptr[j - 
> sf[i].pitchlag + 2 - k];
> +            }
> +        }
> +
> +        /* LPC synthesis */
> +        for (j = 0; j < s->sflength; j++) {
> +            float pred = 0.0;
> +            for (k = 1; k <= order; k++)
> +                pred += lpc_coeff[k - 1] * lpc[j - k];
> +
> +            lpc[j] = pred + resptr[j] * sf[i].gain;
> +            dst[j] = av_clipf(lpc[j], -1.0f, 1.0f);
> +        }
> +    }
> +
> +    frame->prev_voiced = voiced;
> +    memmove(frame->lpc_history, frame->lpc_history + s->flength, 
> SILK_HISTORY * sizeof(float));
> +    memmove(frame->output,      frame->output      + s->flength, 
> SILK_HISTORY * sizeof(float));
> +
> +    frame->coded = 1;
> +}
> +
> +static void silk_unmix_ms(SilkContext *s, float *l, float *r)
> +{
> +    float *mid    = s->frame[0].output + SILK_HISTORY - s->flength;
> +    float *side   = s->frame[1].output + SILK_HISTORY - s->flength;
> +    float w0_prev = s->prev_stereo_weights[0];
> +    float w1_prev = s->prev_stereo_weights[1];
> +    float w0      = s->stereo_weights[0];
> +    float w1      = s->stereo_weights[1];
> +    int n1        = silk_stereo_interp_len[s->bandwidth];
> +    int i;
> +
> +    for (i = 0; i < n1; i++) {
> +        float interp0 = w0_prev + i * (w0 - w0_prev) / n1;
> +        float interp1 = w1_prev + i * (w1 - w1_prev) / n1;
> +        float p0      = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]);
> +
> +        l[i] = av_clipf((1 + interp1) * mid[i - 1] + side[i - 1] + interp0 * 
> p0, -1.0, 1.0);
> +        r[i] = av_clipf((1 - interp1) * mid[i - 1] - side[i - 1] - interp0 * 
> p0, -1.0, 1.0);
> +    }
> +
> +    for (; i < s->flength; i++) {
> +        float p0 = 0.25 * (mid[i - 2] + 2 * mid[i - 1] + mid[i]);
> +
> +        l[i] = av_clipf((1 + w1) * mid[i - 1] + side[i - 1] + w0 * p0, -1.0, 
> 1.0);
> +        r[i] = av_clipf((1 - w1) * mid[i - 1] - side[i - 1] - w0 * p0, -1.0, 
> 1.0);
> +    }
> +
> +    memcpy(s->prev_stereo_weights, s->stereo_weights, 
> sizeof(s->stereo_weights));
> +}
> +
> +static void silk_flush_frame(SilkFrame *frame)
> +{
> +    if (!frame->coded)
> +        return;
> +
> +    memset(frame->output,      0, sizeof(frame->output));
> +    memset(frame->lpc_history, 0, sizeof(frame->lpc_history));
> +
> +    memset(frame->lpc,  0, sizeof(frame->lpc));
> +    memset(frame->nlsf, 0, sizeof(frame->nlsf));
> +
> +    frame->log_gain = 0;
> +
> +    frame->primarylag  = 0;
> +    frame->prev_voiced = 0;
> +    frame->coded       = 0;
> +}
> +
> +int ff_silk_decode_superframe(SilkContext *s, OpusRangeCoder *rc,
> +                              float *output[2],
> +                              enum OpusBandwidth bandwidth,
> +                              int coded_channels,
> +                              int duration_ms)
> +{
> +    int active[2][6], redundancy[2];
> +    int nb_frames, i, j;
> +
> +    if (bandwidth > OPUS_BANDWIDTH_WIDEBAND ||
> +        coded_channels > 2 || duration_ms > 60) {
> +        av_log(s->avctx, AV_LOG_ERROR, "Invalid parameters passed "
> +               "to the SILK decoder.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    nb_frames = 1 + (duration_ms > 20) + (duration_ms > 40);
> +    s->subframes = duration_ms / nb_frames / 5;         // 5ms subframes
> +    s->sflength  = 20 * (bandwidth + 2);
> +    s->flength   = s->sflength * s->subframes;
> +    s->bandwidth = bandwidth;
> +
> +    /* make sure to flush the side channel when switching from mono to 
> stereo */
> +    if (coded_channels > s->prev_coded_channels)
> +        silk_flush_frame(&s->frame[1]);
> +    s->prev_coded_channels = coded_channels;
> +
> +    /* read the LP-layer header bits */
> +    for (i = 0; i < coded_channels; i++) {
> +        for (j = 0; j < nb_frames; j++)
> +            active[i][j] = opus_rc_p2model(rc, 1);
> +
> +        redundancy[i] = opus_rc_p2model(rc, 1);
> +        if (redundancy[i]) {
> +            av_log(s->avctx, AV_LOG_ERROR, "LBRR frames present; this is 
> unsupported\n");
> +            return AVERROR_PATCHWELCOME;
> +        }
> +    }
> +
> +    for (i = 0; i < nb_frames; i++) {
> +        for (j = 0; j < coded_channels && !s->midonly; j++)
> +            silk_decode_frame(s, rc, i, j, coded_channels, active[j][i], 
> active[1][i]);
> +
> +        /* reset the side channel if it is not coded */
> +        if (s->midonly && s->frame[1].coded)
> +            silk_flush_frame(&s->frame[1]);
> +
> +        if (coded_channels == 1 || s->output_channels == 1) {
> +            for (j = 0; j < s->output_channels; j++) {
> +                memcpy(output[j] + i * s->flength,
> +                       s->frame[0].output + SILK_HISTORY - s->flength - 2,
> +                       s->flength * sizeof(float));
> +            }
> +        } else {
> +            silk_unmix_ms(s, output[0] + i * s->flength, output[1] + i * 
> s->flength);
> +        }
> +
> +        s->midonly        = 0;
> +    }
> +
> +    return nb_frames * s->flength;
> +}
> +
> +void ff_silk_free(SilkContext **ps)
> +{
> +    av_freep(ps);
> +}
> +
> +void ff_silk_flush(SilkContext *s)
> +{
> +    silk_flush_frame(&s->frame[0]);
> +    silk_flush_frame(&s->frame[1]);
> +
> +    memset(s->prev_stereo_weights, 0, sizeof(s->prev_stereo_weights));
> +}
> +
> +int ff_silk_init(AVCodecContext *avctx, SilkContext **ps, int 
> output_channels)
> +{
> +    SilkContext *s;
> +
> +    if (output_channels != 1 && output_channels != 2) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: 
> %d\n",
> +               output_channels);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    s = av_mallocz(sizeof(*s));
> +    if (!s)
> +        return AVERROR(ENOMEM);
> +
> +    s->avctx           = avctx;
> +    s->output_channels = output_channels;
> +
> +    ff_silk_flush(s);
> +
> +    *ps = s;
> +
> +    return 0;
> +}
> diff --git a/libavcodec/opusdec.c b/libavcodec/opusdec.c
> new file mode 100644
> index 0000000..eb8cc04
> --- /dev/null
> +++ b/libavcodec/opusdec.c
> @@ -0,0 +1,673 @@
> +/*
> + * Opus decoder
> + * Copyright (c) 2012 Andrew D'Addesio
> + * Copyright (c) 2013-2014 Mozilla Corporation
> + *
> + * This file is part of Libav.
> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +/**
> + * @file
> + * Opus decoder
> + * @author Andrew D'Addesio, Anton Khirnov
> + *
> + * Codec homepage: http://opus-codec.org/
> + * Specification: http://tools.ietf.org/html/rfc6716
> + * Ogg Opus specification: 
> https://tools.ietf.org/html/draft-ietf-codec-oggopus-03
> + *
> + * Ogg-contained .opus files can be produced with opus-tools:
> + * http://git.xiph.org/?p=opus-tools.git
> + */
> +
> +#include <stdint.h>
> +
> +#include "libavutil/attributes.h"
> +#include "libavutil/audio_fifo.h"
> +#include "libavutil/channel_layout.h"
> +#include "libavutil/opt.h"
> +
> +#include "libavresample/avresample.h"
> +
> +#include "avcodec.h"
> +#include "celp_filters.h"
> +#include "fft.h"
> +#include "get_bits.h"
> +#include "internal.h"
> +#include "mathops.h"
> +#include "opus.h"
> +
> +static const uint16_t silk_frame_duration_ms[16] = {
> +    10, 20, 40, 60,
> +    10, 20, 40, 60,
> +    10, 20, 40, 60,
> +    10, 20,
> +    10, 20,
> +};
> +
> +/* number of samples of silence to feed to the resampler
> + * at the beginning */
> +static const int silk_resample_delay[] = {
> +    4, 8, 11, 11, 11
> +};
> +
> +static const uint8_t celt_band_end[] = { 13, 17, 17, 19, 21 };
> +
> +static int get_silk_samplerate(int config)
> +{
> +    if (config < 4)
> +        return 8000;
> +    else if (config < 8)
> +        return 12000;
> +    return 16000;
> +}
> +
> +/**
> + * Range decoder
> + */
> +static int opus_rc_init(OpusRangeCoder *rc, const uint8_t *data, int size)
> +{
> +    int ret = init_get_bits8(&rc->gb, data, size);
> +    if (ret < 0)
> +        return ret;
> +
> +    rc->range = 128;
> +    rc->value = 127 - get_bits(&rc->gb, 7);
> +    rc->total_read_bits = 9;

why 9?

[...]
the rest looks passable

In general it's a horribly designed codec based on speech coding technologies
which make it even less fun to review.

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] lavc: add a native Opus decoder.

Reply via email to