Re: [libav-devel] [PATCH 2/2] dca: Add support for bit-exact reconstruction.

Alexandra Hájková Sat, 02 Jan 2016 09:04:52 -0800

This patch is broken for the upsample case (96 kHz XLL). That was not intended.

On Fri, Jan 1, 2016 at 4:01 PM, Alexandra Hájková
<[email protected]> wrote:
> Add bit-exact QMF and LFE filters, which make it possible for XLL
> streams to be decoded losslessly. The new -force_lossy option allows
> keeping lossy mode for XLL streams. This option was added to the
> fate-dca-xll test to keep its decoded output the same as the reference.
> ---
> This patch was tested with libdcadec (by foo86) used as the reference decoder.
>
>  doc/decoders.texi     |  11 ++
>  libavcodec/dca.h      |   9 +-
>  libavcodec/dca_exss.c |   9 +
>  libavcodec/dcadec.c   | 159 +++++++++++++---
>  libavcodec/dcadsp.c   | 487 
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/dcadsp.h   |  15 +-
>  tests/fate/audio.mak  |   2 +-
>  7 files changed, 660 insertions(+), 32 deletions(-)
>
> diff --git a/doc/decoders.texi b/doc/decoders.texi
> index 99d2008..9de3243 100644
> --- a/doc/decoders.texi
> +++ b/doc/decoders.texi
> @@ -53,4 +53,15 @@ Loud sounds are fully compressed.  Soft sounds are 
> enhanced.
>
>  @end table
>
> +@section dca
> +Fixed-point reconstruction for any kind of input can be
> +forced by using the @code{-request_sample_fmt @var{s32p}} option.
> +
> +@table @option
> +
> +@item -force_lossy 1
> +Force lossy mode for the XLL streams.
> +
> +@end table
> +
>  @c man end AUDIO DECODERS
> diff --git a/libavcodec/dca.h b/libavcodec/dca.h
> index 787a9c7..67945d5 100644
> --- a/libavcodec/dca.h
> +++ b/libavcodec/dca.h
> @@ -156,6 +156,7 @@ typedef struct DCAChan {
>      /* Half size is sufficient for core decoding, but for 96 kHz data
>       * we need QMF with 64 subbands and 1024 samples. */
>      DECLARE_ALIGNED(32, float, subband_fir_hist)[1024];
> +    DECLARE_ALIGNED(32, int, subband_hist)[1024];
>      DECLARE_ALIGNED(32, float, subband_fir_noidea)[64];
>
>      /* Primary audio coding side information */
> @@ -220,7 +221,8 @@ typedef struct DCAContext {
>      uint16_t core_downmix_codes[DCA_PRIM_CHANNELS_MAX + 1][4];   ///< 
> embedded downmix coefficients (9-bit codes)
>
>
> -    float lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];      ///< Low 
> frequency effect data
> +    int lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];      ///< Low 
> frequency effect data
> +    float lfe_data_flt[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];
>      int lfe_scale_factor;
>
>      /* Subband samples history (for ADPCM) */
> @@ -230,7 +232,7 @@ typedef struct DCAContext {
>
>      int output;                 ///< type of output
>
> -    float *samples_chanptr[DCA_PRIM_CHANNELS_MAX + 1];
> +    void *samples_chanptr[DCA_PRIM_CHANNELS_MAX + 1];
>      float *extra_channels[DCA_PRIM_CHANNELS_MAX + 1];
>      uint8_t *extra_channels_buffer;
>      unsigned int extra_channels_buffer_size;
> @@ -247,6 +249,9 @@ typedef struct DCAContext {
>      int core_ext_mask;          ///< present extensions in the core substream
>      int exss_ext_mask;          ///< Non-core extensions
>
> +    int fixed;                  ///< force using fixedpoint QMF
> +    int lossy;                  ///< force lossy decoding for the XLL stream
> +
>      /* XCh extension information */
>      int xch_present;            ///< XCh extension present and valid
>      int xch_base_channel;       ///< index of first (only) channel 
> containing XCH data
> diff --git a/libavcodec/dca_exss.c b/libavcodec/dca_exss.c
> index 2895e20..648c126 100644
> --- a/libavcodec/dca_exss.c
> +++ b/libavcodec/dca_exss.c
> @@ -22,6 +22,7 @@
>  #include "libavutil/log.h"
>
>  #include "dca.h"
> +#include "dcadata.h"
>  #include "dca_syncwords.h"
>  #include "get_bits.h"
>
> @@ -343,6 +344,14 @@ void ff_dca_exss_parse_header(DCAContext *s)
>                             "DTS-XLL: ignoring XLL extension\n");
>                      break;
>                  }
> +                av_log(s->avctx, AV_LOG_ERROR,
> +                       "bps = %d\n", 
> ff_dca_bits_per_sample[s->source_pcm_res]);
> +
> +                /* Do not change the sample format for the case XLL stream 
> is decoded
> +                 * in a lossy mode. */
> +                if (!s->lossy)
> +                    s->avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
> +                s->avctx->bits_per_raw_sample = 
> ff_dca_bits_per_sample[s->source_pcm_res];
>                  av_log(s->avctx, AV_LOG_DEBUG,
>                         "DTS-XLL: decoding XLL extension\n");
>                  if (ff_dca_xll_decode_header(s)        == 0 &&
> diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
> index 43992dd..da0ae90 100644
> --- a/libavcodec/dcadec.c
> +++ b/libavcodec/dcadec.c
> @@ -44,6 +44,7 @@
>  #include "dcadata.h"
>  #include "dcadsp.h"
>  #include "dcahuff.h"
> +#include "dcamath.h"
>  #include "fft.h"
>  #include "fmtconvert.h"
>  #include "get_bits.h"
> @@ -520,7 +521,8 @@ static int dca_subframe_header(DCAContext *s, int 
> base_channel, int block_index)
>          /* LFE samples */
>          int lfe_samples    = 2 * s->lfe * (4 + block_index);
>          int lfe_end_sample = 2 * s->lfe * (4 + block_index + 
> s->subsubframes[s->current_subframe]);
> -        float lfe_scale;
> +        float lfe_scale_flt;
> +        int lfe_scale;
>
>          for (j = lfe_samples; j < lfe_end_sample; j++) {
>              /* Signed 8 bits int */
> @@ -532,10 +534,14 @@ static int dca_subframe_header(DCAContext *s, int 
> base_channel, int block_index)
>          s->lfe_scale_factor = ff_dca_scale_factor_quant7[get_bits(&s->gb, 
> 7)];
>
>          /* Quantization step size * scale factor */
> -        lfe_scale = 0.035 * s->lfe_scale_factor;
> +        lfe_scale_flt = 0.035 * s->lfe_scale_factor;
> +        /* 4697620 is 24-bit fixedpoint representation of 0.035 */
> +        lfe_scale     = dca_norm((int64_t)4697620 * s->lfe_scale_factor, 23);
>
> -        for (j = lfe_samples; j < lfe_end_sample; j++)
> -            s->lfe_data[j] *= lfe_scale;
> +        for (j = lfe_samples; j < lfe_end_sample; j++) {
> +            s->lfe_data_flt[j] = lfe_scale_flt * s->lfe_data[j];
> +            s->lfe_data[j]     = dca_clip23((s->lfe_data[j] * lfe_scale) >> 
> 4);
> +        }
>      }
>
>      return 0;
> @@ -932,11 +938,24 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>      return 0;
>  }
>
> -static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
> +static int dca_qmf_filters(DCAContext *s, int block_index, int upsample)
>  {
>      int k;
>
> -    if (upsample) {
> +    // for the 96 kHz lossless
> +    if (s->fixed && upsample) {
> +        int **subband_samples_hi = NULL;
> +
> +        for (k = 0; k < s->audio_header.prim_channels; k++) {
> +            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                s->dca_chan[k].subband_samples[block_index];
> +            int *samples_out = s->samples_chanptr[s->channel_order_tab[k]];
> +
> +            qmf_64_subbands_fixed(subband_samples, subband_samples_hi,
> +                                  s->dca_chan[k].subband_hist, samples_out, 
> 8);
> +        }
> +      // for the 96 kHz lossy
> +    } else if (upsample) {
>          LOCAL_ALIGNED(32, float, samples, [DCA_SUBBANDS_X96K], 
> [SAMPLES_PER_SUBBAND]);
>
>          if (!s->qmf64_table) {
> @@ -945,7 +964,6 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>                  return AVERROR(ENOMEM);
>          }
>
> -        /* 64 subbands QMF */
>          for (k = 0; k < s->audio_header.prim_channels; k++) {
>              int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
>                       s->dca_chan[k].subband_samples[block_index];
> @@ -959,6 +977,20 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>                                  /* Upsampling needs a factor 2 here. */
>                                  M_SQRT2 / 32768.0);
>          }
> +      // for the 48 kHz lossless
> +    } else if (s->fixed) {
> +        for (k = 0; k < s->audio_header.prim_channels; k++) {
> +            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                s->dca_chan[k].subband_samples[block_index];
> +            int **subband_samples_hi = NULL;
> +            int *samples_out = s->samples_chanptr[s->channel_order_tab[k]];
> +
> +            qmf_32_subbands_fixed(subband_samples, subband_samples_hi,
> +                                  s->dca_chan[k].subband_hist,
> +                                  samples_out, SAMPLES_PER_SUBBAND,
> +                                  s->multirate_inter);
> +        }
> +      // for the 48 kHz lossy
>      } else {
>          /* 32 subbands QMF */
>          LOCAL_ALIGNED(32, float, samples, [DCA_SUBBANDS], 
> [SAMPLES_PER_SUBBAND]);
> @@ -977,11 +1009,21 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>          }
>      }
>
> -    /* Generate LFE samples for this subsubframe FIXME!!! */
> -    if (s->lfe) {
> +    return 0;
> +}
> +
> +static void dca_generate_lfe(DCAContext *s, int block_index, int upsample)
> +{
> +    if (s->fixed) {
> +        int *samples = s->samples_chanptr[ff_dca_lfe_index[s->amode]];
> +        int synth_x96 = 0; // X96 synthesis flag should be set if X96 would 
> be implemented
> +        int *lfe = s->lfe_data + 2 * s->lfe * (block_index + 4);
> +
> +        lfe_interpolation_fir_fixed(samples, lfe, 2 * s->lfe, synth_x96);
> +    } else {
>          float *samples = s->samples_chanptr[ff_dca_lfe_index[s->amode]];
>          lfe_interpolation_fir(s,
> -                              s->lfe_data + 2 * s->lfe * (block_index + 4),
> +                              s->lfe_data_flt + 2 * s->lfe * (block_index + 
> 4),
>                                samples);
>          if (upsample) {
>              unsigned i;
> @@ -994,13 +1036,45 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>              samples[1] = samples[0];
>          }
>      }
> +}
> +
> +static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
> +{
> +    int ret, k;
> +
> +    /* Choose suitable QMF filter. */
> +    ret = dca_qmf_filters(s, block_index, upsample);
> +    if (ret < 0)
> +        return ret;
> +
> +    /* Generate LFE samples for this subsubframe FIXME!!! */
> +    if (s->lfe)
> +        dca_generate_lfe(s, block_index, upsample);
> +
> +    /* Fixed-point QMF outputs 24-bit samples but libavcodec
> +     * supports 32-bit */
> +    if (s->fixed) {
> +        int nb_chans = s->lfe ? s->audio_header.prim_channels + 1 :
> +            s->audio_header.prim_channels;
> +        int subbands = upsample ? DCA_SUBBANDS_X96K : DCA_SUBBANDS;
> +        int nb_samples = SAMPLES_PER_SUBBAND * subbands;
> +
> +        for (k = 0; k < nb_chans; k++) {
> +            int *samples = s->samples_chanptr[k];
> +            int i;
> +
> +            for (i = 0; i < nb_samples; i++)
> +                samples[i] <<= 8;
> +        }
> +    }
>
>      /* FIXME: This downmixing is probably broken with upsample.
> -     * Probably totally broken also with XLL in general. */
> -    /* Downmixing to Stereo */
> -    if (s->audio_header.prim_channels + !!s->lfe > 2 &&
> -        s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
> -        dca_downmix(s->samples_chanptr, s->amode, !!s->lfe, s->downmix_coef,
> +     * Downmixing to Stereo. */
> +    if ((!s->fixed && s->audio_header.prim_channels + !!s->lfe > 2 &&
> +         s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO)) {
> +        float **samples = (float **)s->samples_chanptr;
> +
> +        dca_downmix(samples, s->amode, !!s->lfe, s->downmix_coef,
>                      s->channel_order_tab);
>      }
>
> @@ -1355,6 +1429,15 @@ static int set_channel_layout(AVCodecContext *avctx, 
> int channels, int num_core_
>      return 0;
>  }
>
> +/* Multiply int vector src with scalar mul and add it to destination vector 
> dst. */
> +static void vector_by_scalar(int *dst, const int *src, int mul, int len)
> +{
> +    int i;
> +
> +    for (i = 0; i < len; i++)
> +        dst[i] += src[i] * (int64_t)mul + 0x8000 >> 16;
> +}
> +
>  /**
>   * Main frame decoding function
>   * FIXME add arguments
> @@ -1369,7 +1452,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
> *data,
>      int lfe_samples;
>      int num_core_channels = 0;
>      int i, ret;
> -    float  **samples_flt;
>      DCAContext *s = avctx->priv_data;
>      int channels, full_channels;
>      int upsample = 0;
> @@ -1437,6 +1519,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
> *data,
>                     xll_nb_samples, frame->nb_samples);
>              s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL;
>          } else {
> +            if (!s->lossy)
> +                s->fixed = 1;
>              if (2 * frame->nb_samples == xll_nb_samples) {
>                  av_log(s->avctx, AV_LOG_INFO,
>                         "XLL: upsampling core channels by a factor of 2\n");
> @@ -1463,7 +1547,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
> *data,
>          av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
>          return ret;
>      }
> -    samples_flt = (float **) frame->extended_data;
>
>      /* allocate buffer for extra channels if downmixing */
>      if (avctx->channels < full_channels) {
> @@ -1477,7 +1560,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
> *data,
>                         &s->extra_channels_buffer_size, ret);
>          if (!s->extra_channels_buffer)
>              return AVERROR(ENOMEM);
> -
>          ret = av_samples_fill_arrays((uint8_t **) s->extra_channels, NULL,
>                                       s->extra_channels_buffer,
>                                       full_channels - channels,
> @@ -1491,27 +1573,40 @@ static int dca_decode_frame(AVCodecContext *avctx, 
> void *data,
>          int ch;
>          unsigned block = upsample ? 512 : 256;
>          for (ch = 0; ch < channels; ch++)
> -            s->samples_chanptr[ch] = samples_flt[ch] + i * block;
> -        for (; ch < full_channels; ch++)
> +            s->samples_chanptr[ch] = (int *)frame->extended_data[ch] + i * 
> block;
> +        for (; ch < full_channels; ch++) {
>              s->samples_chanptr[ch] = s->extra_channels[ch - channels] + i * 
> block;
> +        }
>
>          dca_filter_channels(s, i, upsample);
>
>          /* If this was marked as a DTS-ES stream we need to subtract back- */
>          /* channel from SL & SR to remove matrixed back-channel signal */
>          if ((s->source_pcm_res & 1) && s->xch_present) {
> -            float *back_chan = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
> -            float *lt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
> -            float *rt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
> -            s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
> -            s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
> +            if (s->fixed) {
> +                int *back_chan = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
> +                int *lt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
> +                int *rt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
> +                vector_by_scalar(lt_chan, back_chan,
> +                                 (int)(M_SQRT1_2 * -0x10000), 256);
> +                vector_by_scalar(rt_chan, back_chan,
> +                                 (int)(M_SQRT1_2 * -0x10000), 256);
> +            } else {
> +                float *back_chan = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
> +                float *lt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
> +                float *rt_chan   = 
> s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
> +                s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 
> 256);
> +                s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 
> 256);
> +            }
>          }
>      }
>
>      /* update lfe history */
>      lfe_samples = 2 * s->lfe * (s->sample_blocks / SAMPLES_PER_SUBBAND);
> -    for (i = 0; i < 2 * s->lfe * 4; i++)
> -        s->lfe_data[i] = s->lfe_data[i + lfe_samples];
> +    for (i = 0; i < 2 * s->lfe * 4; i++) {
> +        s->lfe_data_flt[i] = s->lfe_data_flt[i + lfe_samples];
> +        s->lfe_data[i]     = s->lfe_data[i + lfe_samples];
> +    }
>
>      if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) {
>          ret = ff_dca_xll_decode_audio(s, frame);
> @@ -1551,7 +1646,13 @@ static av_cold int dca_decode_init(AVCodecContext 
> *avctx)
>      ff_dcadsp_init(&s->dcadsp);
>      ff_fmt_convert_init(&s->fmt_conv, avctx);
>
> -    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
> +    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_S32P) {
> +        s->fixed = 1;
> +        avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
> +    } else {
> +        s->fixed = 0;
> +        avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
> +    }
>
>      /* allow downmixing to stereo */
>      if (avctx->channels > 2 &&
> @@ -1574,6 +1675,7 @@ static av_cold int dca_decode_end(AVCodecContext *avctx)
>  static const AVOption options[] = {
>      { "disable_xch", "disable decoding of the XCh extension", 
> offsetof(DCAContext, xch_disable), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, 
> AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
>      { "disable_xll", "disable decoding of the XLL extension", 
> offsetof(DCAContext, xll_disable), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, 
> AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
> +    { "force_lossy", "force lossy XLL decoding",              
> offsetof(DCAContext, lossy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, 
> AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
>      { NULL },
>  };
>
> @@ -1595,6 +1697,7 @@ AVCodec ff_dca_decoder = {
>      .close           = dca_decode_end,
>      .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
>      .sample_fmts     = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
> +                                                       AV_SAMPLE_FMT_S32P,
>                                                         AV_SAMPLE_FMT_NONE },
>      .profiles        = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
>      .priv_class      = &dca_decoder_class,
> diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
> index beec200..3bef29f 100644
> --- a/libavcodec/dcadsp.c
> +++ b/libavcodec/dcadsp.c
> @@ -1,6 +1,7 @@
>  /*
>   * Copyright (c) 2004 Gildas Bazin
>   * Copyright (c) 2010 Mans Rullgard <[email protected]>
> + * Copyright (c) 2015 foo86
>   *
>   * This file is part of Libav.
>   *
> @@ -17,14 +18,21 @@
>   * You should have received a copy of the GNU Lesser General Public
>   * License along with Libav; if not, write to the Free Software
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + *
> + * The functions idct_perform32_fixed, qmf_32_subbands_fixed, 
> idct_perform64_fixed,
> + * qmf_64_subbands_fixed, lfe_interpolation_fir_fixed and the auxiliary 
> functions
> + * they are using (mod*, sub*, clp*) are adapted from libdcadec,
> + * https://github.com/foo86/dcadec/tree/master/libdcadec.
>   */
>
> +#include <stdio.h>
>  #include "config.h"
>
>  #include "libavutil/attributes.h"
>  #include "libavutil/intreadwrite.h"
>
>  #include "dcadsp.h"
> +#include "dcadata.h"
>  #include "dcamath.h"
>
>  static void decode_hf_c(int32_t dst[DCA_SUBBANDS][SAMPLES_PER_SUBBAND],
> @@ -132,3 +140,482 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
>      if (ARCH_X86)
>          ff_dcadsp_init_x86(s);
>  }
> +
> +static void sum_a(const int * restrict input, int * restrict output, int len)
> +{
> +    int i;
> +
> +    for (i = 0; i < len; i++)
> +        output[i] = input[2 * i] + input[2 * i + 1];
> +}
> +
> +static void sum_b(const int * restrict input, int * restrict output, int len)
> +{
> +    int i;
> +
> +    output[0] = input[0];
> +    for (i = 1; i < len; i++)
> +        output[i] = input[2 * i] + input[2 * i - 1];
> +}
> +
> +static void sum_c(const int * restrict input, int * restrict output, int len)
> +{
> +    int i;
> +
> +    for (i = 0; i < len; i++)
> +        output[i] = input[2 * i];
> +}
> +
> +static void sum_d(const int * restrict input, int * restrict output, int len)
> +{
> +    int i;
> +
> +    output[0] = input[1];
> +    for (i = 1; i < len; i++)
> +        output[i] = input[2 * i - 1] + input[2 * i + 1];
> +}
> +
> +static void clp_v(int *input, int len)
> +{
> +    int i;
> +
> +    for (i = 0; i < len; i++)
> +        input[i] = dca_clip23(input[i]);
> +}
> +
> +static void dct_a(const int * restrict input, int * restrict output)
> +{
> +    int i, j;
> +    static const int cos_mod[8][8] = {
> +        { 8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  
> 2435084,   822227 },
> +        { 8027397,  5321677,   822227, -3954362, -7398092, -8348215, 
> -6484482, -2435084 },
> +        { 7398092,   822227, -6484482, -8027397, -2435084,  5321677,  
> 8348215,  3954362 },
> +        { 6484482, -3954362, -8027397,   822227,  8348215,  2435084, 
> -7398092, -5321677 },
> +        { 5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  
> 3954362,  6484482 },
> +        { 3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   
> 822227, -7398092 },
> +        { 2435084, -6484482,  8348215, -7398092,  3954362,   822227, 
> -5321677,  8027397 },
> +        {  822227, -2435084,  3954362, -5321677,  6484482, -7398092,  
> 8027397, -8348215 }
> +    };
> +
> +    for (i = 0; i < 8; i++) {
> +        int64_t res = INT64_C(0);
> +        for (j = 0; j < 8; j++)
> +            res += (int64_t)cos_mod[i][j] * input[j];
> +        output[i] = dca_norm(res, 23);
> +    }
> +}
> +
> +static void dct_b(const int * restrict input, int * restrict output)
> +{
> +    int i, j;
> +    static const int cos_mod[8][7] = {
> +        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  
> 1636536 },
> +        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, 
> -4660461 },
> +        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  
> 6974873 },
> +        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, 
> -8227423 },
> +        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  
> 8227423 },
> +        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, 
> -6974873 },
> +        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  
> 4660461 },
> +        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, 
> -1636536 }
> +    };
> +
> +    for (i = 0; i < 8; i++) {
> +        int64_t res = (int64_t)input[0] * (1 << 23);
> +        for (j = 0; j < 7; j++)
> +            res += (int64_t)cos_mod[i][j] * input[1 + j];
> +        output[i] = dca_norm(res, 23);
> +    }
> +}
> +
> +static void mod_a(const int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[16] = {
> +        4199362,   4240198,   4323885,   4454708,
> +        4639772,   4890013,   5221943,   5660703,
> +        -6245623,  -7040975,  -8158494,  -9809974,
> +        -12450076, -17261920, -28585092, -85479984
> +    };
> +
> +    for (i = 0; i < 8; i++)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[8 + 
> i]), 23);
> +
> +    for (i = 8, k = 7; i < 16; i++, k--)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[8 + 
> k]), 23);
> +}
> +
> +static void mod_b(int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[8] = {
> +        4214598,  4383036,  4755871,  5425934,
> +        6611520,  8897610, 14448934, 42791536
> +    };
> +
> +    for (i = 0; i < 8; i++)
> +        input[8 + i] = dca_norm((int64_t)cos_mod[i] * input[8 + i], 23);
> +
> +    for (i = 0; i < 8; i++)
> +        output[i] = input[i] + input[8 + i];
> +
> +    for (i = 8, k = 7; i < 16; i++, k--)
> +        output[i] = input[k] - input[8 + k];
> +}
> +
> +static void mod_c(const int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[32] = {
> +        1048892,  1051425,   1056522,   1064244,
> +        1074689,  1087987,   1104313,   1123884,
> +        1146975,  1173922,   1205139,   1241133,
> +        1282529,  1330095,   1384791,   1447815,
> +        -1520688, -1605358,  -1704360,  -1821051,
> +        -1959964, -2127368,  -2332183,  -2587535,
> +        -2913561, -3342802,  -3931480,  -4785806,
> +        -6133390, -8566050, -14253820, -42727120
> +    };
> +
> +    for (i = 0; i < 16; i++)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[16 + 
> i]), 23);
> +
> +    for (i = 16, k = 15; i < 32; i++, k--)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[16 + 
> k]), 23);
> +}
> +
> +void idct_perform32_fixed(int * restrict input, int * restrict output)
> +{
> +    int mag = 0;
> +    int shift, round;
> +    int i;
> +
> +    for (i = 0; i < 32; i++)
> +        mag += abs(input[i]);
> +
> +    shift = mag > 0x400000 ? 2 : 0;
> +    round = shift > 0 ? 1 << (shift - 1) : 0;
> +
> +    for (i = 0; i < 32; i++)
> +        input[i] = (input[i] + round) >> shift;
> +
> +    sum_a(input, output +  0, 16);
> +    sum_b(input, output + 16, 16);
> +    clp_v(output, 32);
> +
> +    sum_a(output +  0, input +  0, 8);
> +    sum_b(output +  0, input +  8, 8);
> +    sum_c(output + 16, input + 16, 8);
> +    sum_d(output + 16, input + 24, 8);
> +    clp_v(input, 32);
> +
> +    dct_a(input +  0, output +  0);
> +    dct_b(input +  8, output +  8);
> +    dct_b(input + 16, output + 16);
> +    dct_b(input + 24, output + 24);
> +    clp_v(output, 32);
> +
> +    mod_a(output +  0, input +  0);
> +    mod_b(output + 16, input + 16);
> +    clp_v(input, 32);
> +
> +    mod_c(input, output);
> +
> +    for (i = 0; i < 32; i++)
> +        output[i] = dca_clip23(output[i] * (1 << shift));
> +}
> +
> +void qmf_32_subbands_fixed(int subband_samples[32][8], int 
> **subband_samples_hi, int *history,
> +                           int *pcm_samples, int nb_samples, int swich)
> +{
> +    const int32_t *filter_coeff;
> +    int input[32];
> +    int output[32];
> +    int sample;
> +
> +    // Select filter
> +    if (!swich)
> +        filter_coeff = ff_dca_fir_32bands_nonperfect_fixed;
> +    else
> +        filter_coeff = ff_dca_fir_32bands_perfect_fixed;
> +
> +    for (sample = 0; sample < nb_samples; sample++) {
> +        int i, j, k;
> +
> +        // Load in one sample from each subband
> +        for (i = 0; i < 32; i++) {
> +            input[i] = subband_samples[i][sample];
> +        }
> +
> +        // Inverse DCT
> +        idct_perform32_fixed(input, output);
> +
> +        // Store history
> +        for (i = 0, k = 31; i < 16; i++, k--) {
> +            history[     i] = dca_clip23(output[i] - output[k]);
> +            history[16 + i] = dca_clip23(output[i] + output[k]);
> +        }
> +
> +        // One subband sample generates 32 interpolated ones
> +        for (i = 0; i < 16; i++) {
> +            // Clear accumulation
> +            int64_t res = INT64_C(0);
> +
> +            // Accumulate
> +            for (j = 32; j < 512; j += 64)
> +                res += (int64_t)history[16 + i + j] * filter_coeff[i + j];
> +            res = dca_round(res, 21);
> +            for (j =  0; j < 512; j += 64)
> +                res += (int64_t)history[     i + j] * filter_coeff[i + j];
> +
> +            // Save interpolated samples
> +            pcm_samples[sample * 32 + i] = dca_clip23(dca_norm(res, 21)); // 
> * (1.0f / (1 << 24));
> +        }
> +
> +        for (i = 16, k = 15; i < 32; i++, k--) {
> +            // Clear accumulation
> +            int64_t res = INT64_C(0);
> +
> +            // Accumulate
> +            for (j = 32; j < 512; j += 64)
> +                res += (int64_t)history[16 + k + j] * filter_coeff[i + j];
> +            res = dca_round(res, 21);
> +            for (j =  0; j < 512; j += 64)
> +                res += (int64_t)history[     k + j] * filter_coeff[i + j];
> +
> +            // Save interpolated samples
> +            pcm_samples[sample * 32 + i] = dca_clip23(dca_norm(res, 21)); // 
> * (1.0f / (1 << 24));
> +        }
> +
> +        // Shift history
> +        for (i = 511; i >= 32; i--)
> +            history[i] = history[i - 32];
> +    }
> +}
> +
> +static void mod64_a(const int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[32] = {
> +        4195568,   4205700,   4226086,    4256977,
> +        4298755,   4351949,   4417251,    4495537,
> +        4587901,   4695690,   4820557,    4964534,
> +        5130115,   5320382,   5539164,    5791261,
> +        -6082752,  -6421430,  -6817439,   -7284203,
> +        -7839855,  -8509474,  -9328732,  -10350140,
> +        -11654242, -13371208, -15725922,  -19143224,
> +        -24533560, -34264200, -57015280, -170908480
> +    };
> +
> +    for (i = 0; i < 16; i++)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[16 + 
> i]), 23);
> +
> +    for (i = 16, k = 15; i < 32; i++, k--)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[16 + 
> k]), 23);
> +}
> +
> +static void mod64_b(int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[16] = {
> +        4199362,  4240198,  4323885,  4454708,
> +        4639772,  4890013,  5221943,  5660703,
> +        6245623,  7040975,  8158494,  9809974,
> +        12450076, 17261920, 28585092, 85479984
> +    };
> +
> +    for (i = 0; i < 16; i++)
> +        input[16 + i] = dca_norm((int64_t)cos_mod[i] * input[16 + i], 23);
> +
> +    for (i = 0; i < 16; i++)
> +        output[i] = input[i] + input[16 + i];
> +
> +    for (i = 16, k = 15; i < 32; i++, k--)
> +        output[i] = input[k] - input[16 + k];
> +}
> +
> +static void mod64_c(const int * restrict input, int * restrict output)
> +{
> +    int i, k;
> +    static const int cos_mod[64] = {
> +        741511,    741958,    742853,    744199,
> +        746001,    748262,    750992,    754197,
> +        757888,    762077,    766777,    772003,
> +        777772,    784105,    791021,    798546,
> +        806707,    815532,    825054,    835311,
> +        846342,    858193,    870912,    884554,
> +        899181,    914860,    931667,    949686,
> +        969011,    989747,   1012012,   1035941,
> +        -1061684,  -1089412,  -1119320,  -1151629,
> +        -1186595,  -1224511,  -1265719,  -1310613,
> +        -1359657,  -1413400,  -1472490,  -1537703,
> +        -1609974,  -1690442,  -1780506,  -1881904,
> +        -1996824,  -2128058,  -2279225,  -2455101,
> +        -2662128,  -2909200,  -3208956,  -3579983,
> +        -4050785,  -4667404,  -5509372,  -6726913,
> +        -8641940, -12091426, -20144284, -60420720
> +    };
> +
> +    for (i = 0; i < 32; i++)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[32 + 
> i]), 23);
> +
> +    for (i = 32, k = 31; i < 64; i++, k--)
> +        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[32 + 
> k]), 23);
> +}
> +
> +void idct_perform64_fixed(int * restrict input, int * restrict output)
> +{
> +    int mag = 0;
> +    int shift;
> +    int round;
> +    int i;
> +
> +    for (i = 0; i < 64; i++)
> +        mag += abs(input[i]);
> +
> +    shift = mag > 0x400000 ? 2 : 0;
> +    round = shift > 0 ? 1 << (shift - 1) : 0;
> +
> +    for (i = 0; i < 64; i++)
> +        input[i] = (input[i] + round) >> shift;
> +
> +    sum_a(input, output +  0, 32);
> +    sum_b(input, output + 32, 32);
> +    clp_v(output, 64);
> +
> +    sum_a(output +  0, input +  0, 16);
> +    sum_b(output +  0, input + 16, 16);
> +    sum_c(output + 32, input + 32, 16);
> +    sum_d(output + 32, input + 48, 16);
> +    clp_v(input, 64);
> +
> +    sum_a(input +  0, output +  0, 8);
> +    sum_b(input +  0, output +  8, 8);
> +    sum_c(input + 16, output + 16, 8);
> +    sum_d(input + 16, output + 24, 8);
> +    sum_c(input + 32, output + 32, 8);
> +    sum_d(input + 32, output + 40, 8);
> +    sum_c(input + 48, output + 48, 8);
> +    sum_d(input + 48, output + 56, 8);
> +    clp_v(output, 64);
> +
> +    dct_a(output +  0, input +  0);
> +    dct_b(output +  8, input +  8);
> +    dct_b(output + 16, input + 16);
> +    dct_b(output + 24, input + 24);
> +    dct_b(output + 32, input + 32);
> +    dct_b(output + 40, input + 40);
> +    dct_b(output + 48, input + 48);
> +    dct_b(output + 56, input + 56);
> +    clp_v(input, 64);
> +
> +    mod_a(input +  0, output +  0);
> +    mod_b(input + 16, output + 16);
> +    mod_b(input + 32, output + 32);
> +    mod_b(input + 48, output + 48);
> +    clp_v(output, 64);
> +
> +    mod64_a(output +  0, input +  0);
> +    mod64_b(output + 32, input + 32);
> +    clp_v(input, 64);
> +
> +    mod64_c(input, output);
> +
> +    for (i = 0; i < 64; i++)
> +        output[i] = dca_clip23(output[i] * (1 << shift));
> +}
> +
> +void qmf_64_subbands_fixed(int subband_samples[64][8], int **subband_samples_hi, int *history,
> +                           int *pcm_samples, int nb_samples)
> +{
> +    int output[64];
> +    int sample;
> +
> +    // Interpolation begins
> +    for (sample = 0; sample < nb_samples; sample++) {
> +        int i, j, k;
> +
> +        // Load in one sample from each subband
> +        int input[64];
> +        if (subband_samples_hi) {
> +            // Full 64 subbands, first 32 are residual coded
> +            for (i =  0; i < 32; i++)
> +                input[i] = subband_samples[i][sample] + subband_samples_hi[i][sample];
> +            for (i = 32; i < 64; i++)
> +                input[i] = subband_samples_hi[i][sample];
> +        } else {
> +            // Only first 32 subbands
> +            for (i =  0; i < 32; i++)
> +                input[i] = subband_samples[i][sample];
> +            for (i = 32; i < 64; i++)
> +                input[i] = 0;
> +        }
> +
> +        // Inverse DCT
> +        idct_perform64_fixed(input, output);
> +
> +        // Store history
> +        for (i = 0, k = 63; i < 32; i++, k--) {
> +            history[     i] = dca_clip23(output[i] - output[k]);
> +            history[32 + i] = dca_clip23(output[i] + output[k]);
> +        }
> +
> +        // One subband sample generates 64 interpolated ones
> +        for (i = 0; i < 32; i++) {
> +            // Clear accumulation
> +            int64_t res = INT64_C(0);
> +
> +            // Accumulate
> +            for (j = 64; j < 1024; j += 128)
> +                res += (int64_t)history[32 + i + j] * ff_dca_band_fir_x96[i + j];
> +            res = dca_round(res, 20);
> +            for (j =  0; j < 1024; j += 128)
> +                res += (int64_t)history[     i + j] * ff_dca_band_fir_x96[i + j];
> +
> +            // Save interpolated samples
> +            pcm_samples[sample * 64 + i] = dca_clip23(dca_norm(res, 20));
> +        }
> +
> +        for (i = 32, k = 31; i < 64; i++, k--) {
> +            // Clear accumulation
> +            int64_t res = INT64_C(0);
> +
> +            // Accumulate
> +            for (j = 64; j < 1024; j += 128)
> +                res += (int64_t)history[32 + k + j] * ff_dca_band_fir_x96[i + j];
> +            res = dca_round(res, 20);
> +            for (j =  0; j < 1024; j += 128)
> +                res += (int64_t)history[     k + j] * ff_dca_band_fir_x96[i + j];
> +
> +            // Save interpolated samples
> +            pcm_samples[sample * 64 + i] = dca_clip23(dca_norm(res, 20));
> +        }
> +
> +        // Shift history
> +        for (i = 1023; i >= 64; i--)
> +            history[i] = history[i - 64];
> +    }
> +}
> +
> +void lfe_interpolation_fir_fixed(int *pcm_samples, int *lfe_samples,
> +                                 int nb_samples, int synth_x96)
> +{
> +    int dec_factor = 64;
> +    int i, j, k;
> +
> +    // Interpolation
> +    for (i = 0; i < nb_samples; i++) {
> +        // One decimated sample generates 64 or 128 interpolated ones
> +        for (j = 0; j < dec_factor; j++) {
> +            // Clear accumulation
> +            int64_t res = INT64_C(0);
> +
> +            // Accumulate
> +            for (k = 0; k < 512 / dec_factor; k++)
> +                res += (int64_t)ff_dca_lfe_fir_64_fixed[k * dec_factor + j] *
> +                        lfe_samples[i - k];
> +
> +            // Save interpolated samples
> +            pcm_samples[(i * dec_factor + j) << synth_x96] = dca_clip23(dca_norm(res, 23));
> +        }
> +    }
> +}
> diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
> index 9ea89ea..0c0e6c8 100644
> --- a/libavcodec/dcadsp.h
> +++ b/libavcodec/dcadsp.h
> @@ -14,6 +14,10 @@
>   * You should have received a copy of the GNU Lesser General Public
>   * License along with Libav; if not, write to the Free Software
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + *
> + * The functions idct_perform32_fixed, qmf_32_subbands_fixed, idct_perform64_fixed,
> + * qmf_64_subbands_fixed and the auxiliary functions they are using are adapted
> + * from libdcadec, https://github.com/foo86/dcadec/tree/master/libdcadec.
>   */
>
>  #ifndef AVCODEC_DCADSP_H
> @@ -28,7 +32,7 @@
>
>
>  typedef struct DCADSPContext {
> -    void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
> +    void (*lfe_fir[2])(void *out, const float *in, const float *coefs);
>      void (*qmf_32_subbands)(float samples_in[DCA_SUBBANDS][SAMPLES_PER_SUBBAND], int sb_act,
>                              SynthFilterContext *synth, FFTContext *imdct,
>                              float synth_buf_ptr[512],
> @@ -48,4 +52,13 @@ void ff_dcadsp_init_aarch64(DCADSPContext *s);
>  void ff_dcadsp_init_arm(DCADSPContext *s);
>  void ff_dcadsp_init_x86(DCADSPContext *s);
>
> +void idct_perform32_fixed(int * restrict input, int * restrict output);
> +void qmf_32_subbands_fixed(int subband_samples[32][8], int **subband_samples_hi,
> +                           int *history, int *pcm_samples, int nb_samples, int swich);
> +void idct_perform64_fixed(int * restrict input, int * restrict output);
> +void qmf_64_subbands_fixed(int subband_samples[64][8], int **subband_samples_hi,
> +                           int *history, int *pcm_samples, int nb_samples);
> +void lfe_interpolation_fir_fixed(int *pcm_samples, int *lfe_samples,
> +                                 int nb_samples, int synth_x96);
> +
>  #endif /* AVCODEC_DCADSP_H */
> diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak
> index cf11e9d..5f04418 100644
> --- a/tests/fate/audio.mak
> +++ b/tests/fate/audio.mak
> @@ -22,7 +22,7 @@ fate-dca-core: CMP = oneoff
>  fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm
>
>  FATE_DCA-$(CONFIG_DTS_DEMUXER) += fate-dca-xll
> -fate-dca-xll: CMD = pcm -disable_xll 0 -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
> +fate-dca-xll: CMD = pcm -disable_xll 0 -force_lossy 1 -i $(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
>  fate-dca-xll: CMP = oneoff
>  fate-dca-xll: REF = $(SAMPLES)/dts/master_audio_7.1_24bit_2.pcm
>
> --
> 2.1.4
>


_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] dca: Add support for bit-exact reconstruction.

Reply via email to