Add bit-exact QMF and LFE filters, which makes it possible for XLL
streams to be decoded losslessly. The new -force_lossy option allows
keeping lossy mode for XLL streams. This option was added to the
fate-dca-xll test to keep its decoded output the same as the reference.
---
This patch was tested with libdcadec (by foo86) used as the reference decoder.

 doc/decoders.texi     |  11 ++
 libavcodec/dca.h      |   9 +-
 libavcodec/dca_exss.c |   9 +
 libavcodec/dcadec.c   | 159 +++++++++++++---
 libavcodec/dcadsp.c   | 487 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/dcadsp.h   |  15 +-
 tests/fate/audio.mak  |   2 +-
 7 files changed, 660 insertions(+), 32 deletions(-)

diff --git a/doc/decoders.texi b/doc/decoders.texi
index 99d2008..9de3243 100644
--- a/doc/decoders.texi
+++ b/doc/decoders.texi
@@ -53,4 +53,15 @@ Loud sounds are fully compressed.  Soft sounds are enhanced.
 
 @end table
 
+@section dca
+Fixed-point reconstruction can be forced for any kind of input
+by using the @code{-request_sample_fmt @var{s32p}} option.
+
+@table @option
+
+@item -force_lossy 1
+Force lossy mode for the XLL streams.
+
+@end table
+
 @c man end AUDIO DECODERS
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 787a9c7..67945d5 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -156,6 +156,7 @@ typedef struct DCAChan {
     /* Half size is sufficient for core decoding, but for 96 kHz data
      * we need QMF with 64 subbands and 1024 samples. */
     DECLARE_ALIGNED(32, float, subband_fir_hist)[1024];
+    DECLARE_ALIGNED(32, int, subband_hist)[1024];
     DECLARE_ALIGNED(32, float, subband_fir_noidea)[64];
 
     /* Primary audio coding side information */
@@ -220,7 +221,8 @@ typedef struct DCAContext {
     uint16_t core_downmix_codes[DCA_PRIM_CHANNELS_MAX + 1][4];   ///< embedded 
downmix coefficients (9-bit codes)
 
 
-    float lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];      ///< Low 
frequency effect data
+    int lfe_data[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];      ///< Low 
frequency effect data
+    float lfe_data_flt[2 * DCA_LFE_MAX * (DCA_BLOCKS_MAX + 4)];
     int lfe_scale_factor;
 
     /* Subband samples history (for ADPCM) */
@@ -230,7 +232,7 @@ typedef struct DCAContext {
 
     int output;                 ///< type of output
 
-    float *samples_chanptr[DCA_PRIM_CHANNELS_MAX + 1];
+    void *samples_chanptr[DCA_PRIM_CHANNELS_MAX + 1];
     float *extra_channels[DCA_PRIM_CHANNELS_MAX + 1];
     uint8_t *extra_channels_buffer;
     unsigned int extra_channels_buffer_size;
@@ -247,6 +249,9 @@ typedef struct DCAContext {
     int core_ext_mask;          ///< present extensions in the core substream
     int exss_ext_mask;          ///< Non-core extensions
 
+    int fixed;                  ///< force using fixedpoint QMF
+    int lossy;                  ///< force lossy decoding for the XLL stream
+
     /* XCh extension information */
     int xch_present;            ///< XCh extension present and valid
     int xch_base_channel;       ///< index of first (only) channel containing 
XCH data
diff --git a/libavcodec/dca_exss.c b/libavcodec/dca_exss.c
index 2895e20..648c126 100644
--- a/libavcodec/dca_exss.c
+++ b/libavcodec/dca_exss.c
@@ -22,6 +22,7 @@
 #include "libavutil/log.h"
 
 #include "dca.h"
+#include "dcadata.h"
 #include "dca_syncwords.h"
 #include "get_bits.h"
 
@@ -343,6 +344,14 @@ void ff_dca_exss_parse_header(DCAContext *s)
                            "DTS-XLL: ignoring XLL extension\n");
                     break;
                 }
+                av_log(s->avctx, AV_LOG_ERROR,
+                       "bps = %d\n", 
ff_dca_bits_per_sample[s->source_pcm_res]);
+
+                /* Do not change the sample format when the XLL stream is
+                 * decoded in lossy mode. */
+                if (!s->lossy)
+                    s->avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+                s->avctx->bits_per_raw_sample = 
ff_dca_bits_per_sample[s->source_pcm_res];
                 av_log(s->avctx, AV_LOG_DEBUG,
                        "DTS-XLL: decoding XLL extension\n");
                 if (ff_dca_xll_decode_header(s)        == 0 &&
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 43992dd..da0ae90 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -44,6 +44,7 @@
 #include "dcadata.h"
 #include "dcadsp.h"
 #include "dcahuff.h"
+#include "dcamath.h"
 #include "fft.h"
 #include "fmtconvert.h"
 #include "get_bits.h"
@@ -520,7 +521,8 @@ static int dca_subframe_header(DCAContext *s, int 
base_channel, int block_index)
         /* LFE samples */
         int lfe_samples    = 2 * s->lfe * (4 + block_index);
         int lfe_end_sample = 2 * s->lfe * (4 + block_index + 
s->subsubframes[s->current_subframe]);
-        float lfe_scale;
+        float lfe_scale_flt;
+        int lfe_scale;
 
         for (j = lfe_samples; j < lfe_end_sample; j++) {
             /* Signed 8 bits int */
@@ -532,10 +534,14 @@ static int dca_subframe_header(DCAContext *s, int 
base_channel, int block_index)
         s->lfe_scale_factor = ff_dca_scale_factor_quant7[get_bits(&s->gb, 7)];
 
         /* Quantization step size * scale factor */
-        lfe_scale = 0.035 * s->lfe_scale_factor;
+        lfe_scale_flt = 0.035 * s->lfe_scale_factor;
+        /* 4697620 is 0.035 scaled by 2^27 (fits in 24 bits); the shifts by 23 here and 4 below remove that scale */
+        lfe_scale     = dca_norm((int64_t)4697620 * s->lfe_scale_factor, 23);
 
-        for (j = lfe_samples; j < lfe_end_sample; j++)
-            s->lfe_data[j] *= lfe_scale;
+        for (j = lfe_samples; j < lfe_end_sample; j++) {
+            s->lfe_data_flt[j] = lfe_scale_flt * s->lfe_data[j];
+            s->lfe_data[j]     = dca_clip23((s->lfe_data[j] * lfe_scale) >> 4);
+        }
     }
 
     return 0;
@@ -932,11 +938,24 @@ static int dca_subsubframe(DCAContext *s, int 
base_channel, int block_index)
     return 0;
 }
 
-static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
+static int dca_qmf_filters(DCAContext *s, int block_index, int upsample)
 {
     int k;
 
-    if (upsample) {
+    // for the 96 kHz lossless
+    if (s->fixed && upsample) {
+        int **subband_samples_hi = NULL;
+
+        for (k = 0; k < s->audio_header.prim_channels; k++) {
+            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
+                s->dca_chan[k].subband_samples[block_index];
+            int *samples_out = s->samples_chanptr[s->channel_order_tab[k]];
+
+            qmf_64_subbands_fixed(subband_samples, subband_samples_hi,
+                                  s->dca_chan[k].subband_hist, samples_out, 8);
+        }
+      // for the 96 kHz lossy
+    } else if (upsample) {
         LOCAL_ALIGNED(32, float, samples, [DCA_SUBBANDS_X96K], 
[SAMPLES_PER_SUBBAND]);
 
         if (!s->qmf64_table) {
@@ -945,7 +964,6 @@ static int dca_filter_channels(DCAContext *s, int 
block_index, int upsample)
                 return AVERROR(ENOMEM);
         }
 
-        /* 64 subbands QMF */
         for (k = 0; k < s->audio_header.prim_channels; k++) {
             int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
                      s->dca_chan[k].subband_samples[block_index];
@@ -959,6 +977,20 @@ static int dca_filter_channels(DCAContext *s, int 
block_index, int upsample)
                                 /* Upsampling needs a factor 2 here. */
                                 M_SQRT2 / 32768.0);
         }
+      // for the 48 kHz lossless
+    } else if (s->fixed) {
+        for (k = 0; k < s->audio_header.prim_channels; k++) {
+            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
+                s->dca_chan[k].subband_samples[block_index];
+            int **subband_samples_hi = NULL;
+            int *samples_out = s->samples_chanptr[s->channel_order_tab[k]];
+
+            qmf_32_subbands_fixed(subband_samples, subband_samples_hi,
+                                  s->dca_chan[k].subband_hist,
+                                  samples_out, SAMPLES_PER_SUBBAND,
+                                  s->multirate_inter);
+        }
+      // for the 48 kHz lossy
     } else {
         /* 32 subbands QMF */
         LOCAL_ALIGNED(32, float, samples, [DCA_SUBBANDS], 
[SAMPLES_PER_SUBBAND]);
@@ -977,11 +1009,21 @@ static int dca_filter_channels(DCAContext *s, int 
block_index, int upsample)
         }
     }
 
-    /* Generate LFE samples for this subsubframe FIXME!!! */
-    if (s->lfe) {
+    return 0;
+}
+
+static void dca_generate_lfe(DCAContext *s, int block_index, int upsample)
+{
+    if (s->fixed) {
+        int *samples = s->samples_chanptr[ff_dca_lfe_index[s->amode]];
+        int synth_x96 = 0; // X96 synthesis flag should be set if X96 would be 
implemented
+        int *lfe = s->lfe_data + 2 * s->lfe * (block_index + 4);
+
+        lfe_interpolation_fir_fixed(samples, lfe, 2 * s->lfe, synth_x96);
+    } else {
         float *samples = s->samples_chanptr[ff_dca_lfe_index[s->amode]];
         lfe_interpolation_fir(s,
-                              s->lfe_data + 2 * s->lfe * (block_index + 4),
+                              s->lfe_data_flt + 2 * s->lfe * (block_index + 4),
                               samples);
         if (upsample) {
             unsigned i;
@@ -994,13 +1036,45 @@ static int dca_filter_channels(DCAContext *s, int 
block_index, int upsample)
             samples[1] = samples[0];
         }
     }
+}
+
+static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
+{
+    int ret, k;
+
+    /* Choose suitable QMF filter. */
+    ret = dca_qmf_filters(s, block_index, upsample);
+    if (ret < 0)
+        return ret;
+
+    /* Generate LFE samples for this subsubframe FIXME!!! */
+    if (s->lfe)
+        dca_generate_lfe(s, block_index, upsample);
+
+    /* The fixed-point QMF outputs 24-bit samples, but the S32P output
+     * format is 32-bit, so scale the samples up by 8 bits. */
+    if (s->fixed) {
+        int nb_chans = s->lfe ? s->audio_header.prim_channels + 1 :
+            s->audio_header.prim_channels;
+        int subbands = upsample ? DCA_SUBBANDS_X96K : DCA_SUBBANDS;
+        int nb_samples = SAMPLES_PER_SUBBAND * subbands;
+
+        for (k = 0; k < nb_chans; k++) {
+            int *samples = s->samples_chanptr[k];
+            int i;
+
+            for (i = 0; i < nb_samples; i++)
+                samples[i] <<= 8;
+        }
+    }
 
     /* FIXME: This downmixing is probably broken with upsample.
-     * Probably totally broken also with XLL in general. */
-    /* Downmixing to Stereo */
-    if (s->audio_header.prim_channels + !!s->lfe > 2 &&
-        s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO) {
-        dca_downmix(s->samples_chanptr, s->amode, !!s->lfe, s->downmix_coef,
+     * Downmixing to Stereo. */
+    if ((!s->fixed && s->audio_header.prim_channels + !!s->lfe > 2 &&
+         s->avctx->request_channel_layout == AV_CH_LAYOUT_STEREO)) {
+        float **samples = (float **)s->samples_chanptr;
+
+        dca_downmix(samples, s->amode, !!s->lfe, s->downmix_coef,
                     s->channel_order_tab);
     }
 
@@ -1355,6 +1429,15 @@ static int set_channel_layout(AVCodecContext *avctx, int 
channels, int num_core_
     return 0;
 }
 
+/* dst[i] += (src[i] * mul + 0x8000) >> 16 for i in [0, len); the product is formed in 64 bits. */
+static void vector_by_scalar(int *dst, const int *src, int mul, int len)
+{ /* the callers in this patch pass mul = (int)(M_SQRT1_2 * -0x10000), i.e. a factor scaled by 1 << 16 */
+    int i;
+
+    for (i = 0; i < len; i++)
+        dst[i] += src[i] * (int64_t)mul + 0x8000 >> 16; /* '+' binds tighter than '>>': 0x8000 (half of 1 << 16) is added before the shift */
+}
+
 /**
  * Main frame decoding function
  * FIXME add arguments
@@ -1369,7 +1452,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
*data,
     int lfe_samples;
     int num_core_channels = 0;
     int i, ret;
-    float  **samples_flt;
     DCAContext *s = avctx->priv_data;
     int channels, full_channels;
     int upsample = 0;
@@ -1437,6 +1519,8 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
*data,
                    xll_nb_samples, frame->nb_samples);
             s->exss_ext_mask &= ~DCA_EXT_EXSS_XLL;
         } else {
+            if (!s->lossy)
+                s->fixed = 1;
             if (2 * frame->nb_samples == xll_nb_samples) {
                 av_log(s->avctx, AV_LOG_INFO,
                        "XLL: upsampling core channels by a factor of 2\n");
@@ -1463,7 +1547,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
*data,
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return ret;
     }
-    samples_flt = (float **) frame->extended_data;
 
     /* allocate buffer for extra channels if downmixing */
     if (avctx->channels < full_channels) {
@@ -1477,7 +1560,6 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
*data,
                        &s->extra_channels_buffer_size, ret);
         if (!s->extra_channels_buffer)
             return AVERROR(ENOMEM);
-
         ret = av_samples_fill_arrays((uint8_t **) s->extra_channels, NULL,
                                      s->extra_channels_buffer,
                                      full_channels - channels,
@@ -1491,27 +1573,40 @@ static int dca_decode_frame(AVCodecContext *avctx, void 
*data,
         int ch;
         unsigned block = upsample ? 512 : 256;
         for (ch = 0; ch < channels; ch++)
-            s->samples_chanptr[ch] = samples_flt[ch] + i * block;
-        for (; ch < full_channels; ch++)
+            s->samples_chanptr[ch] = (int *)frame->extended_data[ch] + i * 
block;
+        for (; ch < full_channels; ch++) {
             s->samples_chanptr[ch] = s->extra_channels[ch - channels] + i * 
block;
+        }
 
         dca_filter_channels(s, i, upsample);
 
         /* If this was marked as a DTS-ES stream we need to subtract back- */
         /* channel from SL & SR to remove matrixed back-channel signal */
         if ((s->source_pcm_res & 1) && s->xch_present) {
-            float *back_chan = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
-            float *lt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
-            float *rt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
-            s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 256);
-            s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 256);
+            if (s->fixed) {
+                int *back_chan = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
+                int *lt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
+                int *rt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
+                vector_by_scalar(lt_chan, back_chan,
+                                 (int)(M_SQRT1_2 * -0x10000), 256);
+                vector_by_scalar(rt_chan, back_chan,
+                                 (int)(M_SQRT1_2 * -0x10000), 256);
+            } else {
+                float *back_chan = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel]];
+                float *lt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 2]];
+                float *rt_chan   = 
s->samples_chanptr[s->channel_order_tab[s->xch_base_channel - 1]];
+                s->fdsp.vector_fmac_scalar(lt_chan, back_chan, -M_SQRT1_2, 
256);
+                s->fdsp.vector_fmac_scalar(rt_chan, back_chan, -M_SQRT1_2, 
256);
+            }
         }
     }
 
     /* update lfe history */
     lfe_samples = 2 * s->lfe * (s->sample_blocks / SAMPLES_PER_SUBBAND);
-    for (i = 0; i < 2 * s->lfe * 4; i++)
-        s->lfe_data[i] = s->lfe_data[i + lfe_samples];
+    for (i = 0; i < 2 * s->lfe * 4; i++) {
+        s->lfe_data_flt[i] = s->lfe_data_flt[i + lfe_samples];
+        s->lfe_data[i]     = s->lfe_data[i + lfe_samples];
+    }
 
     if (s->exss_ext_mask & DCA_EXT_EXSS_XLL) {
         ret = ff_dca_xll_decode_audio(s, frame);
@@ -1551,7 +1646,13 @@ static av_cold int dca_decode_init(AVCodecContext *avctx)
     ff_dcadsp_init(&s->dcadsp);
     ff_fmt_convert_init(&s->fmt_conv, avctx);
 
-    avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    if (avctx->request_sample_fmt == AV_SAMPLE_FMT_S32P) {
+        s->fixed = 1;
+        avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
+    } else {
+        s->fixed = 0;
+        avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+    }
 
     /* allow downmixing to stereo */
     if (avctx->channels > 2 &&
@@ -1574,6 +1675,7 @@ static av_cold int dca_decode_end(AVCodecContext *avctx)
 static const AVOption options[] = {
     { "disable_xch", "disable decoding of the XCh extension", 
offsetof(DCAContext, xch_disable), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, 
AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
     { "disable_xll", "disable decoding of the XLL extension", 
offsetof(DCAContext, xll_disable), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, 
AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
+    { "force_lossy", "force lossy XLL decoding",              
offsetof(DCAContext, lossy), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, 
AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM },
     { NULL },
 };
 
@@ -1595,6 +1697,7 @@ AVCodec ff_dca_decoder = {
     .close           = dca_decode_end,
     .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
     .sample_fmts     = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
+                                                       AV_SAMPLE_FMT_S32P,
                                                        AV_SAMPLE_FMT_NONE },
     .profiles        = NULL_IF_CONFIG_SMALL(ff_dca_profiles),
     .priv_class      = &dca_decoder_class,
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index beec200..3bef29f 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Gildas Bazin
  * Copyright (c) 2010 Mans Rullgard <[email protected]>
+ * Copyright (c) 2015 foo86
  *
  * This file is part of Libav.
  *
@@ -17,14 +18,21 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The functions idct_perform32_fixed, qmf_32_subbands_fixed, 
idct_perform64_fixed,
+ * qmf_64_subbands_fixed, lfe_interpolation_fir_fixed and the auxiliary 
functions
+ * they are using (mod*, sub*, clp*) are adapted from libdcadec,
+ * https://github.com/foo86/dcadec/tree/master/libdcadec.
  */
 
+#include <stdio.h>
 #include "config.h"
 
 #include "libavutil/attributes.h"
 #include "libavutil/intreadwrite.h"
 
 #include "dcadsp.h"
+#include "dcadata.h"
 #include "dcamath.h"
 
 static void decode_hf_c(int32_t dst[DCA_SUBBANDS][SAMPLES_PER_SUBBAND],
@@ -132,3 +140,482 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
     if (ARCH_X86)
         ff_dcadsp_init_x86(s);
 }
+
+static void sum_a(const int * restrict input, int * restrict output, int len)
+{ /* output[i] = input[2i] + input[2i + 1] for i in [0, len); reads 2 * len inputs */
+    int i;
+
+    for (i = 0; i < len; i++)
+        output[i] = input[2 * i] + input[2 * i + 1];
+}
+
+static void sum_b(const int * restrict input, int * restrict output, int len)
+{ /* output[0] = input[0]; output[i] = input[2i] + input[2i - 1] for i in [1, len) */
+    int i;
+
+    output[0] = input[0]; /* first element has no left neighbour, so it is copied */
+    for (i = 1; i < len; i++)
+        output[i] = input[2 * i] + input[2 * i - 1];
+}
+
+static void sum_c(const int * restrict input, int * restrict output, int len)
+{ /* output[i] = input[2i] for i in [0, len): keeps the even-indexed inputs only */
+    int i;
+
+    for (i = 0; i < len; i++)
+        output[i] = input[2 * i];
+}
+
+static void sum_d(const int * restrict input, int * restrict output, int len)
+{ /* output[0] = input[1]; output[i] = input[2i - 1] + input[2i + 1] for i in [1, len) */
+    int i;
+
+    output[0] = input[1]; /* first odd-indexed input is copied as is */
+    for (i = 1; i < len; i++)
+        output[i] = input[2 * i - 1] + input[2 * i + 1]; /* sum of two neighbouring odd-indexed inputs */
+}
+
+static void clp_v(int *input, int len)
+{ /* applies dca_clip23() to input[0..len-1] in place */
+    int i;
+
+    for (i = 0; i < len; i++)
+        input[i] = dca_clip23(input[i]); /* dca_clip23 is not defined in this patch; presumably clips to the signed 24-bit range - confirm in dcamath.h */
+}
+
+static void dct_a(const int * restrict input, int * restrict output)
+{ /* 8x8 matrix-vector product: output[i] = dca_norm(sum over j of cos_mod[i][j] * input[j], 23) */
+    int i, j;
+    static const int cos_mod[8][8] = {
+        { 8348215,  8027397,  7398092,  6484482,  5321677,  3954362,  2435084, 
  822227 },
+        { 8027397,  5321677,   822227, -3954362, -7398092, -8348215, -6484482, 
-2435084 },
+        { 7398092,   822227, -6484482, -8027397, -2435084,  5321677,  8348215, 
 3954362 },
+        { 6484482, -3954362, -8027397,   822227,  8348215,  2435084, -7398092, 
-5321677 },
+        { 5321677, -7398092, -2435084,  8348215,  -822227, -8027397,  3954362, 
 6484482 },
+        { 3954362, -8348215,  5321677,  2435084, -8027397,  6484482,   822227, 
-7398092 },
+        { 2435084, -6484482,  8348215, -7398092,  3954362,   822227, -5321677, 
 8027397 },
+        {  822227, -2435084,  3954362, -5321677,  6484482, -7398092,  8027397, 
-8348215 }
+    }; /* every entry is below 1 << 23 in magnitude; presumably cosines scaled by 1 << 23 - confirm against libdcadec */
+
+    for (i = 0; i < 8; i++) {
+        int64_t res = INT64_C(0); /* 64-bit accumulator for one row's dot product */
+        for (j = 0; j < 8; j++)
+            res += (int64_t)cos_mod[i][j] * input[j];
+        output[i] = dca_norm(res, 23); /* dca_norm is not defined in this patch; presumably a rounding right shift by 23 bits - confirm in dcamath.h */
+    }
+}
+
+static void dct_b(const int * restrict input, int * restrict output)
+{ /* 8 outputs from 8 inputs: input[0] is weighted by 1 << 23, input[1..7] by the 8x7 cos_mod table */
+    int i, j;
+    static const int cos_mod[8][7] = {
+        {  8227423,  7750063,  6974873,  5931642,  4660461,  3210181,  1636536 
},
+        {  6974873,  3210181, -1636536, -5931642, -8227423, -7750063, -4660461 
},
+        {  4660461, -3210181, -8227423, -5931642,  1636536,  7750063,  6974873 
},
+        {  1636536, -7750063, -4660461,  5931642,  6974873, -3210181, -8227423 
},
+        { -1636536, -7750063,  4660461,  5931642, -6974873, -3210181,  8227423 
},
+        { -4660461, -3210181,  8227423, -5931642, -1636536,  7750063, -6974873 
},
+        { -6974873,  3210181,  1636536, -5931642,  8227423, -7750063,  4660461 
},
+        { -8227423,  7750063, -6974873,  5931642, -4660461,  3210181, -1636536 
}
+    };
+
+    for (i = 0; i < 8; i++) {
+        int64_t res = (int64_t)input[0] * (1 << 23); /* accumulator starts with input[0] at the same 1 << 23 scale that dca_norm(res, 23) is called with below */
+        for (j = 0; j < 7; j++)
+            res += (int64_t)cos_mod[i][j] * input[1 + j]; /* table column j pairs with input[j + 1] */
+        output[i] = dca_norm(res, 23);
+    }
+}
+
+static void mod_a(const int * restrict input, int * restrict output)
+{ /* 16-point butterfly with scaling: sums go to output[0..7], differences to output[8..15] */
+    int i, k;
+    static const int cos_mod[16] = {
+        4199362,   4240198,   4323885,   4454708,
+        4639772,   4890013,   5221943,   5660703,
+        -6245623,  -7040975,  -8158494,  -9809974,
+        -12450076, -17261920, -28585092, -85479984
+    };
+
+    for (i = 0; i < 8; i++)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[8 + i]), 
23); /* first half: scaled sum of input[i] and input[8 + i] */
+
+    for (i = 8, k = 7; i < 16; i++, k--)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[8 + k]), 
23); /* second half: k runs 7..0 (k = 15 - i), so the differences are taken in reverse order */
+}
+
+static void mod_b(int * restrict input, int * restrict output)
+{ /* scales input[8..15] in place (hence the non-const input), then writes sums and reversed differences to output[0..15] */
+    int i, k;
+    static const int cos_mod[8] = {
+        4214598,  4383036,  4755871,  5425934,
+        6611520,  8897610, 14448934, 42791536
+    };
+
+    for (i = 0; i < 8; i++)
+        input[8 + i] = dca_norm((int64_t)cos_mod[i] * input[8 + i], 23); /* overwrites the caller's upper half */
+
+    for (i = 0; i < 8; i++)
+        output[i] = input[i] + input[8 + i];
+
+    for (i = 8, k = 7; i < 16; i++, k--)
+        output[i] = input[k] - input[8 + k]; /* k = 15 - i */
+}
+
+static void mod_c(const int * restrict input, int * restrict output)
+{ /* 32-point butterfly with scaling: sums go to output[0..15], differences to output[16..31] */
+    int i, k;
+    static const int cos_mod[32] = {
+        1048892,  1051425,   1056522,   1064244,
+        1074689,  1087987,   1104313,   1123884,
+        1146975,  1173922,   1205139,   1241133,
+        1282529,  1330095,   1384791,   1447815,
+        -1520688, -1605358,  -1704360,  -1821051,
+        -1959964, -2127368,  -2332183,  -2587535,
+        -2913561, -3342802,  -3931480,  -4785806,
+        -6133390, -8566050, -14253820, -42727120
+    };
+
+    for (i = 0; i < 16; i++)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[16 + i]), 
23); /* first half: scaled sum of input[i] and input[16 + i] */
+
+    for (i = 16, k = 15; i < 32; i++, k--)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[16 + k]), 
23); /* second half: k = 31 - i, differences taken in reverse order */
+}
+
+void idct_perform32_fixed(int * restrict input, int * restrict output)
+{ /* 32-point fixed-point transform called by qmf_32_subbands_fixed; input[] doubles as scratch and is overwritten */
+    int mag = 0;
+    int shift, round;
+    int i;
+
+    for (i = 0; i < 32; i++)
+        mag += abs(input[i]); /* total magnitude decides whether to pre-scale */
+
+    shift = mag > 0x400000 ? 2 : 0; /* large inputs are divided by 4 for the duration of the transform */
+    round = shift > 0 ? 1 << (shift - 1) : 0; /* rounding bias: half of the pre-scale step */
+
+    for (i = 0; i < 32; i++)
+        input[i] = (input[i] + round) >> shift;
+
+    sum_a(input, output +  0, 16); /* stage 1: input -> output */
+    sum_b(input, output + 16, 16);
+    clp_v(output, 32);
+
+    sum_a(output +  0, input +  0, 8); /* stage 2: output -> input */
+    sum_b(output +  0, input +  8, 8);
+    sum_c(output + 16, input + 16, 8);
+    sum_d(output + 16, input + 24, 8);
+    clp_v(input, 32);
+
+    dct_a(input +  0, output +  0); /* stage 3: four 8-point transforms, input -> output */
+    dct_b(input +  8, output +  8);
+    dct_b(input + 16, output + 16);
+    dct_b(input + 24, output + 24);
+    clp_v(output, 32);
+
+    mod_a(output +  0, input +  0); /* stage 4: two 16-point butterflies, output -> input */
+    mod_b(output + 16, input + 16);
+    clp_v(input, 32);
+
+    mod_c(input, output); /* stage 5: final 32-point butterfly leaves the result in output */
+
+    for (i = 0; i < 32; i++)
+        output[i] = dca_clip23(output[i] * (1 << shift)); /* undo the pre-scale, then clip */
+}
+
+void qmf_32_subbands_fixed(int subband_samples[32][8], int 
**subband_samples_hi, int *history,
+                           int *pcm_samples, int nb_samples, int swich)
+{ /* 32-band fixed-point synthesis: writes 32 * nb_samples values to pcm_samples; history must hold 512 ints and persists between calls; subband_samples_hi is not used here */
+    const int32_t *filter_coeff;
+    int input[32];
+    int output[32];
+    int sample;
+
+    // Select filter
+    if (!swich) /* swich == 0 selects the "nonperfect" table, any other value the "perfect" one */
+        filter_coeff = ff_dca_fir_32bands_nonperfect_fixed;
+    else
+        filter_coeff = ff_dca_fir_32bands_perfect_fixed;
+
+    for (sample = 0; sample < nb_samples; sample++) {
+        int i, j, k;
+
+        // Load in one sample from each subband
+        for (i = 0; i < 32; i++) {
+            input[i] = subband_samples[i][sample];
+        }
+
+        // Inverse DCT
+        idct_perform32_fixed(input, output);
+
+        // Store history
+        for (i = 0, k = 31; i < 16; i++, k--) { /* k = 31 - i: pairs output[i] with its mirror */
+            history[     i] = dca_clip23(output[i] - output[k]);
+            history[16 + i] = dca_clip23(output[i] + output[k]);
+        }
+
+        // One subband sample generates 32 interpolated ones
+        for (i = 0; i < 16; i++) {
+            // Clear accumulation
+            int64_t res = INT64_C(0);
+
+            // Accumulate
+            for (j = 32; j < 512; j += 64) /* 8 taps taken from the sum half of each older history block */
+                res += (int64_t)history[16 + i + j] * filter_coeff[i + j];
+            res = dca_round(res, 21); /* dca_round is not defined in this patch - see dcamath.h; it is applied between the two partial sums */
+            for (j =  0; j < 512; j += 64) /* 8 taps taken from the difference half */
+                res += (int64_t)history[     i + j] * filter_coeff[i + j];
+
+            // Save interpolated samples
+            pcm_samples[sample * 32 + i] = dca_clip23(dca_norm(res, 21)); // * 
(1.0f / (1 << 24));
+        }
+
+        for (i = 16, k = 15; i < 32; i++, k--) { /* second half of the outputs reads history mirrored (k = 31 - i) */
+            // Clear accumulation
+            int64_t res = INT64_C(0);
+
+            // Accumulate
+            for (j = 32; j < 512; j += 64)
+                res += (int64_t)history[16 + k + j] * filter_coeff[i + j];
+            res = dca_round(res, 21);
+            for (j =  0; j < 512; j += 64)
+                res += (int64_t)history[     k + j] * filter_coeff[i + j];
+
+            // Save interpolated samples
+            pcm_samples[sample * 32 + i] = dca_clip23(dca_norm(res, 21)); // * 
(1.0f / (1 << 24));
+        }
+
+        // Shift history
+        for (i = 511; i >= 32; i--) /* move everything up by 32 entries; slots 0..31 are rewritten on the next sample */
+            history[i] = history[i - 32];
+    }
+}
+
+static void mod64_a(const int * restrict input, int * restrict output)
+{ /* 32-point butterfly with scaling for the 64-band transform: sums to output[0..15], differences to output[16..31] */
+    int i, k;
+    static const int cos_mod[32] = {
+        4195568,   4205700,   4226086,    4256977,
+        4298755,   4351949,   4417251,    4495537,
+        4587901,   4695690,   4820557,    4964534,
+        5130115,   5320382,   5539164,    5791261,
+        -6082752,  -6421430,  -6817439,   -7284203,
+        -7839855,  -8509474,  -9328732,  -10350140,
+        -11654242, -13371208, -15725922,  -19143224,
+        -24533560, -34264200, -57015280, -170908480
+    };
+
+    for (i = 0; i < 16; i++)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[16 + i]), 
23); /* first half: scaled sum of input[i] and input[16 + i] */
+
+    for (i = 16, k = 15; i < 32; i++, k--)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[16 + k]), 
23); /* second half: k = 31 - i, differences taken in reverse order */
+}
+
+static void mod64_b(int * restrict input, int * restrict output)
+{ /* scales input[16..31] in place (hence the non-const input), then writes sums and reversed differences to output[0..31] */
+    int i, k;
+    static const int cos_mod[16] = {
+        4199362,  4240198,  4323885,  4454708,
+        4639772,  4890013,  5221943,  5660703,
+        6245623,  7040975,  8158494,  9809974,
+        12450076, 17261920, 28585092, 85479984
+    }; /* same magnitudes as the table in mod_a, but all positive here */
+
+    for (i = 0; i < 16; i++)
+        input[16 + i] = dca_norm((int64_t)cos_mod[i] * input[16 + i], 23); /* overwrites the caller's upper half */
+
+    for (i = 0; i < 16; i++)
+        output[i] = input[i] + input[16 + i];
+
+    for (i = 16, k = 15; i < 32; i++, k--)
+        output[i] = input[k] - input[16 + k]; /* k = 31 - i */
+}
+
+static void mod64_c(const int * restrict input, int * restrict output)
+{ /* 64-point butterfly with scaling: sums go to output[0..31], differences to output[32..63] */
+    int i, k;
+    static const int cos_mod[64] = {
+        741511,    741958,    742853,    744199,
+        746001,    748262,    750992,    754197,
+        757888,    762077,    766777,    772003,
+        777772,    784105,    791021,    798546,
+        806707,    815532,    825054,    835311,
+        846342,    858193,    870912,    884554,
+        899181,    914860,    931667,    949686,
+        969011,    989747,   1012012,   1035941,
+        -1061684,  -1089412,  -1119320,  -1151629,
+        -1186595,  -1224511,  -1265719,  -1310613,
+        -1359657,  -1413400,  -1472490,  -1537703,
+        -1609974,  -1690442,  -1780506,  -1881904,
+        -1996824,  -2128058,  -2279225,  -2455101,
+        -2662128,  -2909200,  -3208956,  -3579983,
+        -4050785,  -4667404,  -5509372,  -6726913,
+        -8641940, -12091426, -20144284, -60420720
+    };
+
+    for (i = 0; i < 32; i++)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[i] + input[32 + i]), 
23); /* first half: scaled sum of input[i] and input[32 + i] */
+
+    for (i = 32, k = 31; i < 64; i++, k--)
+        output[i] = dca_norm((int64_t)cos_mod[i] * (input[k] - input[32 + k]), 
23); /* second half: k = 63 - i, differences taken in reverse order */
+}
+
+void idct_perform64_fixed(int * restrict input, int * restrict output)
+{ /* 64-point fixed-point transform called by qmf_64_subbands_fixed; input[] doubles as scratch and is overwritten */
+    int mag = 0;
+    int shift;
+    int round;
+    int i;
+
+    for (i = 0; i < 64; i++)
+        mag += abs(input[i]); /* total magnitude decides whether to pre-scale */
+
+    shift = mag > 0x400000 ? 2 : 0; /* large inputs are divided by 4 for the duration of the transform */
+    round = shift > 0 ? 1 << (shift - 1) : 0; /* rounding bias: half of the pre-scale step */
+
+    for (i = 0; i < 64; i++)
+        input[i] = (input[i] + round) >> shift;
+
+    sum_a(input, output +  0, 32); /* stage 1: input -> output */
+    sum_b(input, output + 32, 32);
+    clp_v(output, 64);
+
+    sum_a(output +  0, input +  0, 16); /* stage 2: output -> input */
+    sum_b(output +  0, input + 16, 16);
+    sum_c(output + 32, input + 32, 16);
+    sum_d(output + 32, input + 48, 16);
+    clp_v(input, 64);
+
+    sum_a(input +  0, output +  0, 8); /* stage 3: input -> output, eight groups of 8 */
+    sum_b(input +  0, output +  8, 8);
+    sum_c(input + 16, output + 16, 8);
+    sum_d(input + 16, output + 24, 8);
+    sum_c(input + 32, output + 32, 8);
+    sum_d(input + 32, output + 40, 8);
+    sum_c(input + 48, output + 48, 8);
+    sum_d(input + 48, output + 56, 8);
+    clp_v(output, 64);
+
+    dct_a(output +  0, input +  0); /* stage 4: eight 8-point transforms, output -> input */
+    dct_b(output +  8, input +  8);
+    dct_b(output + 16, input + 16);
+    dct_b(output + 24, input + 24);
+    dct_b(output + 32, input + 32);
+    dct_b(output + 40, input + 40);
+    dct_b(output + 48, input + 48);
+    dct_b(output + 56, input + 56);
+    clp_v(input, 64);
+
+    mod_a(input +  0, output +  0); /* stage 5: four 16-point butterflies, input -> output */
+    mod_b(input + 16, output + 16);
+    mod_b(input + 32, output + 32);
+    mod_b(input + 48, output + 48);
+    clp_v(output, 64);
+
+    mod64_a(output +  0, input +  0); /* stage 6: two 32-point butterflies, output -> input */
+    mod64_b(output + 32, input + 32);
+    clp_v(input, 64);
+
+    mod64_c(input, output); /* stage 7: final 64-point butterfly leaves the result in output */
+
+    for (i = 0; i < 64; i++)
+        output[i] = dca_clip23(output[i] * (1 << shift)); /* undo the pre-scale, then clip */
+}
+
+void qmf_64_subbands_fixed(int subband_samples[64][8], int 
**subband_samples_hi, int *history,
+                           int *pcm_samples, int nb_samples)
+{ /* 64-band fixed-point synthesis: writes 64 * nb_samples values to pcm_samples; history must hold 1024 ints and persists between calls; subband_samples_hi may be NULL */
+    int output[64];
+    int sample;
+
+    // Interpolation begins
+    for (sample = 0; sample < nb_samples; sample++) {
+        int i, j, k;
+
+        // Load in one sample from each subband
+        int input[64];
+        if (subband_samples_hi) {
+            // Full 64 subbands, first 32 are residual coded
+            for (i =  0; i < 32; i++)
+                input[i] = subband_samples[i][sample] + 
subband_samples_hi[i][sample];
+            for (i = 32; i < 64; i++)
+                input[i] = subband_samples_hi[i][sample];
+        } else {
+            // Only first 32 subbands
+            for (i =  0; i < 32; i++)
+                input[i] = subband_samples[i][sample];
+            for (i = 32; i < 64; i++)
+                input[i] = 0; /* upper 32 bands are silent when no high-band data is supplied */
+        }
+
+        // Inverse DCT
+        idct_perform64_fixed(input, output);
+
+        // Store history
+        for (i = 0, k = 63; i < 32; i++, k--) { /* k = 63 - i: pairs output[i] with its mirror */
+            history[     i] = dca_clip23(output[i] - output[k]);
+            history[32 + i] = dca_clip23(output[i] + output[k]);
+        }
+
+        // One subband sample generates 64 interpolated ones
+        for (i = 0; i < 32; i++) {
+            // Clear accumulation
+            int64_t res = INT64_C(0);
+
+            // Accumulate
+            for (j = 64; j < 1024; j += 128) /* 8 taps taken from the sum half of each older history block */
+                res += (int64_t)history[32 + i + j] * ff_dca_band_fir_x96[i + 
j];
+            res = dca_round(res, 20); /* dca_round is not defined in this patch - see dcamath.h; it is applied between the two partial sums */
+            for (j =  0; j < 1024; j += 128) /* 8 taps taken from the difference half */
+                res += (int64_t)history[     i + j] * ff_dca_band_fir_x96[i + 
j];
+
+            // Save interpolated samples
+            pcm_samples[sample * 64 + i] = dca_clip23(dca_norm(res, 20));
+        }
+
+        for (i = 32, k = 31; i < 64; i++, k--) { /* second half of the outputs reads history mirrored (k = 63 - i) */
+            // Clear accumulation
+            int64_t res = INT64_C(0);
+
+            // Accumulate
+            for (j = 64; j < 1024; j += 128)
+                res += (int64_t)history[32 + k + j] * ff_dca_band_fir_x96[i + 
j];
+            res = dca_round(res, 20);
+            for (j =  0; j < 1024; j += 128)
+                res += (int64_t)history[     k + j] * ff_dca_band_fir_x96[i + 
j];
+
+            // Save interpolated samples
+            pcm_samples[sample * 64 + i] = dca_clip23(dca_norm(res, 20));
+        }
+
+        // Shift history
+        for (i = 1023; i >= 64; i--) /* move everything up by 64 entries; slots 0..63 are rewritten on the next sample */
+            history[i] = history[i - 64];
+    }
+}
+
+void lfe_interpolation_fir_fixed(int *pcm_samples, int *lfe_samples,
+                                 int nb_samples, int synth_x96)
+{ /* fixed-point LFE interpolation: each of the nb_samples decimated inputs yields dec_factor outputs; lfe_samples is only read */
+    int dec_factor = 64; /* NOTE(review): fixed at 64; the float path in dcadec.c is not fully visible here, so confirm whether a 128x mode is also needed */
+    int i, j, k;
+
+    // Interpolation
+    for (i = 0; i < nb_samples; i++) {
+        // One decimated sample generates dec_factor (64) interpolated ones
+        for (j = 0; j < dec_factor; j++) {
+            // Clear accumulation
+            int64_t res = INT64_C(0);
+
+            // Accumulate
+            for (k = 0; k < 512 / dec_factor; k++) /* 512 / 64 = 8 taps per output */
+                res += (int64_t)ff_dca_lfe_fir_64_fixed[k * dec_factor + j] *
+                        lfe_samples[i - k]; /* i - k reaches -7: the caller must provide 7 valid samples before lfe_samples[0] */
+
+            // Save interpolated samples
+            pcm_samples[(i * dec_factor + j) << synth_x96] = 
dca_clip23(dca_norm(res, 23)); /* with synth_x96 == 1 only every second output slot is written */
+        }
+    }
+}
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 9ea89ea..0c0e6c8 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -14,6 +14,10 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * The functions idct_perform32_fixed, qmf_32_subbands_fixed, idct_perform64_fixed,
+ * qmf_64_subbands_fixed and the auxiliary functions they use are adapted
+ * from libdcadec, https://github.com/foo86/dcadec/tree/master/libdcadec.
  */
 
 #ifndef AVCODEC_DCADSP_H
@@ -28,7 +32,7 @@
 
 
 typedef struct DCADSPContext {
-    void (*lfe_fir[2])(float *out, const float *in, const float *coefs);
+    void (*lfe_fir[2])(void *out, const float *in, const float *coefs);
     void (*qmf_32_subbands)(float 
samples_in[DCA_SUBBANDS][SAMPLES_PER_SUBBAND], int sb_act,
                             SynthFilterContext *synth, FFTContext *imdct,
                             float synth_buf_ptr[512],
@@ -48,4 +52,13 @@ void ff_dcadsp_init_aarch64(DCADSPContext *s);
 void ff_dcadsp_init_arm(DCADSPContext *s);
 void ff_dcadsp_init_x86(DCADSPContext *s);
 
+void idct_perform32_fixed(int * restrict input, int * restrict output); // restrict: input and output must not alias
+void qmf_32_subbands_fixed(int subband_samples[32][8], int 
**subband_samples_hi,
+                           int *history, int *pcm_samples, int nb_samples, int 
swich);
+void idct_perform64_fixed(int * restrict input, int * restrict output); // restrict: input and output must not alias
+void qmf_64_subbands_fixed(int subband_samples[64][8], int 
**subband_samples_hi,
+                           int *history, int *pcm_samples, int nb_samples);
+void lfe_interpolation_fir_fixed(int *pcm_samples, int *lfe_samples,
+                                 int nb_samples, int synth_x96); // computes 64 pcm_samples per LFE sample; reads lfe_samples[i - 7 .. i]
+
 #endif /* AVCODEC_DCADSP_H */
diff --git a/tests/fate/audio.mak b/tests/fate/audio.mak
index cf11e9d..5f04418 100644
--- a/tests/fate/audio.mak
+++ b/tests/fate/audio.mak
@@ -22,7 +22,7 @@ fate-dca-core: CMP = oneoff
 fate-dca-core: REF = $(SAMPLES)/dts/dts.pcm
 
 FATE_DCA-$(CONFIG_DTS_DEMUXER) += fate-dca-xll
-fate-dca-xll: CMD = pcm -disable_xll 0 -i 
$(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
+fate-dca-xll: CMD = pcm -disable_xll 0 -force_lossy 1 -i 
$(TARGET_SAMPLES)/dts/master_audio_7.1_24bit.dts
 fate-dca-xll: CMP = oneoff
 fate-dca-xll: REF = $(SAMPLES)/dts/master_audio_7.1_24bit_2.pcm
 
-- 
2.1.4


_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to