The DCA core decoder converts integer coefficients read from the
bitstream to floats just after reading them (along with dequantization).
All the other steps of the audio reconstruction are done with floats
which makes the output for the DTS lossless extension (XLL)
actually lossy.
This patch changes the DCA core to work with integer coefficients
until QMF. At this point the integer coefficients are converted to floats.
The coefficients for the LFE channel (lfe_data) are not touched.
This is the first step toward truly lossless XLL decoding.
---
Applied comments from Janne and Diego:
mainly: some functions moved to dcadsp.c
int32_to_float function was added to fmtconvert.c.
The patch was examined with perf record; the main slowdowns
are caused by dequantize() and int32_to_float().
This patch breaks the dca-xll test, but the waveforms of its sample were
compared in Audacity against the pre-patch output and are the same;
the samples also sound the same to my ears. The output coefficients
may differ slightly because the conversion to float now happens
under different conditions. I suggest considering the change acceptable.
libavcodec/dca.h | 6 ++--
libavcodec/dcadec.c | 95 +++++++++++++++++++++++++++----------------------
libavcodec/dcadsp.c | 34 ++++++++++++++++++
libavcodec/dcadsp.h | 6 ++++
libavcodec/fmtconvert.c | 9 +++++
libavcodec/fmtconvert.h | 7 ++++
6 files changed, 111 insertions(+), 46 deletions(-)
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 6548d75..d754287 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -139,7 +139,7 @@ typedef struct DCAAudioHeader {
int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX]; ///< scale factor code book
int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX]; ///< bit allocation
quantizer select
int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///<
quantization index codebook select
- float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< scale
factor adjustment
+ int scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< scale
factor adjustment
int subframes; ///< number of subframes
int total_channels; ///< number of channels including extensions
@@ -147,10 +147,10 @@ typedef struct DCAAudioHeader {
} DCAAudioHeader;
typedef struct DCAChan {
- DECLARE_ALIGNED(32, float,
subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
+ DECLARE_ALIGNED(32, int, subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
/* Subband samples history (for ADPCM) */
- DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_SUBBANDS][4];
+ DECLARE_ALIGNED(32, int32_t, subband_samples_hist)[DCA_SUBBANDS][4];
int hist_index;
/* Half size is sufficient for core decoding, but for 96 kHz data
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index aca6ed3..c641142 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -226,7 +226,7 @@ static inline void get_array(GetBitContext *gb, int *dst,
int len, int bits)
static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
{
int i, j;
- static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
+ static const int adj_table[4] = { 16, 18, 20, 23 };
static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
static const int thr[11] = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
@@ -265,7 +265,7 @@ static int dca_parse_audio_coding_header(DCAContext *s, int
base_channel)
/* Get scale factor adjustment */
for (j = 0; j < 11; j++)
for (i = base_channel; i < s->audio_header.prim_channels; i++)
- s->audio_header.scalefactor_adj[i][j] = 1;
+ s->audio_header.scalefactor_adj[i][j] = 16;
for (j = 1; j < 11; j++)
for (i = base_channel; i < s->audio_header.prim_channels; i++)
@@ -790,10 +790,7 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
{
int k, l;
int subsubframe = s->current_subsubframe;
-
- const float *quant_step_table;
-
- LOCAL_ALIGNED_16(int32_t, block, [SAMPLES_PER_SUBBAND * DCA_SUBBANDS]);
+ const int *quant_step_table;
/*
* Audio data
@@ -801,13 +798,13 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
/* Select quantization step size table */
if (s->bit_rate_index == 0x1f)
- quant_step_table = ff_dca_lossless_quant_d;
+ quant_step_table = ff_dca_lossless_quant;
else
- quant_step_table = ff_dca_lossy_quant_d;
+ quant_step_table = ff_dca_lossy_quant;
for (k = base_channel; k < s->audio_header.prim_channels; k++) {
- float (*subband_samples)[8] =
s->dca_chan[k].subband_samples[block_index];
- float rscale[DCA_SUBBANDS];
+ int (*subband_samples)[8] =
s->dca_chan[k].subband_samples[block_index];
+ int64_t rscale[DCA_SUBBANDS];
if (get_bits_left(&s->gb) < 0)
return AVERROR_INVALIDDATA;
@@ -818,7 +815,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel,
int block_index)
/* Select the mid-tread linear quantizer */
int abits = s->dca_chan[k].bitalloc[l];
- float quant_step_size = quant_step_table[abits];
+ int quant_step_size = quant_step_table[abits];
/*
* Determine quantization index code book and its type
@@ -832,13 +829,14 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
*/
if (!abits) {
rscale[l] = 0;
- memset(block + SAMPLES_PER_SUBBAND * l, 0, SAMPLES_PER_SUBBAND
* sizeof(block[0]));
+ memset(subband_samples[l], 0, SAMPLES_PER_SUBBAND *
+ sizeof(subband_samples[l][0]));
} else {
/* Deal with transients */
int sfi = s->dca_chan[k].transition_mode[l] &&
subsubframe >= s->dca_chan[k].transition_mode[l];
- rscale[l] = quant_step_size *
s->dca_chan[k].scale_factor[l][sfi] *
- s->audio_header.scalefactor_adj[k][sel];
+ rscale[l] = (s->dca_chan[k].scale_factor[l][sfi] *
+ s->audio_header.scalefactor_adj[k][sel] + 8) >> 4;
if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
if (abits <= 7) {
@@ -851,7 +849,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel,
int block_index)
block_code1 = get_bits(&s->gb, size);
block_code2 = get_bits(&s->gb, size);
err = decode_blockcodes(block_code1,
block_code2,
- levels, block +
SAMPLES_PER_SUBBAND * l);
+ levels,
subband_samples[l]);
if (err) {
av_log(s->avctx, AV_LOG_ERROR,
"ERROR: block code look-up failed\n");
@@ -860,20 +858,18 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
} else {
/* no coding */
for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
- block[SAMPLES_PER_SUBBAND * l + m] =
get_sbits(&s->gb, abits - 3);
+ subband_samples[l][m] = get_sbits(&s->gb, abits -
3);
}
} else {
/* Huffman coded */
for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
- block[SAMPLES_PER_SUBBAND * l + m] =
get_bitalloc(&s->gb,
-
&dca_smpl_bitalloc[abits], sel);
+ subband_samples[l][m] = get_bitalloc(&s->gb,
+
&dca_smpl_bitalloc[abits], sel);
}
}
+ s->dcadsp.dequantize(subband_samples[l], quant_step_size,
rscale[l]);
}
- s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv,
subband_samples[0],
- block, rscale,
SAMPLES_PER_SUBBAND * s->audio_header.vq_start_subband[k]);
-
for (l = 0; l < s->audio_header.vq_start_subband[k]; l++) {
int m;
/*
@@ -883,25 +879,25 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
int n;
if (s->predictor_history)
subband_samples[l][0] +=
(ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
-
s->dca_chan[k].subband_samples_hist[l][3] +
-
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
-
s->dca_chan[k].subband_samples_hist[l][2] +
-
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
-
s->dca_chan[k].subband_samples_hist[l][1] +
-
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
-
s->dca_chan[k].subband_samples_hist[l][0]) *
- (1.0f / 8192);
+
(int64_t)s->dca_chan[k].subband_samples_hist[l][3] +
+
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
+
(int64_t)s->dca_chan[k].subband_samples_hist[l][2] +
+
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
+
(int64_t)s->dca_chan[k].subband_samples_hist[l][1] +
+
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
+
(int64_t)s->dca_chan[k].subband_samples_hist[l][0]) +
+ (1 << 12) >> 13;
for (m = 1; m < SAMPLES_PER_SUBBAND; m++) {
- float sum =
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
- subband_samples[l][m - 1];
+ int64_t sum =
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
+ (int64_t)subband_samples[l][m - 1];
for (n = 2; n <= 4; n++)
if (m >= n)
sum +=
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
- subband_samples[l][m - n];
+ (int64_t)subband_samples[l][m - n];
else if (s->predictor_history)
sum +=
ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
- s->dca_chan[k].subband_samples_hist[l][m -
n + 4];
- subband_samples[l][m] += sum * 1.0f / 8192;
+
(int64_t)s->dca_chan[k].subband_samples_hist[l][m - n + 4];
+ subband_samples[l][m] += (int)(sum + (1 << 12) >> 13);
}
}
@@ -921,11 +917,12 @@ static int dca_subsubframe(DCAContext *s, int
base_channel, int block_index)
s->debug_flag |= 0x01;
}
- s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
- ff_dca_high_freq_vq, subsubframe *
SAMPLES_PER_SUBBAND,
- s->dca_chan[k].scale_factor,
- s->audio_header.vq_start_subband[k],
- s->audio_header.subband_activity[k]);
+ s->dcadsp.decode_hf_int(subband_samples,
s->dca_chan[k].high_freq_vq,
+ ff_dca_high_freq_vq, subsubframe *
SAMPLES_PER_SUBBAND,
+ s->dca_chan[k].scale_factor,
+ s->audio_header.vq_start_subband[k],
+ s->audio_header.subband_activity[k]);
+
}
}
@@ -945,6 +942,8 @@ static int dca_filter_channels(DCAContext *s, int
block_index, int upsample)
int k;
if (upsample) {
+ LOCAL_ALIGNED_16(float, samples, [64], [SAMPLES_PER_SUBBAND]);
+
if (!s->qmf64_table) {
s->qmf64_table = qmf64_precompute();
if (!s->qmf64_table)
@@ -953,21 +952,31 @@ static int dca_filter_channels(DCAContext *s, int
block_index, int upsample)
/* 64 subbands QMF */
for (k = 0; k < s->audio_header.prim_channels; k++) {
- float (*subband_samples)[SAMPLES_PER_SUBBAND] =
s->dca_chan[k].subband_samples[block_index];
+ int (*subband_samples)[SAMPLES_PER_SUBBAND] =
+ s->dca_chan[k].subband_samples[block_index];
+
+ s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
+ 64 * SAMPLES_PER_SUBBAND);
if (s->channel_order_tab[k] >= 0)
- qmf_64_subbands(s, k, subband_samples,
+ qmf_64_subbands(s, k, samples,
s->samples_chanptr[s->channel_order_tab[k]],
/* Upsampling needs a factor 2 here. */
M_SQRT2 / 32768.0);
}
} else {
/* 32 subbands QMF */
+ LOCAL_ALIGNED_16(float, samples, [32], [SAMPLES_PER_SUBBAND]);
+
for (k = 0; k < s->audio_header.prim_channels; k++) {
- float (*subband_samples)[SAMPLES_PER_SUBBAND] =
s->dca_chan[k].subband_samples[block_index];
+ int (*subband_samples)[SAMPLES_PER_SUBBAND] =
+ s->dca_chan[k].subband_samples[block_index];
+
+ s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
+ 32 * SAMPLES_PER_SUBBAND);
if (s->channel_order_tab[k] >= 0)
- qmf_32_subbands(s, k, subband_samples,
+ qmf_32_subbands(s, k, samples,
s->samples_chanptr[s->channel_order_tab[k]],
M_SQRT1_2 / 32768.0);
}
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 34b5da2..c1d3076 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -25,6 +25,7 @@
#include "libavutil/intreadwrite.h"
#include "dcadsp.h"
+#include "dcamath.h"
static void decode_hf_c(float dst[DCA_SUBBANDS][8],
const int32_t vq_num[DCA_SUBBANDS],
@@ -44,6 +45,21 @@ static void decode_hf_c(float dst[DCA_SUBBANDS][8],
}
}
+static void decode_hf_int_c(int dst[DCA_SUBBANDS][8],
+ const int32_t vq_num[DCA_SUBBANDS],
+ const int8_t hf_vq[1024][32], intptr_t vq_offset,
+ int32_t scale[DCA_SUBBANDS][2],
+ intptr_t start, intptr_t end)
+{
+ int i, j;
+
+ for (j = start; j < end; j++) {
+ const int8_t *ptr = &hf_vq[vq_num[j]][vq_offset];
+ for (i = 0; i < 8; i++)
+ dst[j][i] = ptr[i] * scale[j][0] + 8 >> 4;
+ }
+}
+
static inline void dca_lfe_fir(float *out, const float *in, const float *coefs,
int decifactor)
{
@@ -93,6 +109,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8],
int sb_act,
}
}
+static void dequantize_c(int *samples, int step_size, int scale)
+{
+ int64_t step = (int64_t)step_size * scale;
+ int shift, i;
+ int32_t step_scale;
+
+ if (step > (1 << 23))
+ shift = av_log2(step >> 23) + 1;
+ else
+ shift = 0;
+ step_scale = (int32_t)(step >> shift);
+
+ for (i = 0; i < 8; i++)
+ samples[i] = dca_clip23(dca_norm((int64_t)samples[i] * step_scale, 22
- shift));
+}
+
static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs)
{
dca_lfe_fir(out, in, coefs, 32);
@@ -109,6 +141,8 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
s->lfe_fir[1] = dca_lfe_fir1_c;
s->qmf_32_subbands = dca_qmf_32_subbands;
s->decode_hf = decode_hf_c;
+ s->decode_hf_int = decode_hf_int_c;
+ s->dequantize = dequantize_c;
if (ARCH_ARM)
ff_dcadsp_init_arm(s);
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 0fa75a5..f290633 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -37,6 +37,12 @@ typedef struct DCADSPContext {
const int8_t hf_vq[1024][32], intptr_t vq_offset,
int32_t scale[DCA_SUBBANDS][2],
intptr_t start, intptr_t end);
+ void (*decode_hf_int)(int dst[DCA_SUBBANDS][8],
+ const int32_t vq_num[DCA_SUBBANDS],
+ const int8_t hf_vq[1024][32], intptr_t vq_offset,
+ int32_t scale[DCA_SUBBANDS][2],
+ intptr_t start, intptr_t end);
+ void (*dequantize)(int *samples, int step_size, int scale);
} DCADSPContext;
void ff_dcadsp_init(DCADSPContext *s);
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
index 2dff704..0416b40 100644
--- a/libavcodec/fmtconvert.c
+++ b/libavcodec/fmtconvert.c
@@ -32,6 +32,14 @@ static void int32_to_float_fmul_scalar_c(float *dst, const
int32_t *src,
dst[i] = src[i] * mul;
}
+static void int32_to_float_c(float *dst, const int32_t *src, int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ dst[i] = (float)src[i];
+}
+
static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
const int32_t *src, const float *mul,
int len)
@@ -44,6 +52,7 @@ static void int32_to_float_fmul_array8_c(FmtConvertContext
*c, float *dst,
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
{
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+ c->int32_to_float = int32_to_float_c;
c->int32_to_float_fmul_array8 = int32_to_float_fmul_array8_c;
if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
index 7de890b..3de1817 100644
--- a/libavcodec/fmtconvert.h
+++ b/libavcodec/fmtconvert.h
@@ -37,6 +37,13 @@ typedef struct FmtConvertContext {
*/
void (*int32_to_float_fmul_scalar)(float *dst, const int32_t *src,
float mul, int len);
+ /**
+ * Convert an array of int32_t to float.
+ * @param dst destination array of float.
+ * @param src source array of int32_t.
+ * @param len number of elements to convert.
+ */
+ void (*int32_to_float)(float *dst, const int32_t *src, int len);
/**
* Convert an array of int32_t to float and multiply by a float value from
another array,
--
2.1.4
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel