Re: [libav-devel] [PATCH] dca: change the core to work with integer coefficients.

Janne Grunau Wed, 16 Dec 2015 10:35:45 -0800

On 2015-12-16 12:47:25 +0100, Alexandra Hájková wrote:
> The DCA core decoder converts integer coefficients read from the
> bitstream to floats just after reading them (along with dequantization).
> All the other steps of the audio reconstruction are done with floats
> which makes the output for the DTS lossless extension (XLL)
> actually lossy.
> This patch changes the DCA core to work with integer coefficients
> until QMF. At this point the integer coefficients are converted to floats.
> The coefficients for the LFE channel (lfe_data) are not touched.
> This is the first step for the really lossless XLL decoding.
> ---
> the patch with applied review comments from the last Janne's review
> 
>  libavcodec/dca.h        |   8 ++--
>  libavcodec/dcadec.c     | 111 
> +++++++++++++++++++++++++-----------------------
>  libavcodec/dcadsp.c     |  34 +++++++++++++++
>  libavcodec/dcadsp.h     |   6 +++
>  libavcodec/fmtconvert.c |   9 ++++
>  libavcodec/fmtconvert.h |  10 +++++
>  tests/fate/audio.mak    |   2 +-
>  7 files changed, 122 insertions(+), 58 deletions(-)
> 
> diff --git a/libavcodec/dca.h b/libavcodec/dca.h
> index 6548d75..a85470d 100644
> --- a/libavcodec/dca.h
> +++ b/libavcodec/dca.h
> @@ -138,8 +138,8 @@ typedef struct DCAAudioHeader {
>      int transient_huffman[DCA_PRIM_CHANNELS_MAX];   ///< transient mode code 
> book
>      int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX]; ///< scale factor code 
> book
>      int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX];    ///< bit allocation 
> quantizer select
> -    int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< 
> quantization index codebook select
> -    float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];   ///< 
> scale factor adjustment
> +    int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];  ///< 
> quantization index codebook select
> +    uint32_t scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< 
> scale factor adjustment
>  
>      int subframes;              ///< number of subframes
>      int total_channels;         ///< number of channels including extensions
> @@ -147,10 +147,10 @@ typedef struct DCAAudioHeader {
>  } DCAAudioHeader;
>  
>  typedef struct DCAChan {
> -    DECLARE_ALIGNED(32, float, 
> subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
> +    DECLARE_ALIGNED(32, int32_t, 
> subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
>  
>      /* Subband samples history (for ADPCM) */
> -    DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_SUBBANDS][4];
> +    DECLARE_ALIGNED(32, int32_t, subband_samples_hist)[DCA_SUBBANDS][4];
>      int hist_index;
>  
>      /* Half size is sufficient for core decoding, but for 96 kHz data
> diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
> index aca6ed3..2932626 100644
> --- a/libavcodec/dcadec.c
> +++ b/libavcodec/dcadec.c
> @@ -226,7 +226,7 @@ static inline void get_array(GetBitContext *gb, int *dst, 
> int len, int bits)
>  static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
>  {
>      int i, j;
> -    static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
> +    static const uint8_t adj_table[4] = { 16, 18, 20, 23 };
>      static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
>      static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
>  
> @@ -265,7 +265,7 @@ static int dca_parse_audio_coding_header(DCAContext *s, 
> int base_channel)
>      /* Get scale factor adjustment */
>      for (j = 0; j < 11; j++)
>          for (i = base_channel; i < s->audio_header.prim_channels; i++)
> -            s->audio_header.scalefactor_adj[i][j] = 1;
> +            s->audio_header.scalefactor_adj[i][j] = 16;
>  
>      for (j = 1; j < 11; j++)
>          for (i = base_channel; i < s->audio_header.prim_channels; i++)
> @@ -790,10 +790,7 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>  {
>      int k, l;
>      int subsubframe = s->current_subsubframe;
> -
> -    const float *quant_step_table;
> -
> -    LOCAL_ALIGNED_16(int32_t, block, [SAMPLES_PER_SUBBAND * DCA_SUBBANDS]);
> +    const uint32_t *quant_step_table;
>  
>      /*
>       * Audio data
> @@ -801,13 +798,12 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>  
>      /* Select quantization step size table */
>      if (s->bit_rate_index == 0x1f)
> -        quant_step_table = ff_dca_lossless_quant_d;
> +        quant_step_table = ff_dca_lossless_quant;
>      else
> -        quant_step_table = ff_dca_lossy_quant_d;
> +        quant_step_table = ff_dca_lossy_quant;
>  
>      for (k = base_channel; k < s->audio_header.prim_channels; k++) {
> -        float (*subband_samples)[8] = 
> s->dca_chan[k].subband_samples[block_index];
> -        float rscale[DCA_SUBBANDS];
> +        int32_t (*subband_samples)[8] = 
> s->dca_chan[k].subband_samples[block_index];
>  
>          if (get_bits_left(&s->gb) < 0)
>              return AVERROR_INVALIDDATA;
> @@ -818,27 +814,25 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>              /* Select the mid-tread linear quantizer */
>              int abits = s->dca_chan[k].bitalloc[l];
>  
> -            float quant_step_size = quant_step_table[abits];
> -
> -            /*
> -             * Determine quantization index code book and its type
> -             */
> -
> -            /* Select quantization index code book */
> -            int sel = s->audio_header.quant_index_huffman[k][abits];
> +            uint32_t quant_step_size = quant_step_table[abits];
>  
>              /*
>               * Extract bits from the bit stream
>               */
> -            if (!abits) {
> -                rscale[l] = 0;
> -                memset(block + SAMPLES_PER_SUBBAND * l, 0, 
> SAMPLES_PER_SUBBAND * sizeof(block[0]));
> -            } else {
> +            if (!abits)
> +                memset(subband_samples[l], 0, SAMPLES_PER_SUBBAND *
> +                       sizeof(subband_samples[l][0]));
> +            else {
> +                uint64_t rscale;


uint32_t is large enough for rscale

>                  /* Deal with transients */
>                  int sfi = s->dca_chan[k].transition_mode[l] &&
>                      subsubframe >= s->dca_chan[k].transition_mode[l];
> -                rscale[l] = quant_step_size * 
> s->dca_chan[k].scale_factor[l][sfi] *
> -                            s->audio_header.scalefactor_adj[k][sel];
> +                /* Determine quantization index code book and its type.
> +                   Select quantization index code book */
> +                int sel = s->audio_header.quant_index_huffman[k][abits];
> +
> +                rscale = (s->dca_chan[k].scale_factor[l][sfi] *
> +                          s->audio_header.scalefactor_adj[k][sel] + 8) >> 4;
>  
>                  if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) 
> {
>                      if (abits <= 7) {
> @@ -851,7 +845,7 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                          block_code1 = get_bits(&s->gb, size);
>                          block_code2 = get_bits(&s->gb, size);
>                          err         = decode_blockcodes(block_code1, 
> block_code2,
> -                                                        levels, block + 
> SAMPLES_PER_SUBBAND * l);
> +                                                        levels, 
> subband_samples[l]);
>                          if (err) {
>                              av_log(s->avctx, AV_LOG_ERROR,
>                                     "ERROR: block code look-up failed\n");
> @@ -860,20 +854,18 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                      } else {
>                          /* no coding */
>                          for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
> -                            block[SAMPLES_PER_SUBBAND * l + m] = 
> get_sbits(&s->gb, abits - 3);
> +                            subband_samples[l][m] = get_sbits(&s->gb, abits 
> - 3);
>                      }
>                  } else {
>                      /* Huffman coded */
>                      for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
> -                        block[SAMPLES_PER_SUBBAND * l + m] = 
> get_bitalloc(&s->gb,
> -                                                        
> &dca_smpl_bitalloc[abits], sel);
> +                        subband_samples[l][m] = get_bitalloc(&s->gb,
> +                                                             
> &dca_smpl_bitalloc[abits], sel);
>                  }
> +                s->dcadsp.dequantize(subband_samples[l], quant_step_size, 
> rscale);
>              }
>          }
>  
> -        s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, 
> subband_samples[0],
> -                                               block, rscale, 
> SAMPLES_PER_SUBBAND * s->audio_header.vq_start_subband[k]);
> -
>          for (l = 0; l < s->audio_header.vq_start_subband[k]; l++) {
>              int m;
>              /*
> @@ -883,25 +875,25 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                  int n;
>                  if (s->predictor_history)
>                      subband_samples[l][0] += 
> (ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][3] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][2] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][1] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][0]) *
> -                                                (1.0f / 8192);
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][3] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][2] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][1] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][0]) +
> +                                              (1 << 12) >> 13;
>                  for (m = 1; m < SAMPLES_PER_SUBBAND; m++) {
> -                    float sum = 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> -                                subband_samples[l][m - 1];
> +                    int64_t sum = 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> +                                  (int64_t)subband_samples[l][m - 1];
>                      for (n = 2; n <= 4; n++)
>                          if (m >= n)
>                              sum += 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
> -                                   subband_samples[l][m - n];
> +                                   (int64_t)subband_samples[l][m - n];
>                          else if (s->predictor_history)
>                              sum += 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
> -                                   s->dca_chan[k].subband_samples_hist[l][m 
> - n + 4];
> -                    subband_samples[l][m] += sum * 1.0f / 8192;
> +                                   
> (int64_t)s->dca_chan[k].subband_samples_hist[l][m - n + 4];
> +                    subband_samples[l][m] += (int32_t)(sum + (1 << 12) >> 
> 13);
>                  }
>              }
>  
> @@ -921,11 +913,12 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                  s->debug_flag |= 0x01;
>              }
>  
> -            s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
> -                                ff_dca_high_freq_vq, subsubframe * 
> SAMPLES_PER_SUBBAND,
> -                                s->dca_chan[k].scale_factor,
> -                                s->audio_header.vq_start_subband[k],
> -                                s->audio_header.subband_activity[k]);
> +            s->dcadsp.decode_hf_int(subband_samples, 
> s->dca_chan[k].high_freq_vq,
> +                                    ff_dca_high_freq_vq, subsubframe * 
> SAMPLES_PER_SUBBAND,
> +                                    s->dca_chan[k].scale_factor,
> +                                    s->audio_header.vq_start_subband[k],
> +                                    s->audio_header.subband_activity[k]);
> +
>          }
>      }
>  
> @@ -945,6 +938,8 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>      int k;
>  
>      if (upsample) {
> +        LOCAL_ALIGNED(32, float, samples, [64], [SAMPLES_PER_SUBBAND]);
> +
>          if (!s->qmf64_table) {
>              s->qmf64_table = qmf64_precompute();
>              if (!s->qmf64_table)
> @@ -953,21 +948,31 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>  
>          /* 64 subbands QMF */
>          for (k = 0; k < s->audio_header.prim_channels; k++) {
> -            float (*subband_samples)[SAMPLES_PER_SUBBAND] = 
> s->dca_chan[k].subband_samples[block_index];
> +            int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                     s->dca_chan[k].subband_samples[block_index];
> +
> +            s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
> +                                       64 * SAMPLES_PER_SUBBAND);

I missed in the first review that samples is a two-dimensional array.
Instead of int32_to_float() we need int32_to_float_array8(). Sorry.


>              if (s->channel_order_tab[k] >= 0)
> -                qmf_64_subbands(s, k, subband_samples,
> +                qmf_64_subbands(s, k, samples,
>                                  s->samples_chanptr[s->channel_order_tab[k]],
>                                  /* Upsampling needs a factor 2 here. */
>                                  M_SQRT2 / 32768.0);
>          }
>      } else {
>          /* 32 subbands QMF */
> +        LOCAL_ALIGNED(32, float, samples, [32], [SAMPLES_PER_SUBBAND]);
> +
>          for (k = 0; k < s->audio_header.prim_channels; k++) {
> -            float (*subband_samples)[SAMPLES_PER_SUBBAND] = 
> s->dca_chan[k].subband_samples[block_index];
> +            int32_t (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                     s->dca_chan[k].subband_samples[block_index];
> +
> +            s->fmt_conv.int32_to_float(samples[0], subband_samples[0],
> +                                       32 * SAMPLES_PER_SUBBAND);
>  
>              if (s->channel_order_tab[k] >= 0)
> -                qmf_32_subbands(s, k, subband_samples,
> +                qmf_32_subbands(s, k, samples,
>                                  s->samples_chanptr[s->channel_order_tab[k]],
>                                  M_SQRT1_2 / 32768.0);
>          }
> diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
> index 34b5da2..d1c9492 100644
> --- a/libavcodec/dcadsp.c
> +++ b/libavcodec/dcadsp.c
> @@ -25,6 +25,7 @@
>  #include "libavutil/intreadwrite.h"
>  
>  #include "dcadsp.h"
> +#include "dcamath.h"
>  
>  static void decode_hf_c(float dst[DCA_SUBBANDS][8],
>                          const int32_t vq_num[DCA_SUBBANDS],
> @@ -44,6 +45,21 @@ static void decode_hf_c(float dst[DCA_SUBBANDS][8],
>      }
>  }
>  
> +static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
> +                            const int32_t vq_num[DCA_SUBBANDS],
> +                            const int8_t hf_vq[1024][32], intptr_t vq_offset,
> +                            int32_t scale[DCA_SUBBANDS][2],
> +                            intptr_t start, intptr_t end)
> +{
> +    int i, j;
> +
> +    for (j = start; j < end; j++) {
> +        const int8_t *ptr = &hf_vq[vq_num[j]][vq_offset];
> +        for (i = 0; i < 8; i++)
> +            dst[j][i] = ptr[i] * scale[j][0] + 8 >> 4;
> +    }
> +}
> +
>  static inline void dca_lfe_fir(float *out, const float *in, const float 
> *coefs,
>                                 int decifactor)
>  {
> @@ -93,6 +109,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], 
> int sb_act,
>      }
>  }
>  
> +static void dequantize_c(int32_t *samples, uint32_t step_size, uint64_t 
> scale)

scale should fit into uint32_t

> +{
> +    int64_t step = (int64_t)step_size * scale;
> +    int shift, i;
> +    int32_t step_scale;
> +
> +    if (step > (1 << 23))
> +        shift = av_log2(step >> 23) + 1;

shift can in theory exceed 22 if I use the maximal values for both (I
haven't checked whether both can legally occur at the same time, though).
The maximal value for step_size is 6710886 and for scale it is 8317638 *
23/16. log2((6710886 * 8317638 * 23/16) >> 23) is 23, which makes the
final values too small since they are divided by more than 2^22.

I don't see the need for this, though. This rescaling doesn't avoid
64-bit multiplications. It is also not required to prevent 64-bit
overflows in the final samples * step_scale multiplication: scale is in
[0, 2^24) and samples * step_size doesn't exceed a 32-bit int. So this
seems to be just unneeded extra calculation. Decoding should be faster
and produce the same results, as long as the too-large shift value bug
is never triggered.

the rest looks ok

Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH] dca: change the core to work with integer coefficients.

Reply via email to