Re: [libav-devel] [PATCH 2/2] dca: change the core to work with integer coefficients.

Janne Grunau Wed, 09 Dec 2015 13:22:32 -0800

On 2015-12-03 13:45:09 +0100, Alexandra Hájková wrote:
> The dca core decoder converts integer coefficients read from the
> bitstream to floats just after reading them (along with dequantization).
> All the other steps of the audio reconstruction are done with floats
> which makes the output for the DTS lossless extension (XLL)
> actually lossy.
> This patch changes the dca core to work with integer coefficients
> till QMF. At this point the integer coefficients are transformed to floats.
> The coefficients for the LFE channel (lfe_data) are not touched.
> This is the first step for the really lossless XLL decoding.
> ---
> 
> the output channel waveforms were compared in Audacity with the waveforms of
> the "before this patch" state and were considered the same
> 
>  libavcodec/dca.h    |   6 +--
>  libavcodec/dcadec.c | 117 
> ++++++++++++++++++++++++++++++++++------------------
>  2 files changed, 79 insertions(+), 44 deletions(-)
> 
> diff --git a/libavcodec/dca.h b/libavcodec/dca.h
> index 6548d75..9947878 100644
> --- a/libavcodec/dca.h
> +++ b/libavcodec/dca.h
> @@ -139,7 +139,7 @@ typedef struct DCAAudioHeader {
>      int scalefactor_huffman[DCA_PRIM_CHANNELS_MAX]; ///< scale factor code 
> book
>      int bitalloc_huffman[DCA_PRIM_CHANNELS_MAX];    ///< bit allocation 
> quantizer select
>      int quant_index_huffman[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX]; ///< 
> quantization index codebook select
> -    float scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];   ///< 
> scale factor adjustment
> +    int scalefactor_adj[DCA_PRIM_CHANNELS_MAX][DCA_ABITS_MAX];     ///< 
> scale factor adjustment
>  
>      int subframes;              ///< number of subframes
>      int total_channels;         ///< number of channels including extensions
> @@ -147,10 +147,10 @@ typedef struct DCAAudioHeader {
>  } DCAAudioHeader;
>  
>  typedef struct DCAChan {
> -    DECLARE_ALIGNED(32, float, 
> subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
> +    DECLARE_ALIGNED(32, int, 
> subband_samples)[DCA_BLOCKS_MAX][DCA_SUBBANDS][8];
>  
>      /* Subband samples history (for ADPCM) */
> -    DECLARE_ALIGNED(16, float, subband_samples_hist)[DCA_SUBBANDS][4];
> +    DECLARE_ALIGNED(16, int, subband_samples_hist)[DCA_SUBBANDS][4];


These two should probably be int32_t instead of int to match the code.

>      int hist_index;
>  
>      /* Half size is sufficient for core decoding, but for 96 kHz data
> diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
> index 7e94638..80da622 100644
> --- a/libavcodec/dcadec.c
> +++ b/libavcodec/dcadec.c
> @@ -44,6 +44,7 @@
>  #include "dcadata.h"
>  #include "dcadsp.h"
>  #include "dcahuff.h"
> +#include "dcamath.h"
>  #include "fft.h"
>  #include "fmtconvert.h"
>  #include "get_bits.h"
> @@ -225,7 +226,7 @@ static inline void get_array(GetBitContext *gb, int *dst, 
> int len, int bits)
>  static int dca_parse_audio_coding_header(DCAContext *s, int base_channel)
>  {
>      int i, j;
> -    static const float adj_table[4] = { 1.0, 1.1250, 1.2500, 1.4375 };
> +    static const int adj_table[4] = { 16, 18, 20, 23 };
>      static const int bitlen[11] = { 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3 };
>      static const int thr[11]    = { 0, 1, 3, 3, 3, 3, 7, 7, 7, 7, 7 };
>  
> @@ -785,14 +786,26 @@ static int decode_blockcodes(int code1, int code2, int 
> levels, int32_t *values)
>  static const uint8_t abits_sizes[7]  = { 7, 10, 12, 13, 15, 17, 19 };
>  static const uint8_t abits_levels[7] = { 3,  5,  7,  9, 13, 17, 25 };
>  

This is probably also a candidate for SIMD optimizations and should go 
in dcadsp.c

> +static void dequantize(int *samples, int step_size, int scale) {
> +    int64_t step = (int64_t)step_size * scale;
> +    int shift, i;
> +    int32_t step_scale;
> +
> +    if (step > (1 << 23))
> +        shift = av_log2(step >> 23) + 1;
> +    else
> +        shift = 0;
> +    step_scale = (int32_t)(step >> shift);
> +
> +    for (i = 0; i < SAMPLES_PER_SUBBAND; i++)
> +        samples[i] = dca_clip23(dca_norm((int64_t)samples[i] * step_scale, 
> 22 - shift));
> +}
> +
>  static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
>  {
>      int k, l;
>      int subsubframe = s->current_subsubframe;
> -
> -    const float *quant_step_table;
> -
> -    LOCAL_ALIGNED_16(int32_t, block, [SAMPLES_PER_SUBBAND * DCA_SUBBANDS]);
> +    const int *quant_step_table;
>  
>      /*
>       * Audio data
> @@ -800,13 +813,13 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>  
>      /* Select quantization step size table */
>      if (s->bit_rate_index == 0x1f)
> -        quant_step_table = ff_dca_lossless_quant_d;
> +        quant_step_table = ff_dca_lossless_quant;
>      else
> -        quant_step_table = ff_dca_lossy_quant_d;
> +        quant_step_table = ff_dca_lossy_quant;
>  
>      for (k = base_channel; k < s->audio_header.prim_channels; k++) {
> -        float (*subband_samples)[8] = 
> s->dca_chan[k].subband_samples[block_index];
> -        float rscale[DCA_SUBBANDS];
> +        int (*subband_samples)[8] = 
> s->dca_chan[k].subband_samples[block_index];
> +        int64_t rscale[DCA_SUBBANDS];
>  
>          if (get_bits_left(&s->gb) < 0)
>              return AVERROR_INVALIDDATA;
> @@ -817,7 +830,7 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>              /* Select the mid-tread linear quantizer */
>              int abits = s->dca_chan[k].bitalloc[l];
>  
> -            float quant_step_size = quant_step_table[abits];
> +            int quant_step_size = quant_step_table[abits];
>  
>              /*
>               * Determine quantization index code book and its type
> @@ -831,12 +844,13 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>               */
>              if (!abits) {
>                  rscale[l] = 0;
> -                memset(block + SAMPLES_PER_SUBBAND * l, 0, 
> SAMPLES_PER_SUBBAND * sizeof(block[0]));
> +                memset(subband_samples[l], 0, SAMPLES_PER_SUBBAND *
> +                       sizeof(subband_samples[l][0]));
>              } else {
>                  /* Deal with transients */
>                  int sfi = s->dca_chan[k].transition_mode[l] &&
>                      subsubframe >= s->dca_chan[k].transition_mode[l];
> -                rscale[l] = quant_step_size * 
> s->dca_chan[k].scale_factor[l][sfi] *
> +                rscale[l] = s->dca_chan[k].scale_factor[l][sfi] *
>                              s->audio_header.scalefactor_adj[k][sel];
>  
>                  if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) 
> {
> @@ -850,7 +864,7 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                          block_code1 = get_bits(&s->gb, size);
>                          block_code2 = get_bits(&s->gb, size);
>                          err         = decode_blockcodes(block_code1, 
> block_code2,
> -                                                        levels, block + 
> SAMPLES_PER_SUBBAND * l);
> +                                                        levels, 
> subband_samples[l]);
>                          if (err) {
>                              av_log(s->avctx, AV_LOG_ERROR,
>                                     "ERROR: block code look-up failed\n");
> @@ -859,20 +873,18 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                      } else {
>                          /* no coding */
>                          for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
> -                            block[SAMPLES_PER_SUBBAND * l + m] = 
> get_sbits(&s->gb, abits - 3);
> +                            subband_samples[l][m] = get_sbits(&s->gb, abits 
> - 3);
>                      }
>                  } else {
>                      /* Huffman coded */
>                      for (m = 0; m < SAMPLES_PER_SUBBAND; m++)
> -                        block[SAMPLES_PER_SUBBAND * l + m] = 
> get_bitalloc(&s->gb,
> -                                                        
> &dca_smpl_bitalloc[abits], sel);
> +                        subband_samples[l][m] = get_bitalloc(&s->gb,
> +                                                             
> &dca_smpl_bitalloc[abits], sel);
>                  }
>              }
> +            dequantize(subband_samples[l], quant_step_size, rscale[l]);
>          }
>  
> -        s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, 
> subband_samples[0],
> -                                               block, rscale, 
> SAMPLES_PER_SUBBAND * s->audio_header.vq_start_subband[k]);
> -
>          for (l = 0; l < s->audio_header.vq_start_subband[k]; l++) {
>              int m;
>              /*
> @@ -882,25 +894,25 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>                  int n;
>                  if (s->predictor_history)
>                      subband_samples[l][0] += 
> (ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][3] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][2] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][1] +
> -                                                 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
> -                                                 
> s->dca_chan[k].subband_samples_hist[l][0]) *
> -                                                (1.0f / 8192);
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][3] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][1] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][2] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][2] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][1] +
> +                                              
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][3] *
> +                                              
> (int64_t)s->dca_chan[k].subband_samples_hist[l][0]) +
> +                                              (1 << 12) >> 13;
>                  for (m = 1; m < SAMPLES_PER_SUBBAND; m++) {
> -                    float sum = 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> -                                subband_samples[l][m - 1];
> +                    int64_t sum = 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][0] *
> +                                  (int64_t)subband_samples[l][m - 1];
>                      for (n = 2; n <= 4; n++)
>                          if (m >= n)
>                              sum += 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
> -                                   subband_samples[l][m - n];
> +                                   (int64_t)subband_samples[l][m - n];
>                          else if (s->predictor_history)
>                              sum += 
> ff_dca_adpcm_vb[s->dca_chan[k].prediction_vq[l]][n - 1] *
> -                                   s->dca_chan[k].subband_samples_hist[l][m 
> - n + 4];
> -                    subband_samples[l][m] += sum * 1.0f / 8192;
> +                                   
> (int64_t)s->dca_chan[k].subband_samples_hist[l][m - n + 4];
> +                    subband_samples[l][m] += (int)(sum + (1 << 12) >> 13);
>                  }
>              }
>  
> @@ -914,17 +926,22 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>           * Decode VQ encoded high frequencies
>           */
>          if (s->audio_header.subband_activity[k] > 
> s->audio_header.vq_start_subband[k]) {
> +            int i, j;
> +
>              if (!s->debug_flag & 0x01) {
>                  av_log(s->avctx, AV_LOG_DEBUG,
>                         "Stream with high frequencies VQ coding\n");
>                  s->debug_flag |= 0x01;
>              }
>  
> -            s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
> -                                ff_dca_high_freq_vq, subsubframe * 
> SAMPLES_PER_SUBBAND,
> -                                s->dca_chan[k].scale_factor,
> -                                s->audio_header.vq_start_subband[k],
> -                                s->audio_header.subband_activity[k]);
> +            // this should be SIMDified

please move it to dcadsp.c and call it through a function pointer then

> +            for (j = s->audio_header.vq_start_subband[k]; j < 
> s->audio_header.subband_activity[k]; j++) {
> +                /* 1 vector -> 32 samples but we only need the 8 samples
> +                 * for this subsubframe. */
> +                const int8_t *ptr = 
> &ff_dca_high_freq_vq[s->dca_chan[k].high_freq_vq[j]][subsubframe * 
> SAMPLES_PER_SUBBAND];
> +                for (i = 0; i < 8; i++)
> +                    subband_samples[j][i] = ptr[i] * 
> s->dca_chan[k].scale_factor[j][0] + 8 >> 4;
> +            }
>          }
>      }
>  
> @@ -942,8 +959,14 @@ static int dca_subsubframe(DCAContext *s, int 
> base_channel, int block_index)
>  static int dca_filter_channels(DCAContext *s, int block_index, int upsample)
>  {
>      int k;
> +    float param[DCA_SUBBANDS];
> +
> +    for (k = 0; k < DCA_SUBBANDS; k++)
> +        param[k] = 1;

This is a little pointless. We don't seem to have SIMD code for int32_t 
to float conversion, but that is no good reason to multiply by 1.0.

>  
>      if (upsample) {
> +        LOCAL_ALIGNED_16(float, samples, [64], [SAMPLES_PER_SUBBAND]);
> +
>          if (!s->qmf64_table) {
>              s->qmf64_table = qmf64_precompute();
>              if (!s->qmf64_table)
> @@ -952,21 +975,33 @@ static int dca_filter_channels(DCAContext *s, int 
> block_index, int upsample)
>  
>          /* 64 subbands QMF */
>          for (k = 0; k < s->audio_header.prim_channels; k++) {
> -            float (*subband_samples)[SAMPLES_PER_SUBBAND] = 
> s->dca_chan[k].subband_samples[block_index];
> +            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                s->dca_chan[k].subband_samples[block_index];
> +
> +            s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, samples[0],
> +                                                   subband_samples[0], param,
> +                                                   64 * SAMPLES_PER_SUBBAND);

int32_to_float_fmul_scalar can be used instead, since the factors in param 
are all identical; better yet, add int32_to_float to fmtconvert.c so that 
we can avoid the pointless multiplication.

>              if (s->channel_order_tab[k] >= 0)
> -                qmf_64_subbands(s, k, subband_samples,
> +                qmf_64_subbands(s, k, samples,
>                                  s->samples_chanptr[s->channel_order_tab[k]],
>                                  /* Upsampling needs a factor 2 here. */
>                                  M_SQRT2 / 32768.0);
>          }
>      } else {
>          /* 32 subbands QMF */
> +        LOCAL_ALIGNED_16(float, samples, [32], [SAMPLES_PER_SUBBAND]);
> +
>          for (k = 0; k < s->audio_header.prim_channels; k++) {
> -            float (*subband_samples)[SAMPLES_PER_SUBBAND] = 
> s->dca_chan[k].subband_samples[block_index];
> +            int (*subband_samples)[SAMPLES_PER_SUBBAND] =
> +                s->dca_chan[k].subband_samples[block_index];
> +
> +            s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, samples[0],
> +                                                   subband_samples[0], param,
> +                                                   32 * SAMPLES_PER_SUBBAND);

same here

>  
>              if (s->channel_order_tab[k] >= 0)
> -                qmf_32_subbands(s, k, subband_samples,
> +                qmf_32_subbands(s, k, samples,
>                                  s->samples_chanptr[s->channel_order_tab[k]],
>                                  M_SQRT1_2 / 32768.0);
>          }

A 20% slowdown in decoding performance looks high for this change.  Did 
you run the tests on a 32-bit or 64-bit system?

Janne
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 2/2] dca: change the core to work with integer coefficients.

Reply via email to