Re: [FFmpeg-soc] AAC TNS encoder

Alex Converse Fri, 12 Jun 2009 14:25:41 -0700

On Fri, Jun 12, 2009 at 3:42 PM, Alex Converse<[email protected]> wrote:
> The following patchset adds TNS support to the AAC encoder.
>
> It's not 100% ready to be integrated.
>
> Patch 0 adds PARCOR code from Flake and doesn't have much to do with AAC.
> Patch 1 moves some AAC stuff around but doesn't have much to do with encoding.
> Patch 2 adds TNS support.
> Patch 3 adds a TNS coefficient compressor that shouldn't affect the
> output of the file after encoding and decoding but does for reasons
> that stump me at the moment. Any help on this one would be greatly
> appreciated.
>


The tables have been dealt with, here are cleaner versions of patches 1-3.

--Alex

commit 96712c3d9652578eee809ffc1354829b18647942
Author: Alex Converse <[email protected]>
Date:   Fri Jun 12 10:48:13 2009 -0400

    Routine to calculate PARCOR reflection coefficients and prediction gain.
    Ported from Flake, written by Justin Ruggles.

diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index 05a1dee..65c250c 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -99,4 +99,34 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order,
     return 0;
 }
 
+/**
+ * Schur recursion: writes max_order PARCOR (reflection) coefficients to ref,
+ * computed from the autocorrelation values autoc[0..max_order].
+ * @return prediction gain, i.e. autoc[0] divided by the final residual error
+ */
+static float
+compute_parcor_coefs(const float *autoc, int max_order,
+                      float *ref)
+{
+    int i, j;
+    double error;                 /* residual error left after the stages done so far */
+    double gen[2][MAX_LPC_ORDER]; /* the two generator rows updated by the recursion */
+    /* NOTE(review): max_order is not checked against MAX_LPC_ORDER here -- callers must keep it in range */
+    // Schur recursion
+    for(i=0; i<max_order; i++) gen[0][i] = gen[1][i] = autoc[i+1];
+    error = autoc[0];             /* the starting error is the zero-lag autocorrelation */
+    ref[0] = -gen[1][0] / error;  /* NOTE(review): no guard against error == 0 */
+    error += gen[1][0] * ref[0];
+    for(i=1; i<max_order; i++) {
+        for(j=0; j<max_order-i; j++) {
+            gen[1][j] = gen[1][j+1] + ref[i-1] * gen[0][j];
+            gen[0][j] = gen[1][j+1] * ref[i-1] + gen[0][j]; /* gen[1][j+1] is still the old value: only gen[1][j] was overwritten above */
+        }
+        ref[i] = -gen[1][0] / error;
+        error += gen[1][0] * ref[i];
+    }
+    /* ratio of autoc[0] to the error remaining after max_order stages */
+    return autoc[0]/error;
+}
+
 #endif /* AVCODEC_LPC_H */

commit bcc3ab4c19a04743ff0579f3c3825e0b03edeb85
Author: Alex Converse <[email protected]>
Date:   Fri Jun 12 11:00:31 2009 -0400

    Rudimentary TNS support based on ISO 13818-7 Annex C

diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index cd9eb8a..177ab08 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -152,6 +152,9 @@ typedef struct {
     int direction[8][4];
     int order[8][4];
     float coef[8][4][TNS_MAX_ORDER];
+    uint8_t coef_res[8];
+    uint8_t coef_compress[8][4];
+    int8_t icoef[8][4][TNS_MAX_ORDER];
 } TemporalNoiseShaping;
 
 /**
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index a4ed743..dde0e49 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1,6 +1,7 @@
 /*
  * AAC coefficients encoder
  * Copyright (C) 2008-2009 Konstantin Shishkov
+ * Copyright (C) 2009 Alex Converse
  *
  * This file is part of FFmpeg.
  *
@@ -35,6 +36,7 @@
 #include "aac.h"
 #include "aacenc.h"
 #include "aactab.h"
+#include "lpc.h"
 
 /** bits needed to code codebook run value for long windows */
 static const uint8_t run_value_bits_long[64] = {
@@ -1242,29 +1244,158 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lam
     }
 }
 
+static const uint8_t tns_start_band_1024[12] = /* lowest TNS start band for non-short windows, indexed by samplerate_index */
+{ 11, 12, 15, 16, 17, 20, 25, 26, 24, 28, 30, 31 }; /* NOTE(review): not monotonic (26 followed by 24) -- confirm against the reference table */
+static const uint8_t tns_start_band_128[12] = /* same, for EIGHT_SHORT_SEQUENCE windows */
+{ 2, 2, 2, 3, 3, 4, 6, 6, 8, 10, 10, 12 };
+
+static void autocorr(float * out, const float * in, int in_size, int out_size) /* unnormalized autocorrelation of in[0..in_size-1] */
+{
+    int i, j;   /* i: lag, j: sample position */
+    float tmp;  /* running sum for the current lag */
+    /* out[i] = sum of in[j]*in[i+j] over every j with i+j < in_size, for lags i = 0..out_size-1 */
+    for (i = 0; i < out_size; i++) {
+        tmp = 0.0f;
+        for (j = 0; j < in_size-i; j++) {
+            tmp += in[j]*in[i+j];
+        }
+        out[i] = tmp;
+    }
+}
+
+static void tns_quantize(int coef_res, int8_t* index, float* r, int order) /* quantizes r[0..order-1] into index[]; r[] is overwritten with the dequantized values */
+{
+    const double iqfac = ((1 << (coef_res-1)) - 0.5) / (M_PI/2.0);   /* scale used on the non-negative side */
+    const double iqfac_m = ((1 << (coef_res-1)) + 0.5) / (M_PI/2.0); /* scale used on the negative side */
+    int i;
+    /* Quantization is uniform in asin(r[i]), not in r[i] itself. */
+    /* Reflection coefficient quantization */
+    for (i = 0; i < order; i++) {
+        index[i] = (int)((asin( r[i] ) * ((r[i] >= 0) ? iqfac : iqfac_m))+0.5); /* NOTE(review): the (int) cast truncates toward zero, so +0.5 does not round negative products to nearest -- confirm intended */
+        r[i] = sin(index[i] / ((index[i] >= 0) ? iqfac : iqfac_m));             /* inverse mapping of the index just stored */
+    }
+}
+
+static int tns_truncate_order(float* rq, int order) /* returns the filter order left after dropping small trailing coefficients */
+{
+    int i;
+    const float TRUNC_THRESH = 0.1f; /* magnitudes at or below this are replaced by zero */
+    /* Walk from the highest coefficient down, zeroing small ones, and stop at the first one that survives. */
+    for (i = order-1; i >= 0; i--) {
+         rq[i] = (fabsf(rq[i]) > TRUNC_THRESH) ? rq[i]: 0.0f;
+         if (rq[i] != 0.0f)
+             return i+1; /* rq[i] survives; rq[0..i-1] are not examined or modified */
+    }
+    return 0; /* every coefficient was zeroed */
+}
+
+static void tns_step_up(int order, float* a, const float *ref) /* reflection coefficients ref[0..order-1] -> filter coefficients a[0..order] */
+{
+    float b[TNS_MAX_ORDER+2]; /* scratch copy, so each stage reads the previous stage's a[] unmodified */
+    int i, m;
+    /* Stage m extends the order m-1 coefficients to order m using ref[m-1]; a[0] stays 1. */
+    a[0] = b[0] = 1.0f;
+    for (m = 1; m <= order; m++) {
+        for (i = 1; i < m; i++) {
+            b[i] = a[i] + ref[m-1] * a[m-i];
+        }
+        for (i = 1; i < m; i++) {
+            a[i] = b[i];
+        }
+        a[m] = ref[m-1]; /* the newest coefficient is the reflection coefficient itself */
+    }
+}
+
+static void tns_ma_filter(float * coef, const float * a, int len, int order) /* filters coef[0..len-1] in place with taps a[1..order] */
+{
+    int i, m;
+    const int RING = 31;  /* index mask for the history buffer */
+    float buf[RING+1];    /* most recent unfiltered inputs; NOTE(review): history is only intact while order <= RING */
+    /* coef[m] += sum over i = 1..min(order, m) of (original coef[m-i]) * a[i] */
+    for (m = 0; m < len; m++) {
+        buf[m&RING] = coef[m]; /* save the input before it is overwritten with the output */
+        for (i = 1; i <= FFMIN(order, m); i++)
+            coef[m] += buf[(m - i)&RING] * a[i];
+    }
+}
+
+static void tns_encode_annex_c(AVCodecContext *avctx, AACEncContext *s,
+                               SingleChannelElement * sce)
+{
+    /* Per window: compute the prediction gain of sce->coeffs over the TNS band range; above GAIN_THRESH, store one quantized filter in sce->tns and filter the coefficients in place. avctx is unused. */
+    int max_bands, start_band, stop_band, order, coef_res, w;
+    float gain;
+    float ref[TNS_MAX_ORDER];   /* reflection coefficients of the current window */
+    float   a[TNS_MAX_ORDER+1]; /* holds the autocorrelation first, later the filter coefficients */
+    const float GAIN_THRESH = 1.4f; /* a window gets a filter only when its gain exceeds this */
+    TemporalNoiseShaping    * tns = &sce->tns;
+    IndividualChannelStream * ics = &sce->ics;
+    /* Cleared first; set again as soon as any window gets a filter. */
+    tns->present = 0;
+    /* Eight-short and other window sequences use different band tables and a different filter order. */
+    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+        max_bands  =  ff_tns_max_bands_128[s->samplerate_index];
+        start_band =    tns_start_band_128[s->samplerate_index];
+        order      =  7;
+    } else {
+        max_bands  = ff_tns_max_bands_1024[s->samplerate_index];
+        start_band =   tns_start_band_1024[s->samplerate_index];
+        order      = 12;
+    }
+    stop_band  = ics->num_swb; /* NOTE(review): not clamped to max_bands -- confirm the decoder filters the same range */
+    start_band = FFMAX(start_band, stop_band - max_bands); /* keeps the filtered region at most max_bands wide */
+    /* The band range is the same for every window; only the coefficient offset changes. */
+    for (w = 0; w < ics->num_windows; w++) {
+        int idx = w * 128 + ics->swb_offset[start_band]; /* first coefficient of the range in window w (stride of 128 per window) */
+        int len = ics->swb_offset[stop_band] - ics->swb_offset[start_band]; /* number of coefficients in the range */
+        autocorr(a, &sce->coeffs[idx], len, order+1);
+        gain = compute_parcor_coefs(a, order, ref);
+        /* TNS is skipped for this window unless the gain clears the threshold. */
+        if (gain > GAIN_THRESH) {
+            coef_res = 4; /* fixed quantizer resolution */
+            tns->present = 1;
+            tns->n_filt[w] = 1; /* a single filter per window */
+            tns->coef_res[w] = coef_res;
+            tns->length[w][0] = stop_band - start_band; /* length counted in bands */
+            tns->direction[w][0] = 0;
+            tns_quantize(coef_res, &tns->icoef[w][0][0], ref, order); /* fills icoef and replaces ref with the dequantized values */
+            tns->order[w][0] = tns_truncate_order(ref, order);
+            tns->coef_compress[w][0] = 0;
+            tns_step_up(tns->order[w][0], a, ref); /* a[] now holds filter coefficients instead of the autocorrelation */
+            tns_ma_filter(&sce->coeffs[idx], a, len, tns->order[w][0]);
+        } else {
+            tns->n_filt[w] = 0;
+        }
+    }
+}
+
 AACCoefficientsEncoder ff_aac_coders[] = {
     {
         search_for_quantizers_faac,
         encode_window_bands_info_fixed,
         quantize_and_encode_band,
+        tns_encode_annex_c, /* encode_tns slot */
 //        search_for_ms,
     },
     {
         search_for_quantizers_anmr,
         encode_window_bands_info,
         quantize_and_encode_band,
+        tns_encode_annex_c, /* encode_tns slot */
 //        search_for_ms,
     },
     {
         search_for_quantizers_twoloop,
         encode_window_bands_info,
         quantize_and_encode_band,
+        tns_encode_annex_c, /* encode_tns slot */
 //        search_for_ms,
     },
     {
         search_for_quantizers_fast,
         encode_window_bands_info,
         quantize_and_encode_band,
+        tns_encode_annex_c, /* encode_tns slot */
 //        search_for_ms,
     },
 };
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index b4fab18..72c25bc 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -360,6 +360,14 @@ static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, in
 }
 
 /**
+ * Run the selected coder's encode_tns callback on one channel; this wrapper itself writes nothing to the bitstream.
+ */
+static void encode_tns(AVCodecContext * avctx, AACEncContext *s, SingleChannelElement *sce)
+{
+    s->coder->encode_tns(avctx, s, sce); /* s->coder supplies the TNS implementation */
+}
+
+/**
  * Encode scalefactor band coding type.
  */
 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
@@ -435,6 +443,36 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
     }
 }
 
+static void put_tns(PutBitContext * pb, SingleChannelElement * sce) /* writes sce->tns to pb, starting with the 1-bit presence flag */
+{
+    TemporalNoiseShaping * tns = &sce->tns;
+    const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; /* 1 for eight-short windows: narrows the fields below */
+    const int num_windows = sce->ics.num_windows;
+    int w, filt, i, coef_len;
+    /* When the presence flag is 0 nothing else is written. */
+    put_bits(pb, 1, tns->present);
+    if (tns->present) {
+        for (w = 0; w < num_windows; w++) {
+            put_bits(pb, 2 - is8, tns->n_filt[w]); /* filter count: 2 bits, or 1 when is8 */
+            if (tns->n_filt[w]) {
+                put_bits(pb, 1, tns->coef_res[w] - 3); /* 1-bit field, so coef_res is expected to be 3 or 4 */
+                for (filt = 0; filt < tns->n_filt[w]; filt++) {
+                    put_bits(pb, 6 - 2 * is8, tns->length[w][filt]); /* 6 bits, or 4 when is8 */
+                    put_bits(pb, 5 - 2 * is8, tns->order [w][filt]); /* 5 bits, or 3 when is8 */
+                    if (tns->order[w][filt]) {
+                        put_bits(pb, 1, tns->direction    [w][filt]);
+                        put_bits(pb, 1, tns->coef_compress[w][filt]);
+                        coef_len = tns->coef_res[w] - tns->coef_compress[w][filt]; /* compression drops one bit per coefficient */
+                        for (i = 0; i < tns->order[w][filt]; i++) {
+                            put_sbits(pb, coef_len, tns->icoef[w][filt][i]); /* signed quantizer index */
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 /**
  * Encode one channel of audio data.
  */
@@ -443,9 +481,10 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, Si
     put_bits(&s->pb, 8, sce->sf_idx[0]);
     if(!common_window) put_ics_info(s, &sce->ics);
     encode_band_info(s, sce);
+    encode_tns(avctx, s, sce);
     encode_scale_factors(avctx, s, sce);
     encode_pulses(s, &sce->pulse);
-    put_bits(&s->pb, 1, 0); //tns
+    put_tns(&s->pb, sce);
     put_bits(&s->pb, 1, 0); //ssr
     encode_spectral_coeffs(s, sce);
     return 0;
@@ -527,6 +566,9 @@ static int aac_encode_frame(AVCodecContext *avctx,
             ics->num_windows        = wi[j].num_windows;
             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
             ics->num_swb            = s->psy.num_bands[ics->num_windows == 8];
+            ics->swb_offset         = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ?
+                                      ff_swb_offset_128 [s->samplerate_index] :
+                                      ff_swb_offset_1024[s->samplerate_index];
             for(k = 0; k < ics->num_windows; k++)
                 ics->group_len[k] = wi[j].grouping[k];
 
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 9dc1c78..580934c 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -39,6 +39,8 @@ typedef struct AACCoefficientsEncoder{
                                      int win, int group_len, const float lambda);
     void (*quantize_and_encode_band)(PutBitContext *pb, const float *in, int size,
                                      int scale_idx, int cb, const float lambda);
+    void (*encode_tns)(AVCodecContext *avctx, struct AACEncContext *s,
+                                  SingleChannelElement *sce);
     void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda);
 }AACCoefficientsEncoder;

commit d800e7a2cb6eaee6d4d1e46eff03e6c6f6aadd2a
Author: Alex Converse <[email protected]>
Date:   Fri Jun 12 12:58:45 2009 -0400

    TNS coefficient compressor

diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index dde0e49..7011b43 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1289,6 +1289,19 @@ static int tns_truncate_order(float* rq, int order)
     return 0;
 }
 
+static int tns_compress(int coef_res, int8_t* index, int order) /* returns 1 if every index[0..order-1] fits in coef_res-1 signed bits, else 0 */
+{
+    int i;
+    int shift = coef_res - 2; /* number of low bits below the two most significant ones */
+    int head;                 /* what is left of index[i] after discarding those low bits */
+    for (i = 0; i < order; i++) {
+        head = index[i] >> shift; /* NOTE(review): relies on >> of a negative value being an arithmetic shift */
+        if (head > 0 || head < -1)
+            return 0; /* head is neither 0 nor -1, so this value needs all coef_res bits */
+    }
+    return 1;
+}
+
 static void tns_step_up(int order, float* a, const float *ref)
 {
     float b[TNS_MAX_ORDER+2];
@@ -1360,7 +1373,7 @@ static void tns_encode_annex_c(AVCodecContext *avctx, AACEncContext *s,
             tns->direction[w][0] = 0;
             tns_quantize(coef_res, &tns->icoef[w][0][0], ref, order);
             tns->order[w][0] = tns_truncate_order(ref, order);
-            tns->coef_compress[w][0] = 0;
+            tns->coef_compress[w][0] = tns_compress(coef_res, &tns->icoef[w][0][0], tns->order[w][0]);
             tns_step_up(tns->order[w][0], a, ref);
             tns_ma_filter(&sce->coeffs[idx], a, len, tns->order[w][0]);
         } else {

_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc

Re: [FFmpeg-soc] AAC TNS encoder

Reply via email to