[FFmpeg-cvslog] [ffmpeg] 01/06: aacenc_tns: reengineer coefficient derivation

Lynne via ffmpeg-cvslog Wed, 01 Jul 2026 05:07:08 -0700

This is an automated email from the git hooks/post-receive script.

Git pushed a commit to branch master
in repository ffmpeg.


commit 1298ce9eda515b718a77e9a77bbe1f5d434b1408
Author:     Lynne <[email protected]>
AuthorDate: Wed Jun 10 01:44:49 2026 +0900
Commit:     Lynne <[email protected]>
CommitDate: Wed Jul 1 21:05:22 2026 +0900

    aacenc_tns: reengineer coefficient derivation
    
    The TNS tool needed a full rewrite.
    
    1.) PNS cannot be used with M/S or I/S or TNS. The coding tools
    interfere with one another.
    
    2.) The coeffs were windowed, which was wrong.
    
    3.) The applied filter did not match what the spec required.
---
 libavcodec/aacenc_tns.c | 185 +++++++++++++++++++++++++++++++++++-------------
 libavcodec/lpc.c        |   8 ++-
 libavcodec/lpc.h        |   2 +-
 3 files changed, 140 insertions(+), 55 deletions(-)

diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c
index 1e71c658c4..a45ca35f7d 100644
--- a/libavcodec/aacenc_tns.c
+++ b/libavcodec/aacenc_tns.c
@@ -41,9 +41,13 @@
 /* We really need the bits we save here elsewhere */
 #define TNS_ENABLE_COEF_COMPRESSION
 
-/* TNS will only be used if the LPC gain is within these margins */
-#define TNS_GAIN_THRESHOLD_LOW      1.4f
-#define TNS_GAIN_THRESHOLD_HIGH     1.16f*TNS_GAIN_THRESHOLD_LOW
+/* Apple-derived TNS: weighted-spectrum predictor, accepted only if the measured
+ * post-quantization prediction gain clears a block-type-dependent bar (Apple RE). */
+#define TNS_PREDGAIN_GATE   1.4f    /* first gate: predicted LPC gain */
+#define TNS_PG_C1_LONG      1.4f    /* min measured gain, long blocks */
+#define TNS_PG_C1_SHORT     2.2f    /* min measured gain, short blocks */
+#define TNS_PG_CLAMP        6.0f    /* upper bound: poles near unit circle → noise blowup */
+#define TNS_WEIGHT_FLOOR    0.01f   /* per-bin masking floor for the weighted spectrum */
 
 static inline int compress_coeffs(int *coef, int order, int c_bits)
 {
@@ -62,11 +66,7 @@ static inline int compress_coeffs(int *coef, int order, int c_bits)
     return 1;
 }
 
-/**
- * Encode TNS data.
- * Coefficient compression is simply not lossless as it should be
- * on any decoder tested and as such is not active.
- */
+/** Encode TNS data. */
 void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
 {
     TemporalNoiseShaping *tns = &sce->tns;
@@ -98,15 +98,28 @@ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
     }
 }
 
+/* Cap the TNS band range at the first PNS band to avoid TNS+PNS conflicts. */
+static int tns_max_nonpns(const SingleChannelElement *sce, int mmm)
+{
+    for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
+        for (int g = 0; g < mmm; g++)
+            if (sce->band_type[w*16+g] == NOISE_BT) { mmm = g; break; }
+    return mmm;
+}
+
 /* Apply TNS filter */
 void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
 {
     TemporalNoiseShaping *tns = &sce->tns;
     IndividualChannelStream *ics = &sce->ics;
     int w, filt, m, i, top, order, bottom, start, end, size, inc;
-    const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
+    const int mmm = tns_max_nonpns(sce, FFMIN(ics->tns_max_bands, ics->max_sfb));
     float lpc[TNS_MAX_ORDER];
 
+    /* TNS predicts from the post-M/S and post-I/S coefficients. */
+    float hist[1024];
+    memcpy(hist, sce->coeffs, sizeof(hist));
+
     for (w = 0; w < ics->num_windows; w++) {
         bottom = ics->num_swb;
         for (filt = 0; filt < tns->n_filt[w]; filt++) {
@@ -134,7 +147,7 @@ void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
             /* AR filter */
             for (m = 0; m < size; m++, start += inc) {
                 for (i = 1; i <= FFMIN(m, order); i++) {
-                    sce->coeffs[start] += lpc[i-1]*sce->pcoeffs[start - i*inc];
+                    sce->coeffs[start] += lpc[i-1]*hist[start - i*inc];
                 }
             }
         }
@@ -161,9 +174,8 @@ static inline void quantize_coefs(double *coef, int *idx, float *lpc, int order,
 void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
 {
     TemporalNoiseShaping *tns = &sce->tns;
-    int w, g, count = 0;
-    double gain, coefs[MAX_LPC_ORDER];
-    const int mmm = FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb);
+    int w, count = 0;
+    const int mmm = tns_max_nonpns(sce, FFMIN(sce->ics.tns_max_bands, sce->ics.max_sfb));
     const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE;
     const int c_bits = is8 ? TNS_Q_BITS_IS8 == 4 : TNS_Q_BITS == 4;
     const int sfb_start = av_clip(tns_min_sfb[is8][s->samplerate_index], 0, mmm);
@@ -174,56 +186,127 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
     const int sfb_len = sfb_end - sfb_start;
     const int coef_len = sce->ics.swb_offset[sfb_end] - sce->ics.swb_offset[sfb_start];
     const int n_filt = is8 ? 1 : order != TNS_MAX_ORDER ? 2 : 3;
+    const int ord_g  = order / n_filt;
+
+    /* Apple's accept bar (minimum measured prediction gain): higher on short blocks,
+     * where a weak filter's shaped-noise tail spreads across the 50% overlap. */
+    const float c1 = is8 ? TNS_PG_C1_SHORT : TNS_PG_C1_LONG;
+    FFPsyBand *const psy_bands = &s->psy.ch[s->cur_channel].psy_bands[0];
 
     if (coef_len <= 0 || sfb_len <= 0) {
         sce->tns.present = 0;
         return;
     }
 
+    /* time-domain window length backing one coding window: a long MDCT block is
+     * fed 2048 windowed samples (current 1024 + overlap), each short block 256. */
+    const int tlen = is8 ? 256 : 2048;
+
     for (w = 0; w < sce->ics.num_windows; w++) {
-        float en[4] = {0.0f, 0.0f, 0.0f, 0.0f};
-        int oc_start = 0;
-        int coef_start = sce->ics.swb_offset[sfb_start];
-
-        if (n_filt == 2) {
-            for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
-                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
-                    if (g > sfb_start + (sfb_len/2))
-                        en[1] += band->energy; /* End */
-                    else
-                        en[0] += band->energy; /* Start */
-            }
-            en[2] = en[0];
-        } else {
-            for (g = sfb_start; g < sce->ics.num_swb && g <= sfb_end; g++) {
-                FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[w*16+g];
-                    if (g > sfb_start + (sfb_len/2) + (sfb_len/4))
-                        en[2] += band->energy; /* End */
-                    else if (g > sfb_start + (sfb_len/2) - (sfb_len/4))
-                        en[1] += band->energy; /* Middle */
-                    else
-                        en[0] += band->energy; /* Start */
+        int filt, any = 0;
+
+        /* The filter gets ran in the direction of the signal's *temporal* energy,
+         * so the quantization noise stays in the loud masked part rather than spilling
+         * into the quiet part. */
+        const float *tw = sce->ret_buf + w*tlen;
+        float e_early = 0.0f, e_late = 0.0f;
+        int ti;
+        for (ti = 0; ti < tlen/2; ti++)
+            e_early += tw[ti]*tw[ti];
+        for (; ti < tlen; ti++)
+            e_late += tw[ti]*tw[ti];
+        const int tdir = e_early > e_late;
+
+        /* Walk the frequency regions exactly as the decoder does: filter 0 is the
+         * topmost band region, each subsequent filter covers the next region down,
+         * clamped to mmm. Each filter gets its own LPC over its own region. */
+        int top_sfb = sce->ics.num_swb;
+        for (filt = 0; filt < n_filt; filt++) {
+            double coefs[MAX_LPC_ORDER];
+            float wspec[1024], tmp[1024], lpc_q[TNS_MAX_ORDER];
+            int len_sfb = (filt == n_filt - 1) ? sfb_len - filt*(sfb_len/n_filt)
+                                               : sfb_len/n_filt;
+            int bot_sfb = FFMAX(0, top_sfb - len_sfb);
+            int g_lo = FFMIN(bot_sfb, mmm), g_hi = FFMIN(top_sfb, mmm);
+            int c_lo = sce->ics.swb_offset[g_lo];
+            int c_hi = sce->ics.swb_offset[g_hi];
+            int clen = c_hi - c_lo;
+            const int dir = slant != 2 ? slant : tdir;
+            float gain, orig_e = 0.0f, filt_e = 0.0f;
+            int m, i, g, inc, st;
+
+            tns->length[w][filt] = len_sfb;
+            tns->order[w][filt]  = 0;     /* default: region carries no filter */
+            top_sfb = bot_sfb;
+
+            if (clen <= 2*ord_g)          /* too short for a stable order-ord_g LPC */
+                continue;
+
+            /* Fit LPC on the perceptually-weighted spectrum X/sqrt(thr), floored
+             * to avoid a near-zero threshold blowing up a single bin (Apple). */
+            {
+                float maxrms = 0.0f, floorrms;
+                int k;
+                for (g = g_lo; g < g_hi; g++) {
+                    int s0 = sce->ics.swb_offset[g], s1 = sce->ics.swb_offset[g+1];
+                    float rms = sqrtf(FFMAX(psy_bands[w*16 + g].threshold, 0.0f) /
+                                      FFMAX(s1 - s0, 1));
+                    maxrms = FFMAX(maxrms, rms);
+                }
+                floorrms = FFMAX(maxrms * TNS_WEIGHT_FLOOR, 1e-9f);
+                for (g = g_lo; g < g_hi; g++) {
+                    int s0 = sce->ics.swb_offset[g], s1 = sce->ics.swb_offset[g+1];
+                    float rms = sqrtf(FFMAX(psy_bands[w*16 + g].threshold, 0.0f) /
+                                      FFMAX(s1 - s0, 1));
+                    float wgt = 1.0f / FFMAX(rms, floorrms);
+                    for (k = s0; k < s1; k++)
+                        wspec[k - c_lo] = sce->coeffs[w*128 + k] * wgt;
+                }
+                /* Short blocks: unwindowed fit; Hann window zeros the edges of the
+                 * tiny region, wrecking the LPC. Long blocks keep the window. */
+                gain = ff_lpc_calc_ref_coefs_f(&s->lpc, wspec, clen, ord_g, coefs, !is8);
             }
-            en[3] = en[0];
-        }
+            /* Reject below the first gate and above the clamp (poles near unit circle). */
+            if (!isfinite(gain) || gain < TNS_PREDGAIN_GATE || gain > TNS_PG_CLAMP)
+                continue;
+            /* Negate: ff_lpc_calc_ref_coefs_f sign convention is opposite to what
+             * ff_aac_apply_tns's MA filter needs; fed unnegated, it anti-whitens. */
+            for (i = 0; i < ord_g; i++)
+                coefs[i] = -coefs[i];
 
-        /* LPC */
-        gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[w*128 + coef_start],
-                                       coef_len, order, coefs);
+            /* Quantize, then build the decoder's direct-form LPC. */
+            quantize_coefs(coefs, tns->coef_idx[w][filt], tns->coef[w][filt],
+                           ord_g, c_bits);
+            compute_lpc_coefs(tns->coef[w][filt], 0, ord_g, lpc_q, 0, 0, 0, NULL);
 
-        if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH)
-            continue;
+            /* Apply the quantized filter to the weighted spectrum and measure gain. */
+            const float *msrc = wspec;
+            inc = dir ? -1 : 1;
+            st  = dir ? clen - 1 : 0;
+            for (m = 0; m < clen; m++) {
+                int idx = st + m*inc;
+                float acc = msrc[idx];
+                for (i = 1; i <= FFMIN(m, ord_g); i++)
+                    acc += lpc_q[i-1] * msrc[idx - i*inc];
+                tmp[idx] = acc;
+            }
+            for (m = 0; m < clen; m++) {
+                orig_e += msrc[m]*msrc[m];
+                filt_e += tmp[m]*tmp[m];
+            }
+            filt_e = FFMAX(filt_e, 1e-9f);
+
+            /* Keep only if measured post-quantization gain clears C1 (Apple's outcome gate). */
+            if (orig_e < c1*filt_e)
+                continue;
 
-        tns->n_filt[w] = n_filt;
-        for (g = 0; g < tns->n_filt[w]; g++) {
-            tns->direction[w][g] = slant != 2 ? slant : en[g] < en[g + 1];
-            tns->order[w][g] = order/tns->n_filt[w];
-            tns->length[w][g] = sfb_len/tns->n_filt[w];
-            quantize_coefs(&coefs[oc_start], tns->coef_idx[w][g], tns->coef[w][g],
-                            tns->order[w][g], c_bits);
-            oc_start += tns->order[w][g];
+            tns->order[w][filt] = ord_g;
+            tns->direction[w][filt] = dir;
+            any = 1;
         }
-        count++;
+        tns->n_filt[w] = any ? n_filt : 0;
+        if (any)
+            count++;
     }
     sce->tns.present = !!count;
 }
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 38c78d9521..74a909a0af 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -207,16 +207,18 @@ int ff_lpc_calc_ref_coefs(LPCContext *s,
 }
 
 double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len,
-                               int order, double *ref)
+                               int order, double *ref, int apply_window)
 {
     int i;
     double signal = 0.0f, avg_err = 0.0f;
     double autoc[MAX_LPC_ORDER+1] = {0}, error[MAX_LPC_ORDER+1] = {0};
     const double a = 0.5f, b = 1.0f - a;
 
-    /* Apply windowing */
+    /* Apply windowing. apply_window == 0 uses a rectangular (unity) window: a Hann
+     * taper zeros the edges, which over a very short region (e.g. a short-block TNS
+     * region of a few dozen lines) discards most of the data and wrecks the fit. */
     for (i = 0; i <= len / 2; i++) {
-        double weight = a - b*cos((2*M_PI*i)/(len - 1));
+        double weight = apply_window ? a - b*cos((2*M_PI*i)/(len - 1)) : 1.0;
         s->windowed_samples[i] = weight*samples[i];
         s->windowed_samples[len-1-i] = weight*samples[len-1-i];
     }
diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h
index 6d62707a59..2b1bfad93c 100644
--- a/libavcodec/lpc.h
+++ b/libavcodec/lpc.h
@@ -100,7 +100,7 @@ int ff_lpc_calc_ref_coefs(LPCContext *s,
                           const int32_t *samples, int order, double *ref);
 
 double ff_lpc_calc_ref_coefs_f(LPCContext *s, const float *samples, int len,
-                               int order, double *ref);
+                               int order, double *ref, int apply_window);
 
 /**
  * Initialize LPCContext.

_______________________________________________
ffmpeg-cvslog mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[FFmpeg-cvslog] [ffmpeg] 01/06: aacenc_tns: reengineer coefficient derivation

Reply via email to