PR #23430 opened by Lynne URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23430 Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23430.patch
Matches Apple on Zimtohrli and ViSQOL. Beats fdk-aac conclusively. Exact benchmarks in a bit. >From 6ecaa5b91ef19ecf986cddb624c7287fec051c9d Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Wed, 10 Jun 2026 01:44:49 +0900 Subject: [PATCH 1/2] avcodec/aacenc_tns: relax the gain gate on short blocks TNS shapes quantization noise in time, so it is most useful on transients, which are coded as short blocks. The stock upper LPC-gain bound rejects exactly the strong temporal structure where TNS pays off, leaving audible pre-echo on those frames. So just relax the upper bound. --- libavcodec/aacenc_tns.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/libavcodec/aacenc_tns.c b/libavcodec/aacenc_tns.c index 1e71c658c4..8bc77af904 100644 --- a/libavcodec/aacenc_tns.c +++ b/libavcodec/aacenc_tns.c @@ -44,6 +44,10 @@ /* TNS will only be used if the LPC gain is within these margins */ #define TNS_GAIN_THRESHOLD_LOW 1.4f #define TNS_GAIN_THRESHOLD_HIGH 1.16f*TNS_GAIN_THRESHOLD_LOW +/* Short blocks (transients) relax the upper bound: TNS shapes quantization noise in + * time, so it pays off most on transients (coded as short blocks). 3.0x the low + * threshold, tuned by ear on transient material -- see ff_aac_search_for_tns. */ +#define TNS_GAIN_THRESHOLD_SHORT 3.0f*TNS_GAIN_THRESHOLD_LOW static inline int compress_coeffs(int *coef, int order, int c_bits) { @@ -64,8 +68,9 @@ static inline int compress_coeffs(int *coef, int order, int c_bits) /** * Encode TNS data. - * Coefficient compression is simply not lossless as it should be - * on any decoder tested and as such is not active. + * Coefficient compression (TNS_ENABLE_COEF_COMPRESSION) is active: compress_coeffs() + * only shifts a filter's indices when that shift is reversible, so it is lossless + * where applied, is signalled per filter by the coef_compress bit, and saves bits. 
*/ void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce) { @@ -211,7 +216,12 @@ void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce) gain = ff_lpc_calc_ref_coefs_f(&s->lpc, &sce->coeffs[w*128 + coef_start], coef_len, order, coefs); - if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > TNS_GAIN_THRESHOLD_HIGH) + /* The stock upper gain bound rejects exactly the strong temporal structure where + * TNS pays off; relax it for short blocks (transients) so they get TNS (perceptual + * win on pre-echo, confirmed by PEAQ) while long blocks (steady / tonal music) + * stay conservative. See TNS_GAIN_THRESHOLD_SHORT. */ + const float gain_high = is8 ? TNS_GAIN_THRESHOLD_SHORT : TNS_GAIN_THRESHOLD_HIGH; + if (!order || !isfinite(gain) || gain < TNS_GAIN_THRESHOLD_LOW || gain > gain_high) continue; tns->n_filt[w] = n_filt; -- 2.52.0 >From 20eadd5ae15f11e1ac90168b3d8abfac6ce6bdb1 Mon Sep 17 00:00:00 2001 From: Lynne <[email protected]> Date: Wed, 10 Jun 2026 14:46:00 +0900 Subject: [PATCH 2/2] aaccoder: add NMR-based coder --- libavcodec/aaccoder.c | 26 +++ libavcodec/aaccoder_nmr.h | 426 ++++++++++++++++++++++++++++++++++++++ libavcodec/aacenc.c | 9 + libavcodec/aacenc.h | 14 ++ libavcodec/aacencdsp.c | 32 ++- libavcodec/aacencdsp.h | 6 + 6 files changed, 511 insertions(+), 2 deletions(-) create mode 100644 libavcodec/aaccoder_nmr.h diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 7f1c4cdcc1..eb4c0ca4b7 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -59,6 +59,7 @@ #define NOISE_LAMBDA_REPLACE 1.948f #include "libavcodec/aaccoder_trellis.h" +#include "libavcodec/aaccoder_nmr.h" typedef float (*quantize_and_encode_band_func)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, @@ -840,6 +841,18 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe) } } +static void search_for_is_nmr(AACEncContext *s, AVCodecContext *avctx, 
ChannelElement *cpe) +{ + /* IS bitrate ceiling (bits/sample/channel): below it the shared lambda-driven + * search runs, above it IS is skipped. 1.5 keeps IS for < ~144kbps stereo and + * gates it out at >= 192kbps */ + const float maxbps = 1.5f; + float bps = (avctx->bit_rate > 0 && avctx->sample_rate && avctx->ch_layout.nb_channels) ? + (float)avctx->bit_rate / avctx->sample_rate / avctx->ch_layout.nb_channels : 0.0f; + if (bps == 0.f || bps < maxbps) /* bps 0 = VBR, rely on the search's own lambda gate */ + ff_aac_search_for_is(s, avctx, cpe); +} + const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { [AAC_CODER_TWOLOOP] = { search_for_quantizers_twoloop, @@ -867,4 +880,17 @@ const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { search_for_ms, ff_aac_search_for_is, }, + [AAC_CODER_NMR] = { + search_for_quantizers_nmr, + codebook_trellis_rate, + quantize_and_encode_band, + ff_aac_encode_tns_info, + ff_aac_apply_tns, + set_special_band_scalefactors, + NULL, /* PNS decided in the trellis (search_for_quantizers_nmr) */ + mark_pns, + ff_aac_search_for_tns, + search_for_ms, + search_for_is_nmr, + }, }; diff --git a/libavcodec/aaccoder_nmr.h b/libavcodec/aaccoder_nmr.h new file mode 100644 index 0000000000..7e6aef2aaa --- /dev/null +++ b/libavcodec/aaccoder_nmr.h @@ -0,0 +1,426 @@ +/* + * AAC encoder NMR (noise-to-mask ratio) scalefactor coder + * Copyright (c) 2026 Lynne <[email protected]> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * AAC encoder NMR scalefactor coder. + * + * Optimizes the same noise-to-mask objective as the two-loop coder, but with an + * optimal Viterbi search over scalefactors instead of a heuristic loop. For each + * coded band the per-scalefactor distortion/bits curve is precomputed, then a + * trellis over the (window-group, band) coding sequence minimizes + * sum_g [ dist_g(sf_g)/threshold_g + + * lambda * (spectral_bits_g(sf_g) + scalefactor_differential_bits) ] + * with |sf_g - sf_{g-1}| <= SCALE_MAX_DIFF as a constraint, and lambda + * binary-searched so the coded size meets the per-frame bit budget. + * + * Perceptual noise substitution (PNS) is integrated into the same objective: once + * the trellis settles on its operating lambda, each noise-like band (flagged by + * mark_pns) is offered a terminal "code as noise" candidate whose cost is + * nmr_pns + lambda*NMR_PNS_BITS. Because NMR_PNS_BITS is far below a band's spectral bit + * count, this candidate only wins when lambda is large, i.e. when the encoder is + * struggling to hold the bitrate. The bits freed by the chosen PNS bands are + * then re-spent by a second trellis pass over the remaining bands. 
+ */ + +#ifndef AVCODEC_AACCODER_NMR_H +#define AVCODEC_AACCODER_NMR_H + +#include <float.h> +#include <string.h> +#include "libavutil/mathematics.h" +#include "mathops.h" +#include "avcodec.h" +#include "put_bits.h" +#include "aac.h" +#include "aacenc.h" +#include "aactab.h" +#include "aacenctab.h" + +/* differential scalefactor coding cost, clamped to the legal delta range */ +#define NMR_SFBITS(d) ff_aac_scalefactor_bits[av_clip((d) + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF)] + +#define NMR_ITERS 14 /* lambda binary-search iters */ +#define NMR_IFINE 9 /* fine-pass lambda iters */ +#define NMR_CITERS 7 /* coarse-pass lambda iters */ +#define NMR_COARSE 8 /* two-pass coarse->fine grid step, cuts the Viterbi ncand^2 with no + * quality loss, 0 disables it (single full-resolution pass) */ +#define NMR_STEP 1 /* fine-pass scalefactor candidate granularity */ + +#define NMR_PNS_BITS 9 /* approx cost in bits of signalling PNS */ + +/* only bands coded well above the masking floor (NMR > 4, genuine rate struggle/near-holes) + * become noise, tuned so noise-like content gains while tonal/music content sees no Zim/ViS + * regression (lower gates help noise more but harm music) */ +#define NMR_PNS_NDGATE 4.0f + +/* frame bitrate ceiling (bits/sample/channel) above which PNS is disabled, + * noise substitution only helps under rate distress, near transparency (~2.0 bits/sample, + * 96kbps mono) PNS is just worse. 1.5 keeps it to below 64k mono/144k stereo */ +#define NMR_PNS_MAXBPS 1.5f + +/** + * Viterbi over the coding sequence act[0..nact-1] (indices into the per-band + * curves nd/nb), with lambda binary-searched so the coded size ~ destbits. + * Fills chosen[band] for every band referenced by act. Returns the operating + * lambda. node cost = dist/threshold + lambda*spectral_bits; + * edge cost = lambda*sf_differential_bits; |delta sf| <= SCALE_MAX_DIFF hard. 
+ */ +static float nmr_solve(AACEncContext *s, + const float (*nd)[NMR_NCAND], const int (*nb)[NMR_NCAND], + const int *blo, const int *bnc, int step, + const int *act, int nact, int destbits, int *chosen, + float lo_l, float hi_l, int iters) +{ + float dp[NMR_NCAND], dpp[NMR_NCAND], node[NMR_NCAND]; + float lamsf[2*SCALE_MAX_DIFF + 1]; /* lam*sfdiff bit cost, per lambda */ + uint8_t bp[128][NMR_NCAND]; + float lam = 1.0f; + + if (nact <= 0) + return lam; + + for (int it = 0; it < iters; it++) { + lam = sqrtf(lo_l * hi_l); + for (int i = 0; i <= 2*SCALE_MAX_DIFF; i++) + lamsf[i] = lam * ff_aac_scalefactor_bits[i]; /* edge cost for this lambda */ + + int b0 = act[0]; + for (int o = 0; o < bnc[b0]; o++) + dp[o] = nd[b0][o] + lam * nb[b0][o]; /* anchor band node cost */ + + for (int k = 1; k < nact; k++) { + int b = act[k], pb = act[k-1]; + memcpy(dpp, dp, sizeof(dp)); + for (int o = 0; o < bnc[b]; o++) + node[o] = nd[b][o] + lam * nb[b][o]; + /* dp[o] = node[o] + min_op(dpp[op] + edge cost) */ + s->aacdsp.nmr_trellis_step(dp, bp[k], dpp, node, lamsf, + bnc[b], bnc[pb], blo[b] - blo[pb], step, + SCALE_MAX_DIFF); + } + + /* backtrack */ + int beo = 0, b = act[nact-1]; + float bec = FLT_MAX; + for (int o = 0; o < bnc[b]; o++) + if (dp[o] < bec) { bec = dp[o]; beo = o; } + chosen[b] = beo; + for (int k = nact-1; k > 0; k--) + chosen[act[k-1]] = bp[k][chosen[act[k]]]; + + /* calc cost */ + int total = 0; + for (int k = 0; k < nact; k++) + total += nb[act[k]][chosen[act[k]]]; + for (int k = 1; k < nact; k++) + total += NMR_SFBITS((blo[act[k]]+chosen[act[k]]*step) - (blo[act[k-1]]+chosen[act[k-1]]*step)); + + if (it == iters - 1) + break; + + /* check if we went over budget, go coarser if we did */ + if (total > destbits) + lo_l = lam; + else + hi_l = lam; + } + return lam; +} + +/* Build one coded band's (dist/threshold, bits) cost curve, candidates sf = lo + o*step + * for o in [0,maxn), stopping when the band would drop (cb <= 0). Returns the number of candidates written. 
*/ +static int nmr_band_curve(AACEncContext *s, SingleChannelElement *sce, int w, int g, + int start, int lo, int step, int maxn, float invthr, + float maxval, float *nd_row, int *nb_row) +{ + int ncand = 0; + for (int o = 0; o < maxn && lo + o*step <= SCALE_MAX_POS; o++) { + int sf = lo + o*step, btot = 0, cb = find_min_book(maxval, sf); + float dist = 0.0f; + if (cb <= 0) + break; + for (int w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + int bb; + dist += quantize_band_cost_cached(s, w + w2, g, sce->coeffs + start + w2*128, + s->scoefs + start + w2*128, sce->ics.swb_sizes[g], + sf, cb, 1.0f, INFINITY, &bb, NULL, 0); + btot += bb; + } + nd_row[ncand] = (dist - btot) * invthr; + nb_row[ncand] = btot; + ncand++; + } + return ncand; +} + +static void search_for_quantizers_nmr(AVCodecContext *avctx, + AACEncContext *s, + SingleChannelElement *sce, + const float lambda) +{ + int bch = ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->ch_layout.nb_channels); + int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / bch * (lambda / 120.f); + int allz = 0, cutoff = 1024, nbnd = 0; + + float thr[128]; + float pener[128]; /* band energy (for PNS noise target) */ + float pspread[128]; /* band tonality spread (1 = noise) */ + int minsf[128]; + float maxvals[128]; + + /* coded-band trellis state (indexed 0..nbnd-1) */ + int bidx[128]; /* sce band index (w*16+g) */ + int bw[128], bg[128], bst[128]; /* window group, swb, coef start per coded band */ + int blo[128]; /* finest candidate scalefactor */ + int bnc[128]; /* number of candidates */ + int chosen[128]; + int act[128]; /* active (non-PNS) band coding order */ + uint8_t is_pns[128]; /* trellis band coded as noise */ + + float (*nd)[NMR_NCAND] = s->nmr->nd; /* dist / threshold per candidate (heap) */ + int (*nb)[NMR_NCAND] = s->nmr->nb; /* spectral bits per candidate (heap) */ + + /* two-pass coarse->fine grid step (see NMR_COARSE), the lambda search runs on + * the cheap coarse grid, PASS 2 refines the winner at 
NMR_STEP granularity */ + const int cstep = NMR_COARSE > 0 ? NMR_COARSE : NMR_STEP; + + /* per-frame PNS enable: -aac_pns on and below the bitrate ceiling (CBR only; + * VBR has bit_rate 0 -> bps 0 -> the per-band nd gate governs). */ + float pns_thresh = (avctx->bit_rate > 0 && avctx->sample_rate && avctx->ch_layout.nb_channels) ? + (float)avctx->bit_rate / avctx->sample_rate / avctx->ch_layout.nb_channels : 0.f; + int pns_apply = s->options.pns && (pns_thresh < NMR_PNS_MAXBPS); + + if (s->psy.bitres.alloc >= 0) + destbits = s->psy.bitres.alloc * + (lambda / (avctx->global_quality ? avctx->global_quality : 120)); + destbits = FFMIN(destbits, 5800); + + /* cutoff frequency (kept in sync with twoloop's selection) */ + int wlen = 1024 / sce->ics.num_windows; + int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE) ? + (destbits * 1.5f * avctx->sample_rate / 1024) : + (avctx->bit_rate / avctx->ch_layout.nb_channels); + if (s->options.pns || s->options.intensity_stereo) + frame_bit_rate *= 1.15f; + + int bandwidth; + if (avctx->cutoff > 0) + bandwidth = avctx->cutoff; + else + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + + /* zero low-energy / out-of-band bands, accumulate per-band masking threshold, + * energy and tonality spread (the latter two drive the PNS decision) */ + for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + int start = 0; + for (int g = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { + float uplim = 0.0f, ener = 0.0f, spread = 2.0f; + int nz = 0; + for (int w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + ener += band->energy; + spread = FFMIN(spread, band->spread); + if (start >= cutoff || band->energy <= band->threshold || band->threshold == 0.0f) { + sce->zeroes[(w+w2)*16+g] = 1; + continue; + } + uplim += band->threshold; + nz = 1; + } + 
sce->zeroes[w*16+g] = !nz; + thr[w*16+g] = uplim; + pener[w*16+g] = ener; + pspread[w*16+g] = spread; + allz |= nz; + } + } + if (!allz) + return; + + s->aacdsp.abs_pow34(s->scoefs, sce->coeffs, 1024); + ff_quantize_band_cost_cache_init(s); + + /* finest codeable scalefactor and max value per band */ + for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + int start = w*128; + for (int g = 0; g < sce->ics.num_swb; g++) { + maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs + start); + minsf[w*16+g] = maxvals[w*16+g] > 0 ? coef2minsf(maxvals[w*16+g]) : 0; + start += sce->ics.swb_sizes[g]; + } + } + + /* PASS 1: + * precompute each coded band's cost curve at the coarse candidate step + * (the lambda search runs on this cheap grid, PASS 2 refines the winner) */ + { + for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + int start = w*128; + for (int g = 0; g < sce->ics.num_swb; g++) { + if (!sce->zeroes[w*16+g] && maxvals[w*16+g] > 0 && nbnd < 128) { + int lo = av_clip(minsf[w*16+g], 0, SCALE_MAX_POS); + float invthr = 1.0f / FFMAX(thr[w*16+g], 1e-9f); + int ncand = nmr_band_curve(s, sce, w, g, start, lo, cstep, NMR_NCAND, + invthr, maxvals[w*16+g], nd[nbnd], nb[nbnd]); + if (ncand == 0) { + sce->zeroes[w*16+g] = 1; /* nothing codeable -> leave to cleanup */ + } else { + bidx[nbnd] = w*16+g; + bw[nbnd] = w; + bg[nbnd] = g; + bst[nbnd] = start; + blo[nbnd] = lo; + bnc[nbnd] = ncand; + nbnd++; + } + } + start += sce->ics.swb_sizes[g]; + } + } + } + if (!nbnd) + return; + + /* solve the trellis over all coded bands, then offer PNS at the operating + * lambda and re-solve over the survivors with the freed budget */ + { + int nact = nbnd, pns_count = 0; + float lam; + + for (int b = 0; b < nbnd; b++) { + act[b] = b; + is_pns[b] = 0; + } + lam = nmr_solve(s, nd, nb, blo, bnc, cstep, act, nact, destbits, chosen, + 1e-9f, 1e4f, NMR_COARSE > 0 ? 
NMR_CITERS : NMR_ITERS); + + /* PASS 2: + * refine each band at full granularity (NMR_STEP) in a +/-cstep window + * around the coarse pick, then re-solve. Recovers single-pass quality while the + * lambda search stayed cheap on the coarse grid. */ + if (NMR_COARSE > 0) { + /* nmr_speed, 0 = slowest/best, higher = faster. It narrows the fine + * refine +/-window (scalefactors) below NMR_COARSE: at speed 0 the window + * spans the whole coarse-grid gap, so the two-pass result matches the + * exhaustive single-pass search. + * Each speed level shaves one sf off the window. + * At @64k mono (Zim / xRT): speed 0 -> 0.00095/15x, + * 2 -> 0.00096/18x, 3 -> 0.00100/20x, 4 -> 0.00103/22x */ + int win = NMR_COARSE - av_clip(s->options.nmr_speed, 0, 4); + for (int b = 0; b < nbnd; b++) { + int center = blo[b] + chosen[b]*cstep; + int flo = av_clip(center - win, av_clip(minsf[bidx[b]], 0, SCALE_MAX_POS), SCALE_MAX_POS); + int maxn = FFMIN(NMR_NCAND, 2*win/NMR_STEP + 1); + float invthr = 1.0f / FFMAX(thr[bidx[b]], 1e-9f); + int ncand = nmr_band_curve(s, sce, bw[b], bg[b], bst[b], flo, NMR_STEP, maxn, + invthr, maxvals[bidx[b]], nd[b], nb[b]); + blo[b] = flo; + bnc[b] = FFMAX(1, ncand); + } + /* fine pass: seed a narrow lambda bracket around the coarse solution */ + lam = nmr_solve(s, nd, nb, blo, bnc, NMR_STEP, act, nact, destbits, chosen, + lam/16.0f, lam*16.0f, NMR_IFINE); + } + + if (pns_apply) { + /* band 0 (lowest freq) is kept as the global-gain / sf-chain anchor */ + for (int b = 1; b < nbnd; b++) { + int bi = bidx[b]; + float spread = pspread[bi]; + float nmr_pns, cost_keep, cost_pns; + if (!sce->can_pns[bi]) + continue; + /* Only replace a band that is being coded audibly badly: its coded + * noise-to-mask ratio must exceed the masking floor. 
Well-coded bands + * (low NMR with plenty of bits) keep their real content, this is what + * confines PNS to only rate distress and avoids high-bitrate harm */ + if (nd[b][chosen[b]] <= NMR_PNS_NDGATE) + continue; + /* perceptual cost of replacing the band with energy-matched noise: + * the non-noise-like fraction of its energy, in dist/threshold units */ + nmr_pns = FFMAX(0.0f, pener[bi] * (1.0f - spread*spread)) + / FFMAX(thr[bi], 1e-9f); + cost_keep = nd[b][chosen[b]] + lam * nb[b][chosen[b]]; + cost_pns = nmr_pns + lam * NMR_PNS_BITS; + if (cost_pns < cost_keep) { + is_pns[b] = 1; + pns_count++; + } + } + if (pns_count) { + int budget2 = destbits - pns_count * NMR_PNS_BITS; + nact = 0; + for (int b = 0; b < nbnd; b++) + if (!is_pns[b]) + act[nact++] = b; + nmr_solve(s, nd, nb, blo, bnc, NMR_STEP, act, nact, budget2, chosen, 1e-9f, 1e4f, NMR_ITERS); + } + } + for (int b = 0; b < nbnd; b++) { + int bi = bidx[b]; + if (is_pns[b]) { + sce->band_type[bi] = NOISE_BT; + sce->zeroes[bi] = 0; + sce->pns_ener[bi] = pener[bi] * FFMIN(1.0f, pspread[bi]*pspread[bi]); + } else { + sce->sf_idx[bi] = av_clip(blo[b] + chosen[b]*NMR_STEP, 0, SCALE_MAX_POS); + } + } + } + + /* SCALE_MAX_DIFF condition: + * re-clamp, codebook fixup, drop uncodeable, set global gain + * (NOISE_BT bands keep their own scalefactor chain via set_special_band_scalefactors) */ + { + uint8_t nextband[128]; + int prev = -1; + ff_init_nextband_map(sce, nextband); + for (int w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (int g = 0; g < sce->ics.num_swb; g++) { + if (sce->zeroes[w*16+g]) { + sce->band_type[w*16+g] = 0; + continue; + } + if (sce->band_type[w*16+g] == NOISE_BT) + continue; + + if (prev != -1) + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF); + sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + if (sce->band_type[w*16+g] <= 0) { + if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) { + 
sce->band_type[w*16+g] = 1; + } else { + sce->zeroes[w*16+g] = 1; + sce->band_type[w*16+g] = 0; + continue; + } + } + if (prev == -1) + sce->sf_idx[0] = sce->sf_idx[w*16+g]; /* global gain */ + prev = sce->sf_idx[w*16+g]; + } + } + } +} + +#endif /* AVCODEC_AACCODER_NMR_H */ diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index f221d79ed3..6ac8bbb7f6 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -1114,6 +1114,7 @@ static av_cold int aac_encode_end(AVCodecContext *avctx) av_freep(&s->buffer.samples); av_freep(&s->cpe); av_freep(&s->fdsp); + av_freep(&s->nmr); ff_af_queue_close(&s->afq); return 0; } @@ -1147,6 +1148,12 @@ static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s) for(ch = 0; ch < s->channels; ch++) s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch; + if (s->options.coder == AAC_CODER_NMR) { + s->nmr = av_mallocz(sizeof(*s->nmr)); + if (!s->nmr) + return AVERROR(ENOMEM); + } + return 0; } @@ -1279,11 +1286,13 @@ static const AVOption aacenc_options[] = { {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, .unit = "coder"}, {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"}, {"fast", "Fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"}, + {"nmr", "Noise-to-mask ratio scalefactor trellis", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_NMR}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"}, {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS}, {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, 
-1, 1, AACENC_FLAGS}, {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS}, {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS}, + {"aac_nmr_speed", "NMR coder speed level: 0 = slowest/best, higher trades quality for speed", offsetof(AACEncContext, options.nmr_speed), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, AACENC_FLAGS}, FF_AAC_PROFILE_OPTS {NULL} }; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 61a9e6102b..4e20a3892e 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -44,6 +44,7 @@ typedef enum AACCoder { AAC_CODER_TWOLOOP, AAC_CODER_FAST, + AAC_CODER_NMR, AAC_CODER_NB, }AACCoder; @@ -69,6 +70,7 @@ typedef struct AACEncOptions { int pce; int mid_side; int intensity_stereo; + int nmr_speed; ///< NMR coder speed level: 0 = slowest/best, higher is faster } AACEncOptions; /** @@ -165,6 +167,17 @@ typedef struct AACQuantizeBandCostCacheEntry { uint16_t generation; } AACQuantizeBandCostCacheEntry; +/** per-band scalefactor candidates above the finest codeable sf (NMR coder) */ +#define NMR_NCAND 96 + +/** + * NMR coder per-band candidate cost curves, ~96 KiB + */ +typedef struct AACNMRCurves { + float nd[128][NMR_NCAND]; ///< dist / threshold per candidate + int nb[128][NMR_NCAND]; ///< spectral bits per candidate +} AACNMRCurves; + typedef struct AACPCEInfo { AVChannelLayout layout; uint8_t num_ele[4]; ///< front, side, back, lfe @@ -216,6 +229,7 @@ typedef struct AACEncContext { AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< memoization area for quantize_band_cost AACEncDSPContext aacdsp; + AACNMRCurves *nmr; ///< NMR coder scratch (NULL unless coder == nmr) struct { float *samples; diff --git a/libavcodec/aacencdsp.c b/libavcodec/aacencdsp.c index fb809405f7..5ccc7e8fc8 100644 --- a/libavcodec/aacencdsp.c +++ b/libavcodec/aacencdsp.c @@ -16,6 +16,7 @@ * Foundation, Inc., 51 
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <float.h> #include <math.h> #include "config.h" @@ -45,10 +46,37 @@ static void quantize_bands(int *out, const float *in, const float *scaled, } } +/* One NMR scalefactor-trellis Viterbi step, for each current-band candidate, find the + * previous-band candidate minimising dpp[op] + lamsf[d] then set + * dp[o] = node[o] + that cost and record the back-pointer bp[o] */ +static void nmr_trellis_step_c(float *dp, uint8_t *bp, const float *dpp, + const float *node, const float *lamsf, + int n_cur, int n_prev, int base, int step, int mdiff) +{ + for (int o = 0; o < n_cur; o++) { + int best = -1; + float bestc = FLT_MAX; + for (int op = 0; op < n_prev; op++) { + int d = base + (o - op) * step; + float c; + if (d < -mdiff || d > mdiff) + continue; + c = dpp[op] + lamsf[d + mdiff]; + if (c < bestc) { + bestc = c; + best = op; + } + } + bp[o] = best < 0 ? 0 : best; + dp[o] = best < 0 ? FLT_MAX : node[o] + bestc; + } +} + void ff_aacenc_dsp_init(AACEncDSPContext *s) { - s->abs_pow34 = abs_pow34_v; - s->quant_bands = quantize_bands; + s->abs_pow34 = abs_pow34_v; + s->quant_bands = quantize_bands; + s->nmr_trellis_step = nmr_trellis_step_c; #if ARCH_RISCV ff_aacenc_dsp_init_riscv(s); diff --git a/libavcodec/aacencdsp.h b/libavcodec/aacencdsp.h index 6d9ae221d1..4ead54669d 100644 --- a/libavcodec/aacencdsp.h +++ b/libavcodec/aacencdsp.h @@ -19,11 +19,17 @@ #ifndef AVCODEC_AACENCDSP_H #define AVCODEC_AACENCDSP_H +#include <stdint.h> + typedef struct AACEncDSPContext { void (*abs_pow34)(float *out, const float *in, const int size); void (*quant_bands)(int *out, const float *in, const float *scaled, int size, int is_signed, int maxval, const float Q34, const float rounding); + + void (*nmr_trellis_step)(float *dp, uint8_t *bp, const float *dpp, + const float *node, const float *lamsf, + int n_cur, int n_prev, int base, int step, int mdiff); } AACEncDSPContext; void ff_aacenc_dsp_init(AACEncDSPContext *s); -- 
2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
