On Fri, Jun 12, 2009 at 3:42 PM, Alex Converse<[email protected]> wrote: > The following patchset adds TNS support to the AAC encoder. > > It's not 100% ready to be integrated. > > Patch 0 adds PARCOR code from Flake and doesn't have much to do with AAC. > Patch 1 moves some AAC stuff around but doesn't have much to do with encoding. > Patch 2 adds TNS support. > Patch 3 adds a TNS coefficient compressor that shouldn't affect the > output of the file after encoding and decoding but does for reasons > that stump me at the moment. Any help on this one would be greatly > appreciated. >
The tables have been dealt with; here are cleaner versions of patches 1-3. --Alex
commit 96712c3d9652578eee809ffc1354829b18647942 Author: Alex Converse <[email protected]> Date: Fri Jun 12 10:48:13 2009 -0400 Routine to calculate PARCOR reflection coefficients and prediction gain. Ported from Flake, written by Justin Ruggles. diff --git a/libavcodec/lpc.h b/libavcodec/lpc.h index 05a1dee..65c250c 100644 --- a/libavcodec/lpc.h +++ b/libavcodec/lpc.h @@ -99,4 +99,34 @@ static inline int compute_lpc_coefs(const LPC_TYPE *autoc, int max_order, return 0; } +/** + * Schur recursion. + * Produces LPC coefficients from autocorrelation data. + * @return prediction gain + */ +static float +compute_parcor_coefs(const float *autoc, int max_order, + float *ref) +{ + int i, j; + double error; + double gen[2][MAX_LPC_ORDER]; + + // Schur recursion + for(i=0; i<max_order; i++) gen[0][i] = gen[1][i] = autoc[i+1]; + error = autoc[0]; + ref[0] = -gen[1][0] / error; + error += gen[1][0] * ref[0]; + for(i=1; i<max_order; i++) { + for(j=0; j<max_order-i; j++) { + gen[1][j] = gen[1][j+1] + ref[i-1] * gen[0][j]; + gen[0][j] = gen[1][j+1] * ref[i-1] + gen[0][j]; + } + ref[i] = -gen[1][0] / error; + error += gen[1][0] * ref[i]; + } + + return autoc[0]/error; +} + #endif /* AVCODEC_LPC_H */
commit bcc3ab4c19a04743ff0579f3c3825e0b03edeb85 Author: Alex Converse <[email protected]> Date: Fri Jun 12 11:00:31 2009 -0400 Rudimentary TNS support based on ISO 13818-7 Annex C diff --git a/libavcodec/aac.h b/libavcodec/aac.h index cd9eb8a..177ab08 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -152,6 +152,9 @@ typedef struct { int direction[8][4]; int order[8][4]; float coef[8][4][TNS_MAX_ORDER]; + uint8_t coef_res[8]; + uint8_t coef_compress[8][4]; + int8_t icoef[8][4][TNS_MAX_ORDER]; } TemporalNoiseShaping; /** diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index a4ed743..dde0e49 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -1,6 +1,7 @@ /* * AAC coefficients encoder * Copyright (C) 2008-2009 Konstantin Shishkov + * Copyright (C) 2009 Alex Converse * * This file is part of FFmpeg. * @@ -35,6 +36,7 @@ #include "aac.h" #include "aacenc.h" #include "aactab.h" +#include "lpc.h" /** bits needed to code codebook run value for long windows */ static const uint8_t run_value_bits_long[64] = { @@ -1242,29 +1244,158 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe, const float lam } } +static const uint8_t tns_start_band_1024[12] = +{ 11, 12, 15, 16, 17, 20, 25, 26, 24, 28, 30, 31 }; +static const uint8_t tns_start_band_128[12] = +{ 2, 2, 2, 3, 3, 4, 6, 6, 8, 10, 10, 12 }; + +static void autocorr(float * out, const float * in, int in_size, int out_size) +{ + int i, j; + float tmp; + + for (i = 0; i < out_size; i++) { + tmp = 0.0f; + for (j = 0; j < in_size-i; j++) { + tmp += in[j]*in[i+j]; + } + out[i] = tmp; + } +} + +static void tns_quantize(int coef_res, int8_t* index, float* r, int order) +{ + const double iqfac = ((1 << (coef_res-1)) - 0.5) / (M_PI/2.0); + const double iqfac_m = ((1 << (coef_res-1)) + 0.5) / (M_PI/2.0); + int i; + + /* Reflection coefficient quantization */ + for (i = 0; i < order; i++) { + index[i] = (int)((asin( r[i] ) * ((r[i] >= 0) ? 
iqfac : iqfac_m))+0.5); + r[i] = sin(index[i] / ((index[i] >= 0) ? iqfac : iqfac_m)); + } +} + +static int tns_truncate_order(float* rq, int order) +{ + int i; + const float TRUNC_THRESH = 0.1f; + + for (i = order-1; i >= 0; i--) { + rq[i] = (fabsf(rq[i]) > TRUNC_THRESH) ? rq[i]: 0.0f; + if (rq[i] != 0.0f) + return i+1; + } + return 0; +} + +static void tns_step_up(int order, float* a, const float *ref) +{ + float b[TNS_MAX_ORDER+2]; + int i, m; + + a[0] = b[0] = 1.0f; + for (m = 1; m <= order; m++) { + for (i = 1; i < m; i++) { + b[i] = a[i] + ref[m-1] * a[m-i]; + } + for (i = 1; i < m; i++) { + a[i] = b[i]; + } + a[m] = ref[m-1]; + } +} + +static void tns_ma_filter(float * coef, const float * a, int len, int order) +{ + int i, m; + const int RING = 31; + float buf[RING+1]; + + for (m = 0; m < len; m++) { + buf[m&RING] = coef[m]; + for (i = 1; i <= FFMIN(order, m); i++) + coef[m] += buf[(m - i)&RING] * a[i]; + } +} + +static void tns_encode_annex_c(AVCodecContext *avctx, AACEncContext *s, + SingleChannelElement * sce) +{ + + int max_bands, start_band, stop_band, order, coef_res, w; + float gain; + float ref[TNS_MAX_ORDER]; + float a[TNS_MAX_ORDER+1]; + const float GAIN_THRESH = 1.4f; + TemporalNoiseShaping * tns = &sce->tns; + IndividualChannelStream * ics = &sce->ics; + + tns->present = 0; + + if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { + max_bands = ff_tns_max_bands_128[s->samplerate_index]; + start_band = tns_start_band_128[s->samplerate_index]; + order = 7; + } else { + max_bands = ff_tns_max_bands_1024[s->samplerate_index]; + start_band = tns_start_band_1024[s->samplerate_index]; + order = 12; + } + stop_band = ics->num_swb; + start_band = FFMAX(start_band, stop_band - max_bands); + + for (w = 0; w < ics->num_windows; w++) { + int idx = w * 128 + ics->swb_offset[start_band]; + int len = ics->swb_offset[stop_band] - ics->swb_offset[start_band]; + autocorr(a, &sce->coeffs[idx], len, order+1); + gain = compute_parcor_coefs(a, order, ref); + + if (gain 
> GAIN_THRESH) { + coef_res = 4; + tns->present = 1; + tns->n_filt[w] = 1; + tns->coef_res[w] = coef_res; + tns->length[w][0] = stop_band - start_band; + tns->direction[w][0] = 0; + tns_quantize(coef_res, &tns->icoef[w][0][0], ref, order); + tns->order[w][0] = tns_truncate_order(ref, order); + tns->coef_compress[w][0] = 0; + tns_step_up(tns->order[w][0], a, ref); + tns_ma_filter(&sce->coeffs[idx], a, len, tns->order[w][0]); + } else { + tns->n_filt[w] = 0; + } + } +} + AACCoefficientsEncoder ff_aac_coders[] = { { search_for_quantizers_faac, encode_window_bands_info_fixed, quantize_and_encode_band, + tns_encode_annex_c, // search_for_ms, }, { search_for_quantizers_anmr, encode_window_bands_info, quantize_and_encode_band, + tns_encode_annex_c, // search_for_ms, }, { search_for_quantizers_twoloop, encode_window_bands_info, quantize_and_encode_band, + tns_encode_annex_c, // search_for_ms, }, { search_for_quantizers_fast, encode_window_bands_info, quantize_and_encode_band, + tns_encode_annex_c, // search_for_ms, }, }; diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index b4fab18..72c25bc 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -360,6 +360,14 @@ static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, in } /** + * Encode tns but don't write it to the bitstream. + */ +static void encode_tns(AVCodecContext * avctx, AACEncContext *s, SingleChannelElement *sce) +{ + s->coder->encode_tns(avctx, s, sce); +} + +/** * Encode scalefactor band coding type. 
*/ static void encode_band_info(AACEncContext *s, SingleChannelElement *sce) @@ -435,6 +443,36 @@ static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) } } +static void put_tns(PutBitContext * pb, SingleChannelElement * sce) +{ + TemporalNoiseShaping * tns = &sce->tns; + const int is8 = sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE; + const int num_windows = sce->ics.num_windows; + int w, filt, i, coef_len; + + put_bits(pb, 1, tns->present); + if (tns->present) { + for (w = 0; w < num_windows; w++) { + put_bits(pb, 2 - is8, tns->n_filt[w]); + if (tns->n_filt[w]) { + put_bits(pb, 1, tns->coef_res[w] - 3); + for (filt = 0; filt < tns->n_filt[w]; filt++) { + put_bits(pb, 6 - 2 * is8, tns->length[w][filt]); + put_bits(pb, 5 - 2 * is8, tns->order [w][filt]); + if (tns->order[w][filt]) { + put_bits(pb, 1, tns->direction [w][filt]); + put_bits(pb, 1, tns->coef_compress[w][filt]); + coef_len = tns->coef_res[w] - tns->coef_compress[w][filt]; + for (i = 0; i < tns->order[w][filt]; i++) { + put_sbits(pb, coef_len, tns->icoef[w][filt][i]); + } + } + } + } + } + } +} + /** * Encode one channel of audio data. */ @@ -443,9 +481,10 @@ static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, Si put_bits(&s->pb, 8, sce->sf_idx[0]); if(!common_window) put_ics_info(s, &sce->ics); encode_band_info(s, sce); + encode_tns(avctx, s, sce); encode_scale_factors(avctx, s, sce); encode_pulses(s, &sce->pulse); - put_bits(&s->pb, 1, 0); //tns + put_tns(&s->pb, sce); put_bits(&s->pb, 1, 0); //ssr encode_spectral_coeffs(s, sce); return 0; @@ -527,6 +566,9 @@ static int aac_encode_frame(AVCodecContext *avctx, ics->num_windows = wi[j].num_windows; ics->swb_sizes = s->psy.bands [ics->num_windows == 8]; ics->num_swb = s->psy.num_bands[ics->num_windows == 8]; + ics->swb_offset = (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) ? 
+ ff_swb_offset_128 [s->samplerate_index] : + ff_swb_offset_1024[s->samplerate_index]; for(k = 0; k < ics->num_windows; k++) ics->group_len[k] = wi[j].grouping[k]; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 9dc1c78..580934c 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -39,6 +39,8 @@ typedef struct AACCoefficientsEncoder{ int win, int group_len, const float lambda); void (*quantize_and_encode_band)(PutBitContext *pb, const float *in, int size, int scale_idx, int cb, const float lambda); + void (*encode_tns)(AVCodecContext *avctx, struct AACEncContext *s, + SingleChannelElement *sce); void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe, const float lambda); }AACCoefficientsEncoder;
commit d800e7a2cb6eaee6d4d1e46eff03e6c6f6aadd2a Author: Alex Converse <[email protected]> Date: Fri Jun 12 12:58:45 2009 -0400 TNS coefficient compressor diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index dde0e49..7011b43 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -1289,6 +1289,19 @@ static int tns_truncate_order(float* rq, int order) return 0; } +static int tns_compress(int coef_res, int8_t* index, int order) +{ + int i; + int shift = coef_res - 2; + int head; + for (i = 0; i < order; i++) { + head = index[i] >> shift; + if (head > 0 || head < -1) + return 0; + } + return 1; +} + static void tns_step_up(int order, float* a, const float *ref) { float b[TNS_MAX_ORDER+2]; @@ -1360,7 +1373,7 @@ static void tns_encode_annex_c(AVCodecContext *avctx, AACEncContext *s, tns->direction[w][0] = 0; tns_quantize(coef_res, &tns->icoef[w][0][0], ref, order); tns->order[w][0] = tns_truncate_order(ref, order); - tns->coef_compress[w][0] = 0; + tns->coef_compress[w][0] = tns_compress(coef_res, &tns->icoef[w][0][0], tns->order[w][0]); tns_step_up(tns->order[w][0], a, ref); tns_ma_filter(&sce->coeffs[idx], a, len, tns->order[w][0]); } else {
_______________________________________________ FFmpeg-soc mailing list [email protected] https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc
