Author: kostya
Date: Mon Jul 28 15:22:53 2008
New Revision: 2887

Log:
Rudimentary multichannel support

Modified: aacenc/aacenc.c aacenc/aacpsy.c aacenc/aacpsy.h Modified: aacenc/aacenc.c ============================================================================== --- aacenc/aacenc.c (original) +++ aacenc/aacenc.c Mon Jul 28 15:22:53 2008 @@ -181,7 +181,9 @@ typedef struct { int swb_num1024; const uint8_t *swb_sizes128; int swb_num128; - ChannelElement cpe; + + ProgramConfig pc; + ChannelElement *cpe; AACPsyContext psy; } AACEncContext; @@ -239,6 +241,7 @@ static av_cold int aac_encode_init(AVCod ff_sine_window_init(sine_long_1024, 1024); ff_sine_window_init(sine_short_128, 128); + s->cpe = av_mallocz(sizeof(ChannelElement) * ((avctx->channels + 1) >> 1)); //TODO: psy model selection with some option ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP, 0, s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128); avctx->extradata = av_malloc(2); @@ -300,6 +303,45 @@ static void analyze(AVCodecContext *avct } /** + * Encode channel layout (aka program config element). + * @see table 4.2 + */ +static void put_program_config_element(AVCodecContext *avctx, AACEncContext *s) +{ + int i; + ProgramConfig *pc = &s->pc; + + put_bits(&s->pb, 2, 0); //object type - ? 
+ put_bits(&s->pb, 4, s->samplerate_index); //sample rate index + + put_bits(&s->pb, 4, avctx->channels/2); // all channels are front :) + put_bits(&s->pb, 4, 0); // no side channels + put_bits(&s->pb, 4, 0); // no back channels + put_bits(&s->pb, 2, 0); // no LFE + put_bits(&s->pb, 3, 0); // no associated data + put_bits(&s->pb, 4, 0); // no valid channel couplings + + put_bits(&s->pb, 1, pc->mono_mixdown); + if(pc->mono_mixdown) + put_bits(&s->pb, 4, pc->mixdown_coeff_index); + put_bits(&s->pb, 1, pc->stereo_mixdown); + if(pc->stereo_mixdown) + put_bits(&s->pb, 4, pc->mixdown_coeff_index); + put_bits(&s->pb, 1, pc->matrix_mixdown); + if(pc->matrix_mixdown){ + put_bits(&s->pb, 2, pc->mixdown_coeff_index); + put_bits(&s->pb, 1, pc->pseudo_surround); + } + //TODO: proper channel map output + for(i = 0; i < avctx->channels; i += 2){ + put_bits(&s->pb, 1, 1); // channel is CPE + put_bits(&s->pb, 4, i/2); + } + align_put_bits(&s->pb); + put_bits(&s->pb, 8, 0); // no commentary bytes +} + +/** * Encode ics_info element. 
* @see Table 4.6 */ @@ -654,45 +696,47 @@ static int aac_encode_frame(AVCodecConte uint8_t *frame, int buf_size, void *data) { AACEncContext *s = avctx->priv_data; - int16_t *samples = s->samples; + int16_t *samples = s->samples, *samples2; + ChannelElement *cpe; + int i, j, chans; if(!samples){ s->samples = av_malloc(1024 * avctx->channels * sizeof(s->samples[0])); memcpy(s->samples, data, 1024 * avctx->channels * sizeof(s->samples[0])); return 0; } - ff_aac_psy_suggest_window(&s->psy, samples, data, 0, &s->cpe); - - analyze(avctx, s, &s->cpe, samples, 0); - if(avctx->channels > 1) - analyze(avctx, s, &s->cpe, samples, 1); - - ff_aac_psy_analyze(&s->psy, 0, &s->cpe); init_put_bits(&s->pb, frame, buf_size*8); if(avctx->frame_number==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)){ put_bitstream_info(avctx, s, LIBAVCODEC_IDENT); } - switch(avctx->channels){ - case 1: - put_bits(&s->pb, 3, ID_SCE); - put_bits(&s->pb, 4, 0); //tag - encode_individual_channel(avctx, &s->cpe, 0); - break; - case 2: - put_bits(&s->pb, 3, ID_CPE); - put_bits(&s->pb, 4, 0); //tag - put_bits(&s->pb, 1, s->cpe.common_window); - if(s->cpe.common_window){ - put_ics_info(avctx, &s->cpe.ch[0].ics); - encode_ms_info(&s->pb, &s->cpe); + //encode channels as channel pairs and one optional single channel element + /*if(avctx->channels > 2){ + put_bits(&s->pb, 3, ID_PCE); + put_bits(&s->pb, 4, 0); + put_program_config_element(avctx, s); + }*/ + for(i = 0; i < avctx->channels; i += 2){ + chans = FFMIN(avctx->channels - i, 2); + cpe = &s->cpe[i/2]; + samples2 = samples + i; + ff_aac_psy_suggest_window(&s->psy, samples2, data, i, cpe); + for(j = 0; j < chans; j++){ + analyze(avctx, s, cpe, samples2, j); + } + ff_aac_psy_analyze(&s->psy, i, cpe); + put_bits(&s->pb, 3, chans > 1 ? 
ID_CPE : ID_SCE); + put_bits(&s->pb, 4, i >> 1); + if(chans == 2){ + put_bits(&s->pb, 1, cpe->common_window); + if(cpe->common_window){ + put_ics_info(avctx, &cpe->ch[0].ics); + encode_ms_info(&s->pb, cpe); + } + } + for(j = 0; j < chans; j++){ + encode_individual_channel(avctx, cpe, j); } - encode_individual_channel(avctx, &s->cpe, 0); - encode_individual_channel(avctx, &s->cpe, 1); - break; - default: - av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels); - return -1; } put_bits(&s->pb, 3, ID_END); @@ -713,6 +757,7 @@ static av_cold int aac_encode_end(AVCode ff_mdct_end(&s->mdct128); ff_aac_psy_end(&s->psy); av_freep(&s->samples); + av_freep(&s->cpe); return 0; } Modified: aacenc/aacpsy.c ============================================================================== --- aacenc/aacpsy.c (original) +++ aacenc/aacpsy.c Mon Jul 28 15:22:53 2008 @@ -76,13 +76,14 @@ static inline float calc_distortion(floa /** * Produce integer coefficients from scalefactors provided by model. 
*/ -static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int search_pulses) +static void psy_create_output(AACPsyContext *apc, ChannelElement *cpe, int channel, int search_pulses) { int i, w, w2, g, ch; int start, sum, maxsfb, cmaxsfb; int pulses, poff[4], pamp[4]; + int chans = FFMIN(apc->avctx->channels - channel, 2); - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ start = 0; maxsfb = 0; cpe->ch[ch].pulse.present = 0; @@ -152,7 +153,7 @@ static void psy_create_output(AACPsyCont } } - if(apc->avctx->channels > 1 && cpe->common_window){ + if(apc->avctx->channels - channel > 1 && cpe->common_window){ int msc = 0; cpe->ch[0].ics.max_sfb = FFMAX(cpe->ch[0].ics.max_sfb, cpe->ch[1].ics.max_sfb); cpe->ch[1].ics.max_sfb = cpe->ch[0].ics.max_sfb; @@ -167,8 +168,9 @@ static void psy_create_output(AACPsyCont static void psy_null_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe) { int ch; + int chans = FFMIN(apc->avctx->channels - channel, 2); - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE; cpe->ch[ch].ics.use_kb_window[0] = 1; cpe->ch[ch].ics.num_windows = 1; @@ -184,8 +186,9 @@ static void psy_null_process(AACPsyConte int start; int ch, g, i; int minscale; + int chans = FFMIN(apc->avctx->channels - channel, 2); - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ start = 0; for(g = 0; g < apc->num_bands1024; g++){ float energy = 0.0f, ffac = 0.0f, thr, dist; @@ -207,7 +210,7 @@ static void psy_null_process(AACPsyConte } } } - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ minscale = 255; for(g = 0; g < apc->num_bands1024; g++) if(!cpe->ch[ch].zeroes[0][g]) @@ -217,14 +220,15 @@ static void psy_null_process(AACPsyConte if(!cpe->ch[ch].zeroes[0][g]) cpe->ch[ch].sf_idx[0][g] = FFMIN(minscale + SCALE_MAX_DIFF, cpe->ch[ch].sf_idx[0][g]); } - 
psy_create_output(apc, cpe, 1); + psy_create_output(apc, cpe, channel, 1); } static void psy_null8_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe) { int ch, i; + int chans = FFMIN(apc->avctx->channels - channel, 2); - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ int prev_seq = cpe->ch[ch].ics.window_sequence_prev; cpe->ch[ch].ics.use_kb_window[1] = cpe->ch[ch].ics.use_kb_window[0]; cpe->ch[ch].ics.window_sequence_prev = cpe->ch[ch].ics.window_sequence; @@ -257,9 +261,10 @@ static void psy_null8_process(AACPsyCont { int start; int w, ch, g, i; + int chans = FFMIN(apc->avctx->channels - channel, 2); //detect M/S - if(apc->avctx->channels > 1 && cpe->common_window){ + if(chans > 1 && cpe->common_window){ start = 0; for(w = 0; w < cpe->ch[0].ics.num_windows; w++){ for(g = 0; g < cpe->ch[0].ics.num_swb; g++){ @@ -271,7 +276,7 @@ static void psy_null8_process(AACPsyCont } } } - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ cpe->ch[ch].gain = SCALE_ONE_POS; for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){ for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){ @@ -280,7 +285,7 @@ static void psy_null8_process(AACPsyCont } } } - psy_create_output(apc, cpe, 0); + psy_create_output(apc, cpe, channel, 0); } /** @@ -422,9 +427,10 @@ static av_cold int psy_3gpp_init(AACPsyC static void psy_3gpp_window(AACPsyContext *apc, int16_t *audio, int16_t *la, int channel, ChannelElement *cpe) { int ch; + int chans = FFMIN(apc->avctx->channels - channel, 2); //XXX: stub, because encoder does not support long to short window transition yet :( - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ cpe->ch[ch].ics.window_sequence = ONLY_LONG_SEQUENCE; cpe->ch[ch].ics.use_kb_window[0] = 1; cpe->ch[ch].ics.num_windows = 1; @@ -479,9 +485,10 @@ static void psy_3gpp_process(AACPsyConte Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data; float stereo_att, 
pe_target; int bits_avail; + const int chans = FFMIN(apc->avctx->channels - channel, 2); //calculate and apply stereo attenuation factor - 5.2 - if(apc->avctx->channels > 1){ + if(apc->avctx->channels - channel > 1){ float l, r; stereo_att = 1.0 / 2.0; //XXX: find some way to determine it for(i = 0; i < 1024; i++){ @@ -494,7 +501,7 @@ static void psy_3gpp_process(AACPsyConte //calculate energies, initial thresholds and related values - 5.4.2 memset(pctx->band, 0, sizeof(pctx->band)); - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ start = 0; cpe->ch[ch].gain = 0; for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){ @@ -517,7 +524,7 @@ static void psy_3gpp_process(AACPsyConte } //modify thresholds - spread, threshold in quiet - 5.4.3 - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){ for(g = 1; g < cpe->ch[ch].ics.num_swb; g++){ g2 = w*16 + g; @@ -540,7 +547,7 @@ static void psy_3gpp_process(AACPsyConte } // M/S detection - 5.5.2 - if(apc->avctx->channels > 1 && cpe->common_window){ + if(chans > 1 && cpe->common_window){ start = 0; for(w = 0; w < cpe->ch[0].ics.num_windows; w++){ for(g = 0; g < cpe->ch[0].ics.num_swb; g++){ @@ -571,7 +578,7 @@ static void psy_3gpp_process(AACPsyConte } } - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ pctx->a[ch] = pctx->b[ch] = pctx->pe[ch] = pctx->thr[ch] = 0.0f; for(w = 0; w < cpe->ch[ch].ics.num_windows; w++){ for(g = 0; g < cpe->ch[ch].ics.num_swb; g++){ @@ -594,7 +601,7 @@ static void psy_3gpp_process(AACPsyConte bits_avail = pctx->avg_bits + pctx->reservoir; bits_avail = FFMIN(bits_avail, pctx->avg_bits * 1.5); pe_target = 1.18f * bits_avail / apc->avctx->channels; - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ float t0, pe, r; if(pctx->b[ch] == 0.0f) continue; for(i = 0; i < 2; i++){ @@ -622,7 +629,7 @@ static void psy_3gpp_process(AACPsyConte } 
//determine scalefactors - 5.6.2 - for(ch = 0; ch < apc->avctx->channels; ch++){ + for(ch = 0; ch < chans; ch++){ int min_scale = 256; prev_scale = -1; cpe->ch[ch].gain = 0; @@ -655,7 +662,7 @@ static void psy_3gpp_process(AACPsyConte } memcpy(pctx->prev_band, pctx->band, sizeof(pctx->band)); - psy_create_output(apc, cpe, 0); + psy_create_output(apc, cpe, channel, 0); } static av_cold void psy_3gpp_end(AACPsyContext *apc) Modified: aacenc/aacpsy.h ============================================================================== --- aacenc/aacpsy.h (original) +++ aacenc/aacpsy.h Mon Jul 28 15:22:53 2008 @@ -68,6 +68,21 @@ typedef struct { int amp[4]; } Pulse; +#define MAX_TAGID 16 + +/** + * Program configuration - describes how channels are arranged. Either read from + * stream (ID_PCE) or created based on a default fixed channel arrangement. + */ +typedef struct { + int che_type[4][MAX_TAGID]; ///< channel element type with the first index as the first 4 raw_data_block IDs + int mono_mixdown; ///< The SCE tag to use if user requests mono output, -1 if not available. + int stereo_mixdown; ///< The CPE tag to use if user requests stereo output, -1 if not available. + int matrix_mixdown; ///< The CPE tag to use if user requests matrixed stereo output, -1 if not available. + int mixdown_coeff_index; ///< 0-3 + int pseudo_surround; ///< Mix surround channels out of phase. +} ProgramConfig; + /** * Individual Channel Stream */ _______________________________________________ FFmpeg-soc mailing list FFmpeg-soc@mplayerhq.hu https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc