Hi everyone,

I would like to thank you in advance for your help.

I am new to ffmpeg/avlib and what I am trying to do is just decode and
encode back again an aac file. I don't try to make any changes to the file
although the avlib encoder is producing a file with more initial padding
than expected and it is cropping the final frames of the audio clip.

I exported the avframes created by the decoder and all the file information
is there. It is at the encoding step that I am doing something wrong.

I am using the libfdk-aac decoder and encoder with the latest version of
ffmpeg in Visual Studio 2013.

I have attached a simplified version of my code and a link to a simple
audio file I have created to test my application.

I would really appreciate it if you can guide me on how to correct the
encoding step by retrieving the final samples and if possible remove the
delay created by the encoder.

Input file: https://www.dropbox.com/s/camqvmttq9a9vks/sinestereo.m4a?dl=0
Output of my program:
https://www.dropbox.com/s/liwfwr85v4g1ucx/audio.raw?dl=0

Looking forward to hearing from you.

Regards,

Panicos
extern "C"
{
#include <libavutil/imgutils.h>
#include <libavutil/audio_fifo.h>
#include <libavutil/samplefmt.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libavutil/opt.h>
   

}
#include <crtdbg.h>


static AVFormatContext *format_ctx = NULL;
static AVCodecContext *video_dec_ctx = NULL, *audio_dec_ctx;
static const char *src_filename = NULL;
static const char *audio_dst_filename = NULL;

static int audio_stream_idx = -1;

static AVStream *audio_stream = NULL;

static int audio_frame_count = 0;

static AVCodecContext* audioCodecCtx;
static AVFormatContext* audioFormatCtx;

static AVAudioFifo* aQueue;

static AVPacket pkt;

static int add_samples_to_fifo( AVAudioFifo *fifo, uint8_t **converted_input_samples, const int frame_size )
{
  int error;

  if ( ( error = av_audio_fifo_realloc( fifo, av_audio_fifo_size( fifo ) + frame_size ) ) < 0 )
  {
    fprintf( stderr, "Could not reallocate FIFO\n" );
    return error;
  }

  if ( av_audio_fifo_write( fifo, (void **)converted_input_samples, frame_size ) < frame_size )
  {
    fprintf( stderr, "Could not write data to FIFO\n" );
    return AVERROR_EXIT;
  }

  return 0;
}
static AVFrame *alloc_audio_frame( enum AVSampleFormat sample_fmt, uint64_t channel_layout, int sample_rate, int nb_samples )
{
  AVFrame *frame = av_frame_alloc();
  int ret;
  if ( !frame ) {
    fprintf( stderr, "Error allocating an audio frame\n" );
    exit( 1 );
  }
  frame->format = sample_fmt;
  frame->channel_layout = channel_layout;
  frame->sample_rate = sample_rate;
  frame->nb_samples = nb_samples;
  if ( nb_samples ) {
    ret = av_frame_get_buffer( frame, 0 );
    if ( ret < 0 ) {
      fprintf( stderr, "Error allocating an audio buffer\n" );
      exit( 1 );
    }
  }
  return frame;
}

static int check_sample_fmt( AVCodec *codec, enum AVSampleFormat sample_fmt )
{
  const enum AVSampleFormat *p = codec->sample_fmts;
  while ( *p != AV_SAMPLE_FMT_NONE ) {
    if ( *p == sample_fmt )
      return 1;
    p++;
  }
  return 0;
}

static void encodeAudio( const char *filename )
{

  AVCodec *codec;
  AVCodecContext *c = NULL;
  AVFrame frame;
  AVFrame convertedFrame;
  AVPacket pkt;
  int i, j, k, ret, got_output;
  int buffer_size;
  FILE *f;
  uint8_t* convertedSamples;
  uint8_t* test;

  float t, tincr;
  printf( "Encode audio file %s\n", filename );
  /* find the MP2 encoder */
  codec = avcodec_find_encoder( audioCodecCtx->codec_id ); //AV_CODEC_ID_MP2
  if ( !codec )
  {
    fprintf( stderr, "Codec not found\n" );
    exit( 1 );
  }
  c = avcodec_alloc_context3( codec );
  if ( !c )
  {
    fprintf( stderr, "allocate audio codec context" );
    exit( 1 );
  }
  
  if ( !c )
  {
    fprintf( stderr, "Could not allocate audio codec context\n" );
    exit( 1 );
  }


  /* put sample parameters */
  c->bit_rate = audioCodecCtx->bit_rate;
  c->sample_fmt = AV_SAMPLE_FMT_S16; 
  
  if ( audioFormatCtx->iformat->flags & AVFMT_GLOBALHEADER )
  {
    c->flags |= CODEC_FLAG_GLOBAL_HEADER;
  }
  
  if ( !check_sample_fmt( codec, c->sample_fmt ) )
  {
    fprintf( stderr, "Encoder does not support sample format %s", av_get_sample_fmt_name( c->sample_fmt ) );
    exit( 1 );
  }


  /* select other audio parameters supported by the encoder */
  c->sample_rate = audioCodecCtx->sample_rate;//select_sample_rate( codec );
  c->channel_layout = audioCodecCtx->channel_layout; //select_channel_layout( codec );
  c->channels = audioCodecCtx->channels; //av_get_channel_layout_nb_channels( c->channel_layout );
  c->time_base = AVRational{ 1, c->sample_rate };
  c->delay = audioCodecCtx->initial_padding;
  c->frame_size = audioCodecCtx->frame_size;
  c->audio_service_type = audioCodecCtx->audio_service_type;
  c->block_align = audioCodecCtx->block_align;

  AVDictionary* encoderOptions = NULL;
  av_dict_set( &encoderOptions, "side_data_only_packets", "1", 0 );
  /* open it */
  if ( avcodec_open2( c, codec, &encoderOptions ) < 0 )
  {
    fprintf( stderr, "Could not open codec\n" );
    exit( 1 );
  }

  f = fopen( filename, "wb" );
  if ( !f ) {
    fprintf( stderr, "Could not open %s\n", filename );
    exit( 1 );
  }
  // Set up SWR context once you've got codec information
  /* SwrContext* swr = swr_alloc();
  av_opt_set_int( swr, "in_channel_layout", video::audioCodecCtx->channel_layout, 0 );
  av_opt_set_int( swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0 );
  av_opt_set_int( swr, "in_sample_rate", video::audioCodecCtx->sample_rate, 0 );
  av_opt_set_int( swr, "out_sample_rate", c->sample_rate, 0 );
  av_opt_set_sample_fmt( swr, "in_sample_fmt", video::audioCodecCtx->sample_fmt, 0 );
  av_opt_set_sample_fmt( swr, "out_sample_fmt", c->sample_fmt, 0 );
  swr_init( swr );*/
  j = 0;
  AVFrame* tempFrame = alloc_audio_frame( audioCodecCtx->sample_fmt, audioCodecCtx->channel_layout, audioCodecCtx->sample_rate, audioCodecCtx->frame_size );
  ret = av_audio_fifo_size( aQueue );
  
  while ( av_audio_fifo_size( aQueue ) > 0 )
  {

    if ( !tempFrame )
    {
      fprintf( stderr, "Could not allocate audio frame\n" );
      exit( 1 );
    }

    const int numChannels = tempFrame->channels;
    /*  we calculate the size of the samples buffer in bytes */
    buffer_size = av_samples_get_buffer_size( NULL, c->channels, c->frame_size, c->sample_fmt, 0 );
   
    av_samples_alloc( &convertedSamples, tempFrame->linesize, audioCodecCtx->channels, tempFrame->nb_samples, c->sample_fmt, 0 );
        
    const int frame_size = FFMIN( av_audio_fifo_size( aQueue ), c->frame_size );
    ret = av_audio_fifo_read( aQueue, (void **)tempFrame->data, frame_size );
    
    if ( ret < frame_size )
    {
      fprintf( stderr, "cannot read frame data" );
      exit( 1 );
    }
        
    if ( ret < 0 )
    {
      fprintf( stderr, "make frame writeable" );
      exit( 1 );
    }
        
    if ( ret != tempFrame->nb_samples )
    {
      fprintf( stderr, "cannot convert samples" );
      exit( 1 );
    }


    ret = avcodec_fill_audio_frame( tempFrame, c->channels, c->sample_fmt, ( const uint8_t* ) tempFrame->extended_data[ 0 ], buffer_size, 0 );

    tempFrame->pts = av_rescale_q( j, AVRational{ 1, c->sample_rate }, c->time_base );
    j++;

    av_init_packet( &pkt );
    pkt.data = NULL; // packet data will be allocated by the encoder
    pkt.size = 0;
    /* encode the samples */
    ret = avcodec_encode_audio2( c, &pkt, tempFrame, &got_output );
    if ( ret < 0 )
    {
      fprintf( stderr, "Error encoding audio frame\n" );
      exit( 1 );
    }
    if ( got_output )
    {
      printf( "Write frame %3d (size=%5d)\n", j, pkt.size );
      fwrite( pkt.data, 1, pkt.size, f );
      
      av_free_packet( &pkt );
    }

    /* get the delayed frames */
    i = 0;
    for ( got_output = 1; got_output; i++ )
    {
      ret = avcodec_encode_audio2( c, &pkt, NULL, &got_output );
      if ( ret < 0 )
      {
        fprintf( stderr, "Error encoding frame\n" );
        exit( 1 );
      }
      if ( got_output )
      {
        printf( "Write delayed frame %3d (size=%5d)\n", j, pkt.size );

        fwrite( pkt.data, 1, pkt.size, f );
        
        av_free_packet( &pkt );
      }
    }    
  }

  
  fclose( f );
  avcodec_close( c );
  av_free( c );
}
static int decode_packet( AVPacket* pkt, AVFrame* frame, int *got_frame, int cached )
{
  char errBuf[ AV_ERROR_MAX_STRING_SIZE ] = { 0 };
  char timeBuf[ AV_TS_MAX_STRING_SIZE ] = { 0 };
  
  int ret = 0;
  int decoded = pkt->size;
  
  *got_frame = 0;

  if ( pkt->stream_index == audio_stream_idx )
  {

    /* decode audio frame */
                
    ret = avcodec_decode_audio4( audio_dec_ctx, frame, got_frame, pkt );


    if ( ret < 0 )
    {
      fprintf( stderr, "Error decoding audio frame (%s)\n", av_err2str( ret ) );
      return ret;
    }

    /* Some audio decoders decode only part of the packet, and have to be
    * called again with the remainder of the packet data.
    * Sample: fate-suite/lossless-audio/luckynight-partial.shn
    * Also, some decoders might over-read the packet. */
    decoded = FFMIN( ret, pkt->size );

    if ( *got_frame )
    {
      size_t unpadded_linesize = frame->channels * frame->nb_samples * av_get_bytes_per_sample( (AVSampleFormat)frame->format );
      printf( "audio_frame%s n:%d nb_samples:%d pts:%s\n", cached ? "(cached)" : "", audio_frame_count++, frame->nb_samples, av_ts2timestr( frame->pts, &audio_dec_ctx->time_base ) );

      /* Write the raw audio data samples of the first plane. This works
      * fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
      * most audio decoders output planar audio, which uses a separate
      * plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
      * In other words, this code will write only the first audio channel
      * in these cases.
      * You should use libswresample or libavfilter to convert the frame
      * to packed data. */
      //AVFrame* clonedFrame = av_frame_alloc();
      //int errorcode =  av_frame_ref( clonedFrame, frame );
      //fprintf( stderr, "Error decoding audio frame (%s)\n", av_err2str( errorcode ) );
                     
      add_samples_to_fifo( aQueue, frame->extended_data, frame->nb_samples );
            
    }

  }

  /* If we use the new API with reference counting, we own the data and need
  * to de-reference it when we don't use it anymore */
  /* if ( *got_frame && api_mode == API_MODE_NEW_API_REF_COUNT )
  {
  av_frame_unref( frame );
  }*/
  return decoded;
}

static int open_codec_context( int *stream_idx, AVFormatContext *format_ctx, enum AVMediaType type )
{
  int ret;
  int stream_index;
  AVStream *stream;
  AVCodecContext *dec_ctx = NULL;
  AVCodec *dec = NULL;
  AVDictionary *opts = NULL;

  ret = av_find_best_stream( format_ctx, type, -1, -1, NULL, 0 );
  if ( ret < 0 )
  {
    fprintf( stderr, "Could not find %s stream in input file '%s'\n", av_get_media_type_string( type ), src_filename );
    return ret;
  }
  else
  {
    stream_index = ret;
    stream = format_ctx->streams[ stream_index ];

    /* find decoder for the stream */
    dec_ctx = stream->codec;
    dec = avcodec_find_decoder( dec_ctx->codec_id );
    if ( !dec )
    {
      fprintf( stderr, "Failed to find %s codec\n", av_get_media_type_string( type ) );
      return AVERROR( EINVAL );
    }
    if ( dec->type == AVMEDIA_TYPE_VIDEO )
    {
      
    }
    else
    {

      audioFormatCtx = format_ctx;
      if ( dec->name = "aac" )
      {
        dec = avcodec_find_decoder_by_name( "libfdk_aac" );
        
        if ( dec == NULL )
        {
          fprintf( stderr, "cannot find libfdk decoder" );
          exit( 1 );
        }
      }
      audioCodecCtx = dec_ctx;
      
    }
    /* Init the decoders, with reference counting */
    av_dict_set( &opts, "refcounted_frames", "1", 0 );
    
    if ( ( ret = avcodec_open2( dec_ctx, dec, &opts ) ) < 0 )
    {
      fprintf( stderr, "Failed to open %s codec\n", av_get_media_type_string( type ) );
      return ret;
    }
    *stream_idx = stream_index;
  }

  return 0;
}

int main( int argc, char **argv )
{
  _CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_DEBUG );
  int ret = 0;
  int got_frame;
  AVFrame* frame = NULL;
  

  src_filename = "./sinestereo.m4a";
  
  //initialise network protocols
  avformat_network_init();

  /* register all formats and codecs */
  avcodec_register_all();
  av_register_all();

  /* open input file, and allocate format context */
  if ( avformat_open_input( &format_ctx, src_filename, NULL, NULL ) < 0 )
  {
    fprintf( stderr, "Could not open source file %s\n", src_filename );
    exit( 1 );
  }

  /* retrieve stream information */
  if ( avformat_find_stream_info( format_ctx, NULL ) < 0 )
  {
    fprintf( stderr, "Could not find stream information\n" );
    exit( 1 );
  }
  
  // IF AUDIO FRAME //
  if ( open_codec_context( &audio_stream_idx, format_ctx, AVMEDIA_TYPE_AUDIO ) >= 0 )
  {

    audio_stream = format_ctx->streams[ audio_stream_idx ];
    audio_dec_ctx = audio_stream->codec;
    aQueue = av_audio_fifo_alloc( audio_dec_ctx->sample_fmt, audio_dec_ctx->channels, audio_dec_ctx->frame_size );


  }//AUDIO FRAME //

  /* dump input information to stderr */
  av_dump_format( format_ctx, 0, src_filename, 0 );

  if ( !audio_stream )
  {
    fprintf( stderr, "Could not find audio or video stream in the input, aborting\n" );
    ret = 1;
    goto end;
  }

  /* When using the new API, you need to use the libavutil/frame.h API, while
  * the classic frame management is available in libavcodec */
  frame = av_frame_alloc();

  if ( !frame )
  {
    fprintf( stderr, "Could not allocate frame\n" );
    ret = AVERROR( ENOMEM );
    goto end;
  }

  /* initialize packet, set data to NULL, let the demuxer fill it */
  av_init_packet( &pkt );
  pkt.data = NULL;
  pkt.size = 0;
   
  if ( audio_stream )
  {
    printf( "Demuxing audio from file '%s' into '%s'\n", src_filename, audio_dst_filename );
  }
  /* read frames from the file */
  while ( av_read_frame( format_ctx, &pkt ) >= 0 )
  {
    AVPacket original_pkt = pkt;
    do
    {
      
      ret = decode_packet( &pkt, frame, &got_frame, 0 );
      if ( ret < 0 )
      {
        break;
      }
      pkt.data += ret;
      pkt.size -= ret;
      // stop sending empty packets if the decoder is finished
      if (audio_stream && !got_frame )
      {
        if ( !pkt.data && audio_dec_ctx->codec->capabilities&CODEC_CAP_DELAY )
        {
          return 0;
        }
        continue;
      }
    } while ( pkt.size > 0 );
    av_free_packet( &original_pkt );
  }

  /* flush cached frames */
  pkt.data = NULL;
  pkt.size = 0;
  do
  {
    //fwrite( pkt.data, 1, pkt.size, file );
    decode_packet( &pkt, frame, &got_frame, 1 );
  } while ( got_frame );
  
  printf( "Demuxing succeeded.\n" );
    

end:
  
  encodeAudio( "./audio.out" );
  
  avcodec_close( audio_dec_ctx );
  avformat_close_input( &format_ctx );
      
  return ret < 0;
}
_______________________________________________
Libav-user mailing list
Libav-user@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/libav-user

Reply via email to