I am using ffmpeg and libswresample to play back movie trailers. For stereo playback everything works, but for Dolby 5.1 all the ambient sounds play correctly while the voice track is either missing or far too quiet, as if a channel were missing. The only unusual part of my implementation is that the audio tracks are split into frames before playback.
I wonder if someone can suggest what it is I am doing wrong or missing.
Here is some sample code:

// Internal (post-decode) audio sample format.  Three globals describe the
// same choice and must be kept in sync with each other:
//   kInternalSampleFormat - the libav sample format decoded audio is stored in
//   kIntSampleFormat      - the matching mrv::AudioEngine format
//   kFormatSize           - size in bytes of one sample of that format
#if defined(_WIN32) || defined(_WIN64)
// Windows builds: signed 16-bit integer samples.
AVSampleFormat kInternalSampleFormat = AV_SAMPLE_FMT_S16;
mrv::AudioEngine::AudioFormat kIntSampleFormat = mrv::AudioEngine::kS16LSB;
unsigned kFormatSize = static_cast< unsigned >( sizeof( int16_t ) );
#else
// All other builds: 32-bit float samples.
AVSampleFormat kInternalSampleFormat = AV_SAMPLE_FMT_FLT;
mrv::AudioEngine::AudioFormat kIntSampleFormat = mrv::AudioEngine::kFloatLSB;
unsigned kFormatSize = static_cast< unsigned >( sizeof( float ) );
#endif

...



/**
 * Decode one audio frame from a packet and store it, interleaved, in the
 * internal sample format (kInternalSampleFormat).
 *
 * When the decoder's sample format differs from kInternalSampleFormat the
 * frame is run through libswresample (the context is created lazily and
 * cached in forw_ctx; channel layout and sample rate are left unchanged).
 * Otherwise the decoded data is copied as is.
 *
 * @param avctx           codec context used for decoding.
 * @param samples         destination buffer.
 * @param frame_size_ptr  in: capacity of @p samples in bytes;
 *                        out: number of bytes written (0 if the decoder
 *                        produced no frame).  Left untouched on the early
 *                        error returns below.
 * @param avpkt           packet to decode.
 *
 * @return the value returned by avcodec_decode_audio4() on the normal path;
 *         AVERROR(EINVAL) if @p samples is too small for the frame;
 *         a negative error code if the frame size cannot be computed or the
 *         conversion fails; 0 if the resampler cannot be created.
 */
int CMedia::decode_audio3(AVCodecContext *avctx, int16_t *samples,
                          int *frame_size_ptr,
                          AVPacket *avpkt)
{
    AVFrame frame = { { 0 } };
    int got_frame = 0;

    const int ret = avcodec_decode_audio4(avctx, &frame, &got_frame, avpkt);

    if (ret < 0 || !got_frame) {
        *frame_size_ptr = 0;
        return ret;
    }

    int plane_size = 0;
    const int planar = av_sample_fmt_is_planar(avctx->sample_fmt);
    int data_size = av_samples_get_buffer_size(&plane_size, avctx->channels,
                                               frame.nb_samples,
                                               avctx->sample_fmt, 1);
    if (data_size < 0) {
        // Invalid channel count / sample count / format: nothing sane to copy.
        IMG_ERROR( "decode_audio3 - Could not compute the size of the "
                   "decoded frame" );
        return data_size;
    }

    const bool needs_conversion = ( avctx->sample_fmt != kInternalSampleFormat );

    // Bytes that will actually be written to 'samples'.  After a conversion
    // this depends on the *output* sample size (kFormatSize), which can be
    // larger or smaller than the decoder's.
    const int required_size = needs_conversion ?
        frame.nb_samples * avctx->channels * (int) kFormatSize :
        data_size;

    if (*frame_size_ptr < required_size) {
        IMG_ERROR( "decode_audio3 - Output buffer size is too small for "
                   "the current frame ("
                   << *frame_size_ptr << " < " << required_size << ")" );
        return AVERROR(EINVAL);
    }

    if ( needs_conversion )
    {
        if (!forw_ctx)
        {
            char buf[256];

            uint64_t in_ch_layout =
                get_valid_channel_layout(avctx->channel_layout, avctx->channels);

            if ( in_ch_layout == 0 ) in_ch_layout = AV_CH_LAYOUT_STEREO;

            av_get_channel_layout_string( buf, 256, avctx->channels,
                                          in_ch_layout );

            IMG_INFO("Create audio conversion from " << buf
                     << ", channels " << avctx->channels << ", " );
            IMG_INFO( "format "
                      << av_get_sample_fmt_name( avctx->sample_fmt )
                      << ", sample rate " << avctx->sample_rate << " to" );

            // Only the sample format changes; layout and rate are kept.
            uint64_t out_ch_layout = in_ch_layout;

            av_get_channel_layout_string( buf, 256, avctx->channels,
                                          out_ch_layout );
            AVSampleFormat  out_sample_fmt = kInternalSampleFormat;
            AVSampleFormat  in_sample_fmt = avctx->sample_fmt;
            int in_sample_rate = avctx->sample_rate;
            int out_sample_rate = in_sample_rate;
            IMG_INFO( buf << ", channels " << avctx->channels << ", format "
                      << av_get_sample_fmt_name( out_sample_fmt )
                      << ", sample rate "
                      << out_sample_rate);

            forw_ctx  = swr_alloc_set_opts(NULL, out_ch_layout,
                                           out_sample_fmt,  out_sample_rate,
                                           in_ch_layout,  in_sample_fmt,
                                           in_sample_rate,
                                           0, NULL);
            if(!forw_ctx) {
                LOG_ERROR("Failed to alloc swresample library");
                return 0;
            }
            if(swr_init(forw_ctx) < 0)
            {
                av_get_channel_layout_string(buf, 256, -1, in_ch_layout);
                LOG_ERROR( "Failed to init swresample library with "
                           << buf << " "
                           << av_get_sample_fmt_name(in_sample_fmt)
                           << " frequency: " << in_sample_rate );
                // Do not keep a half-initialised context around: the next
                // call would skip this block and convert with it.
                swr_free(&forw_ctx);
                return 0;
            }
        }

        // The internal format is packed, so there is a single output plane.
        uint8_t* out_plane = (uint8_t*) samples;

        // out_count is expressed in samples per channel.  The sample rate is
        // unchanged, so at most frame.nb_samples are produced, and the
        // capacity check above guarantees they fit.
        const int converted = swr_convert(forw_ctx, &out_plane,
                                          frame.nb_samples,
                                          (const uint8_t **)frame.extended_data,
                                          frame.nb_samples );
        if ( converted < 0 )
        {
            LOG_ERROR( "decode_audio3 - swr_convert failed" );
            return converted;
        }

        // Report what was really written, based on the output sample size.
        data_size = converted * avctx->channels * (int) kFormatSize;
    }
    else
    {
        // Copy the first plane.  For packed data plane_size == data_size.
        memcpy(samples, frame.extended_data[0], plane_size);

        // Remaining planes, if the format were planar.
        if (planar && avctx->channels > 1) {
            uint8_t *out = ((uint8_t *)samples) + plane_size;
            for (int ch = 1; ch < avctx->channels; ch++) {
                memcpy(out, frame.extended_data[ch], plane_size);
                out += plane_size;
            }
        }
    }

    *frame_size_ptr = data_size;
    return ret;
}

...
/**
 * Decode every audio frame contained in a packet, appending the decoded
 * samples to _audio_buf and advancing _audio_buf_used.
 *
 * @param ptsframe  out: frame number the decoded audio belongs to.
 * @param frame     requested frame (not used by this function).
 * @param pkt       packet to decode; must not be one of the special
 *                  seek/flush/preroll/loop marker packets.
 *
 * @return kDecodeNoStream if there is no audio stream, kDecodeOK if the
 *         whole packet was consumed, kDecodeMissingSamples otherwise.
 */
CMedia::DecodeStatus
CMedia::decode_audio_packet( boost::int64_t& ptsframe,
                             const boost::int64_t frame,
                             const AVPacket& pkt )
{

  AVStream* stream = get_audio_stream();
  if ( !stream ) return kDecodeNoStream;

  // Get the audio codec context
  AVCodecContext* ctx = stream->codec;


  assert( !_audio_packets.is_seek( pkt ) );
  assert( !_audio_packets.is_flush( pkt ) );
  assert( !_audio_packets.is_preroll( pkt ) );
  assert( !_audio_packets.is_loop_end( pkt ) );
  assert( !_audio_packets.is_loop_start( pkt ) );

  ptsframe = get_frame( stream, pkt );

  // Make sure audio frames are continuous during playback to
  // accommodate weird sample rates not evenly divisible by frame rate
  if ( _audio_buf_used != 0 && (!_audio.empty()) )
    {
      ptsframe = _audio_last_frame + 1;
      // assert( ptsframe <= last_frame() );
    }



#ifdef DEBUG
  if ( _audio_buf_used + pkt.size >= _audio_max )
    {
      IMG_ERROR( _("Too much audio used:") << _audio_buf_used  );
    }
#endif

  // Work on a shallow copy so the caller's packet is not modified while
  // we step through it.
  AVPacket pkt_temp;
  av_init_packet(&pkt_temp);
  pkt_temp.data = pkt.data;
  pkt_temp.size = pkt.size;



  assert( _audio_buf != NULL );
  assert( pkt.size + _audio_buf_used < _audio_max );

  int audio_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;
  assert( pkt_temp.size <= audio_size );

  while ( pkt_temp.size > 0 || pkt_temp.data == NULL )
    {
      // audio_size is in/out: capacity on entry, bytes produced on return.
      // Reset it for every frame; otherwise the second frame of a packet
      // would be offered only as much room as the first frame produced.
      audio_size = AVCODEC_MAX_AUDIO_FRAME_SIZE;

      // Decode the audio into the buffer
      assert( _audio_buf_used + pkt_temp.size <= _audio_max );
      assert( audio_size > 0 );
      int ret = decode_audio3( ctx,
                               ( int16_t * )( (char*)_audio_buf +
                                              _audio_buf_used ),
                               &audio_size, &pkt_temp );

      // If no samples are returned, then break now
      if ( ret <= 0 )
        {
          pkt_temp.size = 0;
          IMG_ERROR( _("Audio missed for frame: ") << ptsframe
                     << _(" ret: ") << ret
                     << _(" audio max: ")  << _audio_max
                     << _(" audio used: ") << _audio_buf_used
                     );

          return kDecodeMissingSamples;
        }


      assert( audio_size > 0 );
      assert( audio_size + _audio_buf_used <= _audio_max );

      // Decrement the length by the number of bytes parsed
      pkt_temp.data += ret;
      pkt_temp.size -= ret;

      if ( audio_size <= 0 ) break;

      _audio_buf_used += audio_size;
    }

  if ( pkt_temp.size == 0 ) return kDecodeOK;

  return kDecodeMissingSamples;
}



/**
 * (Re)open the audio engine for playback in the internal sample format.
 *
 * close_audio() is called first, then the sample rate is remembered in
 * _samples_per_sec and the engine is opened with kIntSampleFormat.
 *
 * @param channels        number of channels to open.
 * @param nSamplesPerSec  sample rate.
 *
 * @return whatever _audio_engine->open() returns.
 */
bool CMedia::open_audio( const short channels,
                         const unsigned nSamplesPerSec )
{
  close_audio();

  _samples_per_sec = nSamplesPerSec;

  // kFormatSize is in bytes; the engine is given the width in bits.
  const unsigned bits_per_sample = kFormatSize * 8;

  const bool opened = _audio_engine->open( channels, nSamplesPerSec,
                                           kIntSampleFormat, bits_per_sample );
  return opened;
}



I can show the implementation of _audio_engine and how I split the audio into frames if needed, too.
_______________________________________________
Libav-user mailing list
[email protected]
http://ffmpeg.org/mailman/listinfo/libav-user

Reply via email to