Re: [libav-devel] [PATCH 1/2] Add an "-apad" option to ffmpeg to pad audio to video length.

Aℓex Converse Tue, 12 Apr 2011 14:33:22 -0700

On Mon, Mar 28, 2011 at 6:02 PM, Justin Ruggles
<[email protected]> wrote:
> On 03/28/2011 06:14 PM, Alex Converse wrote:
>
>> On Mon, Mar 28, 2011 at 2:37 PM, Justin Ruggles
>> <[email protected]> wrote:
>>> On 03/28/2011 05:27 PM, Alex Converse wrote:
>>>
>>>> On Mon, Mar 28, 2011 at 2:19 PM, Justin Ruggles
>>>> <[email protected]> wrote:
>>>>> On 03/28/2011 04:54 PM, Alex Converse wrote:
>>>>>
>>>>>> ---
>>>>>>  doc/ffmpeg.texi |    2 ++
>>>>>>  ffmpeg.c        |   47 +++++++++++++++++++++++++++++++++++++++++++++--
>>>>>>  2 files changed, 47 insertions(+), 2 deletions(-)
>>>>>>
>>>>>>
>>>>>>
>>>>>> 0001-Add-an-apad-option-to-ffmpeg-to-pad-audio-to-video-l.patch
>>>>>>
>>>>>>
>>>>>> diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi
>>>>>> index 21c6f2c..2927017 100644
>>>>>> --- a/doc/ffmpeg.texi
>>>>>> +++ b/doc/ffmpeg.texi
>>>>>> @@ -579,6 +579,8 @@ ffmpeg -i file.mpg -vcodec copy -acodec ac3 -ab 384k 
>>>>>> test.mpg -acodec mp2 -ab 19
>>>>>>  @end example
>>>>>>  @item -alang @var{code}
>>>>>>  Set the ISO 639 language code (3 letters) of the current audio stream.
>>>>>> +@item -apad
>>>>>> +Pad audio to video length.
>>>>>>  @end table
>>>>>>
>>>>>>  @section Advanced Audio options:
>>>>>> diff --git a/ffmpeg.c b/ffmpeg.c
>>>>>> index 5e50db3..84a278e 100644
>>>>>> --- a/ffmpeg.c
>>>>>> +++ b/ffmpeg.c
>>>>>> @@ -176,6 +176,7 @@ static int64_t channel_layout = 0;
>>>>>>  #define QSCALE_NONE -99999
>>>>>>  static float audio_qscale = QSCALE_NONE;
>>>>>>  static int audio_disable = 0;
>>>>>> +static int audio_pad = 0;
>>>>>>  static int audio_channels = 1;
>>>>>>  static char  *audio_codec_name = NULL;
>>>>>>  static unsigned int audio_codec_tag = 0;
>>>>>> @@ -297,6 +298,7 @@ typedef struct AVOutputStream {
>>>>>>      int reformat_pair;
>>>>>>      AVAudioConvert *reformat_ctx;
>>>>>>      AVFifoBuffer *fifo;     /* for compression: one audio fifo per 
>>>>>> codec */
>>>>>> +
>>>>>>      FILE *logfile;
>>>>>>  } AVOutputStream;
>>>>>>
>>>>>> @@ -702,7 +704,8 @@ static void write_frame(AVFormatContext *s, AVPacket 
>>>>>> *pkt, AVCodecContext *avctx
>>>>>>  static void do_audio_out(AVFormatContext *s,
>>>>>>                           AVOutputStream *ost,
>>>>>>                           AVInputStream *ist,
>>>>>> -                         unsigned char *buf, int size)
>>>>>> +                         unsigned char *buf, int size,
>>>>>> +                         int skip_resample_reformat)
>>>>>>  {
>>>>>>      uint8_t *buftmp;
>>>>>>      int64_t audio_out_size, audio_buf_size;
>>>>>> @@ -739,6 +742,7 @@ need_realloc:
>>>>>>          ffmpeg_exit(1);
>>>>>>      }
>>>>>>
>>>>>> +    if (!skip_resample_reformat) {
>>>>>>      if (enc->channels != dec->channels)
>>>>>>          ost->audio_resample = 1;
>>>>>>
>>>>>> @@ -871,6 +875,10 @@ need_realloc:
>>>>>>          buftmp = audio_buf;
>>>>>>          size_out = len*osize;
>>>>>>      }
>>>>>> +    } else {
>>>>>> +        buftmp = buf;
>>>>>> +        size_out = size;
>>>>>> +    }
>>>>>>
>>>>>>      /* now encode as many frames as possible */
>>>>>>      if (enc->frame_size > 1) {
>>>>>> @@ -1603,7 +1611,7 @@ static int output_packet(AVInputStream *ist, int 
>>>>>> ist_index,
>>>>>>                          av_assert0(ist->decoding_needed);
>>>>>>                          switch(ost->st->codec->codec_type) {
>>>>>>                          case AVMEDIA_TYPE_AUDIO:
>>>>>> -                            do_audio_out(os, ost, ist, 
>>>>>> decoded_data_buf, decoded_data_size);
>>>>>> +                            do_audio_out(os, ost, ist, 
>>>>>> decoded_data_buf, decoded_data_size, 0);
>>>>>>                              break;
>>>>>>                          case AVMEDIA_TYPE_VIDEO:
>>>>>>  #if CONFIG_AVFILTER
>>>>>> @@ -1697,8 +1705,18 @@ static int output_packet(AVInputStream *ist, int 
>>>>>> ist_index,
>>>>>>   discard_packet:
>>>>>>      if (pkt == NULL) {
>>>>>>          /* EOF handling */
>>>>>> +        double vpts = 0.0;
>>>>>> +
>>>>>> +        for(i=0;i<nb_ostreams;i++) {
>>>>>> +            ost = ost_table[i];
>>>>>> +            if(ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
>>>>>> +                double opts = ost->st->pts.val * 
>>>>>> av_q2d(ost->st->time_base);
>>>>>> +                vpts = FFMAX(opts, vpts);
>>>>>> +            }
>>>>>> +        }
>>>>>>
>>>>>>          for(i=0;i<nb_ostreams;i++) {
>>>>>> +            int ost_pad_audio = audio_pad;
>>>>>>              ost = ost_table[i];
>>>>>>              if (ost->source_index == ist_index) {
>>>>>>                  AVCodecContext *enc= ost->st->codec;
>>>>>> @@ -1713,13 +1731,37 @@ static int output_packet(AVInputStream *ist, int 
>>>>>> ist_index,
>>>>>>                      for(;;) {
>>>>>>                          AVPacket pkt;
>>>>>>                          int fifo_bytes;
>>>>>> +                        double opts;
>>>>>>                          av_init_packet(&pkt);
>>>>>>                          pkt.stream_index= ost->index;
>>>>>>
>>>>>>                          switch(ost->st->codec->codec_type) {
>>>>>>                          case AVMEDIA_TYPE_AUDIO:
>>>>>> +                            opts = ost->st->pts.val * 
>>>>>> av_q2d(ost->st->time_base);
>>>>>>                              fifo_bytes = av_fifo_size(ost->fifo);
>>>>>>                              ret = 0;
>>>>>> +
>>>>>> +                            if (ost_pad_audio && opts < vpts) {
>>>>>> +                                int osize = 
>>>>>> av_get_bits_per_sample_fmt(enc->sample_fmt) >> 3;
>>>>>> +                                int frame_bytes = 
>>>>>> enc->frame_size*osize*enc->channels;
>>>>>> +                                ost_pad_audio = 0;
>>>>>> +                                if (samples_size < frame_bytes)
>>>>>> +                                    ffmpeg_exit(1);
>>>>>> +                                memset(samples, 0, frame_bytes);
>>>>>> +                                /* finish the current frame in the 
>>>>>> fifo, then send whole frames */
>>>>>> +                                if (fifo_bytes > 0) {
>>>>>> +                                    do_audio_out(os, ost, ist, samples, 
>>>>>> frame_bytes-fifo_bytes, 1);
>>>>>> +                                    opts = ost->st->pts.val * 
>>>>>> av_q2d(ost->st->time_base);
>>>>>> +                                }
>>>>>> +                                while (opts < vpts) {
>>>>>> +                                    do_audio_out(os, ost, ist, samples, 
>>>>>> frame_bytes, 1);
>>>>>> +                                    opts = ost->st->pts.val * 
>>>>>> av_q2d(ost->st->time_base);
>>>>>> +                                }
>>>>>> +                                fifo_bytes = av_fifo_size(ost->fifo);
>>>>>> +                                if (fifo_bytes != 0)
>>>>>> +                                    ffmpeg_exit(1);
>>>>>> +                            }
>>>>>> +
>>>>>>                              /* encode any samples remaining in fifo */
>>>>>>                              if (fifo_bytes > 0) {
>>>>>>                                  int osize = 
>>>>>> av_get_bits_per_sample_fmt(enc->sample_fmt) >> 3;
>>>>>> @@ -4233,6 +4275,7 @@ static const OptionDef options[] = {
>>>>>>      { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, 
>>>>>> "change audio volume (256=normal)" , "volume" }, //
>>>>>>      { "newaudio", OPT_AUDIO | OPT_FUNC2, {(void*)opt_new_stream}, "add 
>>>>>> a new audio stream to the current output stream" },
>>>>>>      { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void 
>>>>>> *)&audio_language}, "set the ISO 639 language code (3 letters) of the 
>>>>>> current audio stream" , "code" },
>>>>>> +    { "apad", OPT_BOOL | OPT_AUDIO, {(void*)&audio_pad}, "pad audio to 
>>>>>> video length", "pad" },
>>>>>>      { "sample_fmt", HAS_ARG | OPT_EXPERT | OPT_AUDIO, 
>>>>>> {(void*)opt_audio_sample_fmt}, "set sample format, 'list' as argument 
>>>>>> shows all the sample formats supported", "format" },
>>>>>>
>>>>>>      /* subtitle options */
>>>>>
>>>>>
>>>>> Based on how you're using skip_resample_reformat, it seems this patch
>>>>> doesn't work when changing sample format or sample rate when encoding,
>>>>> correct?
>>>>>
>>>>> -Justin
>>>>
>>>> The silence is generated in the output sample format domain to allow
>>>> appending silence one audio block at a time (based on the encoder's
>>>> frame size). So it should work with changing sample format and sample
>>>> rate.
>>>
>>>
>>> I see. But it uses memset(0), so this wouldn't work with SAMPLE_FMT_U8.
>>>
>>
>> Actually it turns out this code does not run at all for PCM codecs because of
>>                 if(ost->st->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
>> enc->frame_size <=1)
>>                     continue;
>>
>> So I do need to fix that part.
>
> Yeah, that frame_size==1 (but not really) for pcm thing really sucks.
> It's on my list of things to get rid of.
>


Our PCM handling code is subtly broken in a variety of ways.

I have some code to force PCM to go through the FIFO based on some
semi-arbitrary chunking.

I'm not sure if we want it.

I think the ideal FIFO situation would be an avfilter that sits
between the remixer and the encoder, but it will be quite a while
before that is feasible.

>> Do we have any non-PCM *encoders* that support SAMPLE_FMT_U8? FWIW
>> they are broken already if they don't support
>> CODEC_CAP_SMALL_LAST_FRAME. Still it should be fairly trivial to add a
>> small generate silence function that works for any (sane) sample
>> format.
>
> I don't know if we have non-PCM encoders that support SAMPLE_FMT_U8, but
> I do think we should support it.
>

So we now have silence generation support for AV_SAMPLE_FMT_U8.

I'm not sure where to go from here. What should the next step be? For
the time being I can't fix-the-world to implement this.
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 1/2] Add an "-apad" option to ffmpeg to pad audio to video length.

Reply via email to