From bdcfb9a276a09bd3a95f7c00483319e157b9d95a Mon Sep 17 00:00:00 2001
From: Andy Martin <andyndeanna@gmail.com>
Date: Sat, 23 Jul 2016 12:46:24 -0700
Subject: [PATCH] Adds another boundary mode to dynaudnorm that performs better
 when the first frame contains silence.  This can be demonstrated with the
 following command:

./ffmpeg -report \
  -filter_complex "aevalsrc=0:s=8000:d=0.4 [a_in0];\
  aevalsrc=0.9*sin(440*2*PI*t):s=8000:d=10 [a_in1];\
  [a_in0] [a_in1] concat=n=2:v=0:a=1 [a_in];\
  [a_in] dynaudnorm=b=1:p=0.5 [a_out]" \
  -map "[a_out]" test_bad1.wav

The above results in clipping.  Changing to b=2 results in no clipping.

This change also adds a warning to the log when clipping occurs.
---
 doc/filters.texi            |  9 +++++++--
 libavfilter/af_dynaudnorm.c | 24 +++++++++++++++++++++---
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 24abdda..cc2198c 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -2262,10 +2262,15 @@ frames are available. In particular, for the first few frames in the audio
 file, the preceding frames are not known. And, similarly, for the last few
 frames in the audio file, the subsequent frames are not known. Thus, the
 question arises which gain factors should be assumed for the missing frames
-in the "boundary" region. The Dynamic Audio Normalizer implements two modes
+in the "boundary" region. The Dynamic Audio Normalizer implements three modes
 to deal with this situation. The default boundary mode assumes a gain factor
 of exactly 1.0 for the missing frames, resulting in a smooth "fade in" and
-"fade out" at the beginning and at the end of the input, respectively.
+"fade out" at the beginning and at the end of the input, respectively.  A
+value of 1 assumes that the missing frames at the beginning of the file have
+the same gain factor as the very first available frame. It furthermore assumes
+that the missing frames at the end of the file have the same gain factor as
+the very last frame.  A value of 2 uses a partial Gaussian filter at the
+beginning in order to better handle silence in the first frame.
 
 @item s
 Set the compress factor. In range from 0.0 to 30.0. Default is 0.0.
diff --git a/libavfilter/af_dynaudnorm.c b/libavfilter/af_dynaudnorm.c
index 1dd221c..207d372 100644
--- a/libavfilter/af_dynaudnorm.c
+++ b/libavfilter/af_dynaudnorm.c
@@ -67,6 +67,8 @@ typedef struct DynamicAudioNormalizerContext {
 
     int channels;
     int delay;
+    
+    int gaussian_filter_start;
 
     cqueue **gain_history_original;
     cqueue **gain_history_minimum;
@@ -84,7 +86,7 @@ static const AVOption dynaudnorm_options[] = {
     { "r", "set the target RMS",               OFFSET(target_rms),        AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,   1.0, FLAGS },
     { "n", "set channel coupling",             OFFSET(channels_coupled),  AV_OPT_TYPE_BOOL,   {.i64 = 1},      0,     1, FLAGS },
     { "c", "set DC correction",                OFFSET(dc_correction),     AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
-    { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_BOOL,   {.i64 = 0},      0,     1, FLAGS },
+    { "b", "set alternative boundary mode",    OFFSET(alt_boundary_mode), AV_OPT_TYPE_INT,    {.i64 = 0},      0,     2, FLAGS },
     { "s", "set the compress factor",          OFFSET(compress_factor),   AV_OPT_TYPE_DOUBLE, {.dbl = 0.0},  0.0,  30.0, FLAGS },
     { NULL }
 };
@@ -331,6 +333,12 @@ static int config_input(AVFilterLink *inlink)
 
     s->channels = inlink->channels;
     s->delay = s->filter_size;
+    
+    if (s->alt_boundary_mode == 2) {
+        s->gaussian_filter_start = s->filter_size/2;
+    } else {
+        s->gaussian_filter_start = 0;
+    }
 
     return 0;
 }
@@ -424,10 +432,17 @@ static double minimum_filter(cqueue *q)
 static double gaussian_filter(DynamicAudioNormalizerContext *s, cqueue *q)
 {
     double result = 0.0;
+    double total_weight = 0.0;
     int i;
 
-    for (i = 0; i < cqueue_size(q); i++) {
+    for (i = s->gaussian_filter_start; i < cqueue_size(q); i++) {
         result += cqueue_peek(q, i) * s->weights[i];
+        total_weight += s->weights[i];
+    }
+    
+    if (s->gaussian_filter_start) {
+        s->gaussian_filter_start--;
+        result *= 1.0 / total_weight;
     }
 
     return result;
@@ -637,8 +652,11 @@ static void amplify_frame(DynamicAudioNormalizerContext *s, AVFrame *frame)
 
             dst_ptr[i] *= amplification_factor;
 
-            if (fabs(dst_ptr[i]) > s->peak_value)
+            if (fabs(dst_ptr[i]) > s->peak_value) {
                 dst_ptr[i] = copysign(s->peak_value, dst_ptr[i]);
+                av_log(s, AV_LOG_WARNING, "audio output clipped\n");
+
+            }
         }
 
         s->prev_amplification_factor[c] = current_amplification_factor;
-- 
2.7.4