Further to my previous "silence" patch, I have discovered
that there is another error involving the RMS calculations.

Initially, and after a reset, the RMS sample window is empty,
yet the RMS calculation always uses the full window size as
the denominator. This makes the RMS value appear artificially
low until the window has filled. Instead, the actual count of
samples accumulated so far should be used.

Incidentally, I think that the hard-coded 1/50 used to select
the window size should also be parameterized. Sometimes a size using
1/20 (100 milliseconds when stereo) can be more appropriate than the
very small 40 millisecond hard-coded sample window size.

The following is the updated patch:

===================================================================
--- sox-downstream-sox-14.4.2.0.modified/src/silence.c.jw
+++ sox-downstream-sox-14.4.2.0.modified/src/silence.c
@@ -55,6 +55,7 @@
     double      *window_current;
     double      *window_end;
     size_t   window_size;
+    size_t   window_count;
     double      rms_sum;
 
     char        leave_silence;
@@ -73,6 +74,7 @@
 
     silence->window_current = silence->window;
     silence->window_end = silence->window + silence->window_size;
+    silence->window_count = 0;
     silence->rms_sum = 0;
 }
 
@@ -277,9 +279,15 @@
 {
   /* When scaling low bit data, noise values got scaled way up */
   /* Only consider the original bits when looking for silence */
-  sox_sample_t masked_value = value & (-1 << (32 - effp->in_signal.precision));
+  double scaled_value;
+  sox_sample_t rounded_value = value;
 
-  double scaled_value = (double)masked_value / SOX_SAMPLE_MAX;
+  /* before we mask we should round the value (a sqrt irrational) */
+  if (effp->in_signal.precision < 32)
+    rounded_value += (1 << (32 - effp->in_signal.precision - 1));
+  rounded_value &= (-1 << (32 - effp->in_signal.precision));
+
+  scaled_value = (double)rounded_value / SOX_SAMPLE_MAX;
 
   if (unit == '%')
     scaled_value *= 100;
@@ -294,12 +302,17 @@
     priv_t * silence = (priv_t *) effp->priv;
     double new_sum;
     sox_sample_t rms;
+    size_t count;
 
     new_sum = silence->rms_sum;
     new_sum -= *silence->window_current;
     new_sum += ((double)sample * (double)sample);
 
-    rms = sqrt(new_sum / silence->window_size);
+    count = silence->window_count;
+    if (count < silence->window_size)
+       count++;
+
+    rms = sqrt(new_sum / count);
 
     return (rms);
 }
@@ -315,6 +328,9 @@
     silence->window_current++;
     if (silence->window_current >= silence->window_end)
         silence->window_current = silence->window;
+
+    if (silence->window_count < silence->window_size)
+       silence->window_count++;
 }
 
 /* Process signed long samples from ibuf to obuf. */
===================================================================

:JW



_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

Reply via email to