On 26/12/2015, Eric Wong <normalper...@yhbt.net> wrote:
> I've started maintaining a branch of things in their absence.

Thanks Eric. I have three commits, all regarding "sox spectrogram":
- remove arbitrary limit on spectrogram output height (was 1200)
- add "spectrogram -n" flag to normalize the image brightness
regardless of audio file loudness
- add FFTW3 support, which is between 150 and 250 times faster than
the slow built-in FFT routine
see commentary in the patches for further details.

They are the last three commits on https://github.com/martinwguy/sox.git
which is a fork of the main sf.net repository and are also attached.

They only touch src/spectrogram.c, and are independent o each other
(i.e. you can apply any or all of them in any order as they do not
conflict with each other). They are also attached as patches.

This were all needed to be able to use "sox spectrogram" for the
production of log-frequency-axis spectrograms of music in
wikidelia.net

Cheers

    M
spectrogram: remove arbitrary limit on height of spectrogram

Sox had an arbitrary limit of 8193 on the vertical axis size
based on MAX_FFT_SIZE=4096 and had fixed-size arrays for its data.
This is both wasteful of memory for smaller FFTs and stops us producing
more detailed output for no obvious reason.

This patch removes the size limit on Y-axis-height by making array
allocation dynamic. In practice, you can't remove the limit as getopt
insists on minimum and maximum values for numeric arguments, so we
copy the similarly arbitrary limit of 200000 from MAX_X_SIZE.

diff --git a/src/spectrogram.c b/src/spectrogram.c
index afb0b0e..b38d212 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -36,10 +36,13 @@
   #include <io.h>
 #endif
 
-#define MAX_FFT_SIZE 4096
 #define is_p2(x) !(x & (x - 1))
 
+/* These are arbitrary as there is no upper limit, but
+ * sox's getopt() needs an upper limit for each option, so...
+ */
 #define MAX_X_SIZE 200000
+#define MAX_Y_SIZE 200000
 
 typedef enum {Window_Hann, Window_Hamming, Window_Bartlett, Window_Rectangular, Window_Kaiser, Window_Dolph} win_type_t;
 static lsx_enum_item const window_options[] = {
@@ -71,8 +74,11 @@ typedef struct {
   int        dft_size, step_size, block_steps, block_num, rows, cols, read;
   int        x_size, end, end_min, last_end;
   sox_bool   truncated;
-  double     buf[MAX_FFT_SIZE], dft_buf[MAX_FFT_SIZE], window[MAX_FFT_SIZE+1];
-  double     block_norm, max, magnitudes[(MAX_FFT_SIZE>>1) + 1];
+  double     * buf;		/* [dft_size] */
+  double     * dft_buf;		/* [dft_size] */
+  double     * window;		/* [dft_size + 1] */
+  double     block_norm, max;
+  double     * magnitudes;	/* [(dft_size / 2) + 1] */
   float      * dBfs;
 } priv_t;
 
@@ -114,9 +120,9 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
 
   while ((c = lsx_getopt(&optstate)) != -1) switch (c) {
     GETOPT_NUMERIC(optstate, 'x', x_size0       , 100, MAX_X_SIZE)
-    GETOPT_NUMERIC(optstate, 'X', pixels_per_sec,  1 , 5000)
-    GETOPT_NUMERIC(optstate, 'y', y_size        , 64 , 1200)
-    GETOPT_NUMERIC(optstate, 'Y', Y_size        , 130, MAX_FFT_SIZE / 2 + 2)
+    GETOPT_NUMERIC(optstate, 'X', pixels_per_sec,  1 , MAX_X_SIZE)
+    GETOPT_NUMERIC(optstate, 'y', y_size        , 64 , MAX_Y_SIZE)
+    GETOPT_NUMERIC(optstate, 'Y', Y_size        , 130, MAX_Y_SIZE)
     GETOPT_NUMERIC(optstate, 'z', dB_range      , 20 , 180)
     GETOPT_NUMERIC(optstate, 'Z', gain          ,-100, 100)
     GETOPT_NUMERIC(optstate, 'q', spectrum_points, 0 , p->spectrum_points)
@@ -172,7 +178,7 @@ static double make_window(priv_t * p, int end)
   double sum = 0, * w = end < 0? p->window : p->window + end;
   int i, n = 1 + p->dft_size - abs(end);
 
-  if (end) memset(p->window, 0, sizeof(p->window));
+  if (end) memset(p->window, 0, sizeof(*(p->window)) * p->dft_size);
   for (i = 0; i < n; ++i) w[i] = 1;
   switch (p->win_type) {
     case Window_Hann: lsx_apply_hann(w, n); break;
@@ -190,10 +196,10 @@ static double make_window(priv_t * p, int end)
   return sum;
 }
 
-static double * rdft_init(int n)
+static double * rdft_init(size_t n)
 {
   double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
-  int i, j;
+  size_t i, j;
   for (j = 0; j <= n / 2; ++j) for (i = 0; i < n; ++i)
     *p++ = cos(2 * M_PI * j * i / n), *p++ = sin(2 * M_PI * j * i / n);
   return q;
@@ -260,11 +266,18 @@ static int start(sox_effect_t * effp)
   if (p->y_size) {
     p->dft_size = 2 * (p->y_size - 1);
     if (!is_p2(p->dft_size) && !effp->flow)
-      p->shared = rdft_init(p->dft_size);
+      p->shared = rdft_init((size_t)(p->dft_size));
   } else {
    int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
    for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
   }
+
+  /* Now that dft_size is set, allocate variable-sized elements of priv_t */
+  p->buf	= lsx_calloc(p->dft_size, sizeof(*(p->buf)));
+  p->dft_buf	= lsx_calloc(p->dft_size, sizeof(*(p->dft_buf)));
+  p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));
+  p->magnitudes = lsx_calloc((p->dft_size / 2) + 1, sizeof(*(p->magnitudes)));
+
   if (is_p2(p->dft_size) && !effp->flow)
     lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
   lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
@@ -651,6 +664,10 @@ error: png_destroy_write_struct(&png, &png_info);
   free(png_rows);
   free(pixels);
   free(p->dBfs);
+  free(p->buf);
+  free(p->dft_buf);
+  free(p->window);
+  free(p->magnitudes);
   return SOX_SUCCESS;
 }
 
Add spectrogram -n flag to normalise the output to maximum brightness

This change adds a "normalize" flag, -n, to sox spectrogram to adjust
the spectrogram gain such that the highest values get the brightest
colours, allowing one to get uniform spectrograms regardless of
different volumes in music files.

diff --git a/sox.1 b/sox.1
index 2c4ca47..699ceda 100644
--- a/sox.1
+++ b/sox.1
@@ -3235,6 +3235,10 @@ A negative
 .I num
 effectively increases the `brightness' of the spectrogram display,
 and vice versa.
+.IP \fB\-n\fR
+Sets the upper limit of the Z axis so that the loudest pixels
+are shown using the brightest colour in the palette - a kind of
+automatic \fB\-Z\fR flag.
 .IP \fB\-q\ \fInum\fR
 Sets the Z-axis quantisation, i.e. the number of different colours (or
 intensities) in which to render Z-axis
diff --git a/src/spectrogram.c b/src/spectrogram.c
index 8cbb3f4..6866e7b 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -59,7 +59,7 @@ typedef struct {
   double     pixels_per_sec, window_adjust;
   int        x_size0, y_size, Y_size, dB_range, gain, spectrum_points, perm;
   sox_bool   monochrome, light_background, high_colour, slack_overlap, no_axes;
-  sox_bool   raw, alt_palette, truncate;
+  sox_bool   normalize, raw, alt_palette, truncate;
   win_type_t win_type;
   char const * out_name, * title, * comment;
   char const *duration_str, *start_time_str;
@@ -113,7 +113,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
   char const * next;
   int c;
   lsx_getopt_t optstate;
-  lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate);
+  lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmnlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate);
 
   p->dB_range = 120, p->spectrum_points = 249, p->perm = 1; /* Non-0 defaults */
   p->out_name = "spectrogram.png", p->comment = "Created by SoX";
@@ -134,6 +134,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv)
     case 'a': p->no_axes          = sox_true;   break;
     case 'r': p->raw              = sox_true;   break;
     case 'm': p->monochrome       = sox_true;   break;
+    case 'n': p->normalize        = sox_true;   break;
     case 'l': p->light_background = sox_true;   break;
     case 'h': p->high_colour      = sox_true;   break;
     case 'T': p->truncate         = sox_true;   break;
@@ -557,6 +558,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
   int         i, j, k, base, step, tick_len = 3 - p->no_axes;
   char        text[200], * prefix;
   double      limit;
+  float       autogain = 0.0;	/* Is changed if the -n flag was supplied */
 
   free(p->shared);
   if (p->using_stdout) {
@@ -583,8 +585,22 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
     png_rows[rows - 1 - j] = (png_bytep)(pixels + j * cols);
 
   /* Spectrogram */
+
+  if (p->normalize)
+    /* values are already in dB, so we subtract the maximum value
+     * (which will normally be negative) to raise the maximum to 0.0.
+     */
+    autogain = -p->max;
+
   for (k = 0; k < chans; ++k) {
     priv_t * q = (priv_t *)(effp - effp->flow + k)->priv;
+
+    if (p->normalize) {
+      float *fp;
+
+      for (i = p->rows * p->cols, fp = q->dBfs; i > 0 ; fp++, i--)
+	*fp += autogain;
+    }
     base = !p->raw * below + (chans - 1 - k) * (p->rows + 1);
     for (j = 0; j < p->rows; ++j) {
       for (i = 0; i < p->cols; ++i)
@@ -651,7 +667,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */
     step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1));
     for (i = 0; i <= p->dB_range; i += step) {           /* (Tick) labels */
       int y = (double)i / p->dB_range * (k - 1) + .5;
-      sprintf(text, "%+i", i - p->gain - p->dB_range);
+      sprintf(text, "%+i", i - p->gain - p->dB_range - (int)(autogain+0.5));
       print_at(cols - right + 1, base + y + 5, Labels, text);
     }
   }
@@ -692,6 +708,7 @@ sox_effect_handler_t const * lsx_spectrogram_effect_fn(void)
     "\t-Y num\tY-height total (i.e. not per channel); default 550",
     "\t-z num\tZ-axis range in dB; default 120",
     "\t-Z num\tZ-axis maximum in dBFS; default 0",
+    "\t-n\tSet Z-axis maximum to the brightest pixel",
     "\t-q num\tZ-axis quantisation (0 - 249); default 249",
     "\t-w name\tWindow: Hann(default)/Hamming/Bartlett/Rectangular/Kaiser/Dolph",
     "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser/Dolph",
spectrogram: add FFTW3 support

Sox specrogram has two different algorithms to do spectrograms:
lsx_safe_rdft() for dft_size of powers of two (-ysize=2^n+1) and
rdft_p(), private to spectrogram.c, which does any size but is from
150 to 250 times slower.

This adds FFTW3 support, which is about the same speed as the old lsx
routine but works at any size:
- stuff in "configure" to autodetect it and "./configure --without-fftw"
  to forcibly disable it
- code in src/spectrogram.c to use FFTW if it is enabled or the old
  routines otherwise
- changes to debian/control to build with FFTW (though the debian package
  build dies for other reasons not concerning this patch).

The output from the old algorithms and FFTW (the PNGs) are identical.

diff --git a/configure.ac b/configure.ac
index 23138a9..4bc3064 100644
--- a/configure.ac
+++ b/configure.ac
@@ -332,6 +332,28 @@ AC_SUBST(PNG_LIBS)
 
 
 
+dnl Check for FFTW3 libraries
+AC_ARG_WITH(fftw,
+    AS_HELP_STRING([--without-fftw],
+        [Don't try to use FFTW]))
+using_fftw=no
+if test "$with_fftw" != "no"; then
+    AC_CHECK_HEADERS(fftw3.h, using_fftw=yes)
+    if test "$using_fftw" = "yes"; then
+        AC_CHECK_LIB(fftw3, fftw_execute, FFTW_LIBS="$FFTW_LIBS -lfftw3", using_fftw=no)
+    fi
+    if test "$with_fftw" = "yes" -a "$using_fftw" = "no"; then
+        AC_MSG_FAILURE([cannot find FFTW3])
+    fi
+fi
+if test "$using_fftw" = yes; then
+   AC_DEFINE(HAVE_FFTW, 1, [Define to 1 if you have FFTW3.])
+fi
+AM_CONDITIONAL(HAVE_FFTW, test x$using_fftw = xyes)
+AC_SUBST(FFTW_LIBS)
+
+
+
 dnl Test for LADSPA
 AC_ARG_WITH(ladspa,
     AS_HELP_STRING([--without-ladspa], [Don't try to use LADSPA]))
@@ -756,6 +778,7 @@ echo "OTHER OPTIONS"
 echo "ladspa effects.............$using_ladspa"
 echo "magic support..............$using_magic"
 echo "png support................$using_png"
+echo "FFTW support...............$using_fftw"
 if test "x$OPENMP_CFLAGS" = "x"; then
 echo "OpenMP support.............no"
 else
diff --git a/debian/control b/debian/control
index dc3c34b..94681e8 100644
--- a/debian/control
+++ b/debian/control
@@ -8,6 +8,7 @@ Build-Depends: dh-autoreconf,
                ladspa-sdk,
                libao-dev,
                libasound2-dev [linux-any],
+               libfftw3-dev,
                libgsm1-dev,
                libid3tag0-dev,
                libltdl3-dev,
diff --git a/src/Makefile.am b/src/Makefile.am
index 7cceaaf..5838f1f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -90,6 +90,9 @@ libsox_la_LIBADD = @PNG_LIBS@
 if HAVE_MAGIC
 libsox_la_LIBADD += @MAGIC_LIBS@
 endif
+if HAVE_FFTW
+libsox_la_LIBADD += @FFTW_LIBS@
+endif
 
 libsox_la_LIBADD += @GOMP_LIBS@
 
diff --git a/src/spectrogram.c b/src/spectrogram.c
index 34457f2..3d8c208 100644
--- a/src/spectrogram.c
+++ b/src/spectrogram.c
@@ -30,6 +30,10 @@
 #endif
 #include <zlib.h>
 
+#ifdef HAVE_FFTW3_H
+#include <fftw3.h>
+#endif
+
 /* For SET_BINARY_MODE: */
 #include <fcntl.h>
 #ifdef HAVE_IO_H
@@ -80,6 +84,9 @@ typedef struct {
   double     block_norm, max;
   double     * magnitudes;	/* [(dft_size / 2) + 1] */
   float      * dBfs;
+#if HAVE_FFTW
+  fftw_plan  fftw_plan;		/* Used if FFT_type == FFT_fftw */
+#endif
 } priv_t;
 
 #define secs(cols) \
@@ -197,6 +204,8 @@ static double make_window(priv_t * p, int end)
   return sum;
 }
 
+#if !HAVE_FFTW
+
 static double * rdft_init(size_t n)
 {
   double * q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q)), * p = q;
@@ -218,6 +227,8 @@ static void rdft_p(double const * q, double const * in, double * out, int n)
   }
 }
 
+#endif /* HAVE_FFTW */
+
 static int start(sox_effect_t * effp)
 {
   priv_t * p = (priv_t *)effp->priv;
@@ -266,8 +277,10 @@ static int start(sox_effect_t * effp)
 
   if (p->y_size) {
     p->dft_size = 2 * (p->y_size - 1);
+#if !HAVE_FFTW
     if (!is_p2(p->dft_size) && !effp->flow)
       p->shared = rdft_init((size_t)(p->dft_size));
+#endif
   } else {
    int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
    for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
@@ -279,12 +292,20 @@ static int start(sox_effect_t * effp)
   p->window	= lsx_calloc(p->dft_size + 1, sizeof(*(p->window)));
   p->magnitudes = lsx_calloc((p->dft_size / 2) + 1, sizeof(*(p->magnitudes)));
 
+  /* Initialize the FFT routine */
+#if HAVE_FFTW
+  /* We have one FFT plan per flow because the input/output arrays differ. */
+  p->fftw_plan = fftw_plan_r2r_1d(p->dft_size, p->dft_buf, p->dft_buf,
+                      FFTW_R2HC, FFTW_MEASURE);
+#else
   if (is_p2(p->dft_size) && !effp->flow)
     lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
+#endif
+
   lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i", duration, p->x_size, pixels_per_sec, p->dft_size);
 
   p->end = p->dft_size;
-  p->rows = (p->dft_size >> 1) + 1;
+  p->rows = (p->dft_size / 2) + 1;
   actual = make_window(p, p->last_end = 0);
   lsx_debug("window_density=%g", actual / p->dft_size);
   p->step_size = (p->slack_overlap? sqrt(actual * p->dft_size) : actual) + .5;
@@ -359,6 +380,19 @@ static int flow(sox_effect_t * effp,
     if ((p->end = max(p->end, p->end_min)) != p->last_end)
       make_window(p, p->last_end = p->end);
     for (i = 0; i < p->dft_size; ++i) p->dft_buf[i] = p->buf[i] * p->window[i];
+#if HAVE_FFTW
+    fftw_execute(p->fftw_plan);
+    /* Convert from FFTW's "half complex" format to an array of magnitudes.
+     * In HC format, the values are stored:
+     * r0, r1, r2 ... r(n/2), i(n+1)/2-1 .. i2, i1
+     */
+    p->magnitudes[0] += sqr(p->dft_buf[0]);
+    for (i = 1; i < p->dft_size / 2; ++i) {
+      p->magnitudes[i] += sqr(p->dft_buf[i]) + sqr(p->dft_buf[p->dft_size - i
+]);
+    }
+    p->magnitudes[p->dft_size / 2] += sqr(p->dft_buf[p->dft_size / 2]);
+#else
     if (is_p2(p->dft_size)) {
       lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
       p->magnitudes[0] += sqr(p->dft_buf[0]);
@@ -367,6 +401,8 @@ static int flow(sox_effect_t * effp,
       p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
     }
     else rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
+#endif
+
     if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
       return SOX_EOF;
   }
------------------------------------------------------------------------------
_______________________________________________
SoX-devel mailing list
SoX-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/sox-devel

Reply via email to