From: Martin Guy <martinw...@gmail.com> This change adds a "normalize" flag, -n, to the sox spectrogram effect. It adjusts the spectrogram gain so that the highest values are drawn in the brightest colours, giving uniform spectrograms regardless of how the volume differs between music files.
Tested-by: Eric Wong <normalper...@yhbt.net> --- sox.1 | 4 ++++ src/spectrogram.c | 23 ++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sox.1 b/sox.1 index 5a4766dd..04ab09b1 100644 --- a/sox.1 +++ b/sox.1 @@ -3235,6 +3235,10 @@ A negative .I num effectively increases the `brightness' of the spectrogram display, and vice versa. +.IP \fB\-n\fR +Sets the upper limit of the Z axis so that the loudest pixels +are shown using the brightest colour in the palette - a kind of +automatic \fB\-Z\fR flag. .IP \fB\-q\ \fInum\fR Sets the Z-axis quantisation, i.e. the number of different colours (or intensities) in which to render Z-axis diff --git a/src/spectrogram.c b/src/spectrogram.c index b38d2124..34457f21 100644 --- a/src/spectrogram.c +++ b/src/spectrogram.c @@ -59,7 +59,7 @@ typedef struct { double pixels_per_sec, window_adjust; int x_size0, y_size, Y_size, dB_range, gain, spectrum_points, perm; sox_bool monochrome, light_background, high_colour, slack_overlap, no_axes; - sox_bool raw, alt_palette, truncate; + sox_bool normalize, raw, alt_palette, truncate; win_type_t win_type; char const * out_name, * title, * comment; char const *duration_str, *start_time_str; @@ -113,7 +113,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv) char const * next; int c; lsx_getopt_t optstate; - lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate); + lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmnlhTo:", NULL, lsx_getopt_flag_none, 1, &optstate); p->dB_range = 120, p->spectrum_points = 249, p->perm = 1; /* Non-0 defaults */ p->out_name = "spectrogram.png", p->comment = "Created by SoX"; @@ -134,6 +134,7 @@ static int getopts(sox_effect_t * effp, int argc, char **argv) case 'a': p->no_axes = sox_true; break; case 'r': p->raw = sox_true; break; case 'm': p->monochrome = sox_true; break; + case 'n': p->normalize = sox_true; break; case 'l': p->light_background = 
sox_true; break; case 'h': p->high_colour = sox_true; break; case 'T': p->truncate = sox_true; break; @@ -557,6 +558,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */ int i, j, k, base, step, tick_len = 3 - p->no_axes; char text[200], * prefix; double limit; + float autogain = 0.0; /* Is changed if the -n flag was supplied */ free(p->shared); if (p->using_stdout) { @@ -583,8 +585,22 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */ png_rows[rows - 1 - j] = (png_bytep)(pixels + j * cols); /* Spectrogram */ + + if (p->normalize) + /* values are already in dB, so we subtract the maximum value + * (which will normally be negative) to raise the maximum to 0.0. + */ + autogain = -p->max; + for (k = 0; k < chans; ++k) { priv_t * q = (priv_t *)(effp - effp->flow + k)->priv; + + if (p->normalize) { + float *fp; + + for (i = p->rows * p->cols, fp = q->dBfs; i > 0 ; fp++, i--) + *fp += autogain; + } base = !p->raw * below + (chans - 1 - k) * (p->rows + 1); for (j = 0; j < p->rows; ++j) { for (i = 0; i < p->cols; ++i) @@ -651,7 +667,7 @@ static int stop(sox_effect_t * effp) /* only called, by end(), on flow 0 */ step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1)); for (i = 0; i <= p->dB_range; i += step) { /* (Tick) labels */ int y = (double)i / p->dB_range * (k - 1) + .5; - sprintf(text, "%+i", i - p->gain - p->dB_range); + sprintf(text, "%+i", i - p->gain - p->dB_range - (int)(autogain+0.5)); print_at(cols - right + 1, base + y + 5, Labels, text); } } @@ -692,6 +708,7 @@ sox_effect_handler_t const * lsx_spectrogram_effect_fn(void) "\t-Y num\tY-height total (i.e. 
not per channel); default 550", "\t-z num\tZ-axis range in dB; default 120", "\t-Z num\tZ-axis maximum in dBFS; default 0", + "\t-n\tSet Z-axis maximum to the brightest pixel", "\t-q num\tZ-axis quantisation (0 - 249); default 249", "\t-w name\tWindow: Hann(default)/Hamming/Bartlett/Rectangular/Kaiser/Dolph", "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser/Dolph", _______________________________________________ SoX-devel mailing list SoX-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/sox-devel