There is no reason to compute c*cbrtf(c) at runtime in the AAC encoder's quantization code; the existing cube-root table already holds these values.
The extra cruft is needed because of the build system, the people who still like using hardcoded tables, and the need for a single cbrt_tab shared across the code. Yields a non-negligible speedup (Haswell+GCC, -march=native): before: ffmpeg -i sin.flac -acodec aac -y sin_new.aac 5.22s user 0.03s system 105% cpu 4.970 total; after: ffmpeg -i sin.flac -acodec aac -y sin_new.aac 5.15s user 0.02s system 105% cpu 4.884 total. Also reduces the size of the binary: after: 15503040 libavcodec/libavcodec.so.57; before: 15504176 libavcodec/libavcodec.so.57. Signed-off-by: Ganesh Ajjanagadde <gajja...@gmail.com> --- libavcodec/Makefile | 27 ++++++++++---- libavcodec/aacdec_fixed.c | 4 +-- libavcodec/aacdec_template.c | 4 +-- libavcodec/aacenc.c | 2 ++ libavcodec/aacenc_quantization.h | 3 +- libavcodec/{cbrt_tablegen.h => cbrt_data.c} | 56 ++++++++++++----------------- libavcodec/cbrt_tablegen.c | 20 +++++++++-- libavcodec/cbrt_tablegen.h | 53 +++++---------------------- libavcodec/cbrt_tablegen_template.c | 38 -------------------- 9 files changed, 77 insertions(+), 130 deletions(-) copy libavcodec/{cbrt_tablegen.h => cbrt_data.c} (64%) delete mode 100644 libavcodec/cbrt_tablegen_template.c diff --git a/libavcodec/Makefile b/libavcodec/Makefile index ee9a962..3315cf7 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -132,22 +132,31 @@ OBJS-$(CONFIG_WMA_FREQS) += wma_freqs.o OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o # decoders/encoders +AAC_DECODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o +AAC_FIXED_DECODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o +AAC_ENCODER-OBJS-$(CONFIG_HARDCODED_TABLES) += cbrt_tables.o +AAC_DECODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o +AAC_FIXED_DECODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o +AAC_ENCODER-OBJS-$(!CONFIG_HARDCODED_TABLES) += cbrt_data.o OBJS-$(CONFIG_ZERO12V_DECODER) += 012v.o OBJS-$(CONFIG_A64MULTI_ENCODER) += a64multienc.o elbg.o OBJS-$(CONFIG_A64MULTI5_ENCODER) += a64multienc.o elbg.o OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aactab.o 
aacsbr.o aacps_float.o \ aacadtsdec.o mpeg4audio.o kbdwin.o \ - sbrdsp.o aacpsdsp_float.o + sbrdsp.o aacpsdsp_float.o \ + $(AAC_DECODER-OBJS-yes) OBJS-$(CONFIG_AAC_FIXED_DECODER) += aacdec_fixed.o aactab.o aacsbr_fixed.o aacps_fixed.o \ aacadtsdec.o mpeg4audio.o kbdwin.o \ - sbrdsp_fixed.o aacpsdsp_fixed.o + sbrdsp_fixed.o aacpsdsp_fixed.o \ + $(AAC_FIXED_DECODER-OBJS-yes) OBJS-$(CONFIG_AAC_ENCODER) += aacenc.o aaccoder.o aacenctab.o \ aacpsy.o aactab.o \ aacenc_is.o \ aacenc_tns.o \ aacenc_ltp.o \ aacenc_pred.o \ - psymodel.o mpeg4audio.o kbdwin.o + psymodel.o mpeg4audio.o kbdwin.o \ + $(AAC_ENCODER-OBJS-yes) OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o OBJS-$(CONFIG_AC3_DECODER) += ac3dec_float.o ac3dec_data.o ac3.o kbdwin.o OBJS-$(CONFIG_AC3_FIXED_DECODER) += ac3dec_fixed.o ac3dec_data.o ac3.o kbdwin.o @@ -979,7 +988,6 @@ TOOLS = fourcc2pixfmt HOSTPROGS = aacps_tablegen \ aacps_fixed_tablegen \ cbrt_tablegen \ - cbrt_fixed_tablegen \ cos_tablegen \ dv_tablegen \ motionpixels_tablegen \ @@ -993,6 +1001,7 @@ CLEANFILES = *_tables.c *_tables.h *_tablegen$(HOSTEXESUF) $(SUBDIR)dct-test$(EXESUF): $(SUBDIR)dctref.o $(SUBDIR)aandcttab.o $(SUBDIR)dv_tablegen$(HOSTEXESUF): $(SUBDIR)dvdata_host.o +$(SUBDIR)cbrt_tablegen$(HOSTEXESUF): $(SUBDIR)cbrt_data.o TRIG_TABLES = cos cos_fixed sin TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c) @@ -1000,13 +1009,19 @@ TRIG_TABLES := $(TRIG_TABLES:%=$(SUBDIR)%_tables.c) $(TRIG_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cos_tablegen$(HOSTEXESUF) $(M)./$< $* > $@ +CBRT_TABLES = cbrt +CBRT_TABLES := $(CBRT_TABLES:%=$(SUBDIR)%_tables.c) + +$(CBRT_TABLES): $(SUBDIR)%_tables.c: $(SUBDIR)cbrt_tablegen$(HOSTEXESUF) + $(M)./$< $* > $@ + ifdef CONFIG_SMALL $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=1 else $(SUBDIR)%_tablegen$(HOSTEXESUF): HOSTCFLAGS += -DCONFIG_SMALL=0 endif -GEN_HEADERS = cbrt_tables.h cbrt_fixed_tables.h aacps_tables.h aacps_fixed_tables.h \ +GEN_HEADERS = aacps_tables.h aacps_fixed_tables.h \ 
dv_tables.h \ sinewin_tables.h sinewin_fixed_tables.h mpegaudio_tables.h motionpixels_tables.h \ pcm_tables.h qdm2_tables.h @@ -1016,8 +1031,6 @@ $(GEN_HEADERS): $(SUBDIR)%_tables.h: $(SUBDIR)%_tablegen$(HOSTEXESUF) $(M)./$< > $@ ifdef CONFIG_HARDCODED_TABLES -$(SUBDIR)aacdec.o: $(SUBDIR)cbrt_tables.h -$(SUBDIR)aacdec_fixed.o: $(SUBDIR)cbrt_fixed_tables.h $(SUBDIR)aacps_float.o: $(SUBDIR)aacps_tables.h $(SUBDIR)aacps_fixed.o: $(SUBDIR)aacps_fixed_tables.h $(SUBDIR)aactab_fixed.o: $(SUBDIR)aac_fixed_tables.h diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c index 396a874..04ebe99 100644 --- a/libavcodec/aacdec_fixed.c +++ b/libavcodec/aacdec_fixed.c @@ -155,9 +155,9 @@ static void vector_pow43(int *coefs, int len) for (i=0; i<len; i++) { coef = coefs[i]; if (coef < 0) - coef = -(int)cbrt_tab[-coef]; + coef = -(int)ff_cbrt_tab_fixed[-coef]; else - coef = (int)cbrt_tab[coef]; + coef = (int)ff_cbrt_tab_fixed[coef]; coefs[i] = coef; } } diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c index 6bc94c8..883ed52 100644 --- a/libavcodec/aacdec_template.c +++ b/libavcodec/aacdec_template.c @@ -1104,7 +1104,7 @@ static av_cold void aac_static_table_init(void) AAC_RENAME(ff_init_ff_sine_windows)( 9); AAC_RENAME(ff_init_ff_sine_windows)( 7); - AAC_RENAME(cbrt_tableinit)(); + AAC_RENAME(ff_cbrt_tableinit)(); } static AVOnce aac_table_init = AV_ONCE_INIT; @@ -1795,7 +1795,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, INTFLOAT coef[1024], v = -v; *icf++ = v; #else - *icf++ = cbrt_tab[n] | (bits & 1U<<31); + *icf++ = ff_cbrt_tab[n] | (bits & 1U<<31); #endif /* USE_FIXED */ bits <<= 1; } else { diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 023260a..863df65 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -45,6 +45,7 @@ #include "aacenc.h" #include "aacenctab.h" #include "aacenc_utils.h" +#include "cbrt_tablegen.h" #include "psymodel.h" @@ -897,6 +898,7 @@ alloc_fail: static av_cold void 
aac_encode_init_tables(void) { ff_aac_tableinit(); + AAC_RENAME(ff_cbrt_tableinit)(); } static av_cold int aac_encode_init(AVCodecContext *avctx) diff --git a/libavcodec/aacenc_quantization.h b/libavcodec/aacenc_quantization.h index 4250407..b20669c 100644 --- a/libavcodec/aacenc_quantization.h +++ b/libavcodec/aacenc_quantization.h @@ -32,6 +32,7 @@ #include "aacenc.h" #include "aacenctab.h" #include "aacenc_utils.h" +#include "cbrt_tablegen.h" /** * Calculate rate distortion cost for quantizing with given codebook @@ -105,7 +106,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( curbits += 21; } else { int c = av_clip_uintp2(quant(t, Q, ROUNDING), 13); - quantized = c*cbrtf(c)*IQ; + quantized = av_int2float(ff_cbrt_tab[c])*IQ; curbits += av_log2(c)*2 - 4 + 1; } } else { diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_data.c similarity index 64% copy from libavcodec/cbrt_tablegen.h copy to libavcodec/cbrt_data.c index 21e4b9a..c697581 100644 --- a/libavcodec/cbrt_tablegen.h +++ b/libavcodec/cbrt_data.c @@ -1,5 +1,5 @@ /* - * Header file for hardcoded AAC cube-root table + * AAC cube-root table * * Copyright (c) 2010 Reimar Döffinger <reimar.doeffin...@gmx.de> * @@ -20,37 +20,17 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#ifndef AVCODEC_CBRT_TABLEGEN_H -#define AVCODEC_CBRT_TABLEGEN_H - #include <stdint.h> -#include <math.h> -#include "libavutil/attributes.h" -#include "libavutil/intfloat.h" -#include "libavcodec/aac_defines.h" +#include "cbrt_tablegen.h" -#if USE_FIXED -#define CBRT(x) lrint((x) * 8192) -#else -#define CBRT(x) av_float2int((float)(x)) -#endif +uint32_t ff_cbrt_tab[1 << 13]; +uint32_t ff_cbrt_tab_fixed[1 << 13]; +static double cbrt_tab_dbl[1 << 13]; -#if CONFIG_HARDCODED_TABLES -#if USE_FIXED -#define cbrt_tableinit_fixed() -#include "libavcodec/cbrt_fixed_tables.h" -#else -#define cbrt_tableinit() -#include "libavcodec/cbrt_tables.h" -#endif -#else -static uint32_t 
cbrt_tab[1 << 13]; +static av_cold void cbrt_tableinit_internal(void) { + int i, j, k; -static av_cold void AAC_RENAME(cbrt_tableinit)(void) -{ - static double cbrt_tab_dbl[1 << 13]; - if (!cbrt_tab[(1<<13) - 1]) { - int i, j, k; + if (!cbrt_tab_dbl[(1<<13)-1]) { double cbrt_val; for (i = 1; i < 1<<13; i++) @@ -73,11 +53,21 @@ static av_cold void AAC_RENAME(cbrt_tableinit)(void) cbrt_tab_dbl[j] *= cbrt_val; } } - - for (i = 0; i < 1<<13; i++) - cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]); } } -#endif /* CONFIG_HARDCODED_TABLES */ -#endif /* AVCODEC_CBRT_TABLEGEN_H */ +av_cold void ff_cbrt_tableinit_fixed(void) +{ + cbrt_tableinit_internal(); + if (!ff_cbrt_tab_fixed[(1<<13)-1]) + for (int i = 0; i < 1<<13; i++) + ff_cbrt_tab_fixed[i] = lrint(cbrt_tab_dbl[i] * 8192); +} + +av_cold void ff_cbrt_tableinit(void) +{ + cbrt_tableinit_internal(); + if (!ff_cbrt_tab[(1<<13)-1]) + for (int i = 0; i < 1<<13; i++) + ff_cbrt_tab[i] = av_float2int((float)cbrt_tab_dbl[i]); +} diff --git a/libavcodec/cbrt_tablegen.c b/libavcodec/cbrt_tablegen.c index 8c2235e..b04d02f 100644 --- a/libavcodec/cbrt_tablegen.c +++ b/libavcodec/cbrt_tablegen.c @@ -20,5 +20,21 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#define USE_FIXED 0 -#include "cbrt_tablegen_template.c" +#include <stdlib.h> +#define CONFIG_HARDCODED_TABLES 0 +#include "libavutil/tablegen.h" +#include "cbrt_tablegen.h" +#include "tableprint.h" + +int main(void) +{ + ff_cbrt_tableinit(); + ff_cbrt_tableinit_fixed(); + + write_fileheader(); + + WRITE_ARRAY("const", uint32_t, ff_cbrt_tab); + WRITE_ARRAY("const", uint32_t, ff_cbrt_tab_fixed); + + return 0; +} diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h index 21e4b9a..446f9c1 100644 --- a/libavcodec/cbrt_tablegen.h +++ b/libavcodec/cbrt_tablegen.h @@ -29,55 +29,18 @@ #include "libavutil/intfloat.h" #include "libavcodec/aac_defines.h" -#if USE_FIXED -#define CBRT(x) lrint((x) * 8192) -#else -#define CBRT(x) 
av_float2int((float)(x)) -#endif - #if CONFIG_HARDCODED_TABLES -#if USE_FIXED -#define cbrt_tableinit_fixed() -#include "libavcodec/cbrt_fixed_tables.h" +#define ff_cbrt_tableinit_fixed() +#define ff_cbrt_tableinit() +extern const uint32_t ff_cbrt_tab[1 << 13]; +extern const uint32_t ff_cbrt_tab_fixed[1 << 13]; #else -#define cbrt_tableinit() -#include "libavcodec/cbrt_tables.h" -#endif -#else -static uint32_t cbrt_tab[1 << 13]; - -static av_cold void AAC_RENAME(cbrt_tableinit)(void) -{ - static double cbrt_tab_dbl[1 << 13]; - if (!cbrt_tab[(1<<13) - 1]) { - int i, j, k; - double cbrt_val; - - for (i = 1; i < 1<<13; i++) - cbrt_tab_dbl[i] = 1; - - /* have to take care of non-squarefree numbers */ - for (i = 2; i < 90; i++) { - if (cbrt_tab_dbl[i] == 1) { - cbrt_val = i * cbrt(i); - for (k = i; k < 1<<13; k *= i) - for (j = k; j < 1<<13; j += k) - cbrt_tab_dbl[j] *= cbrt_val; - } - } +extern uint32_t ff_cbrt_tab[1 << 13]; +extern uint32_t ff_cbrt_tab_fixed[1 << 13]; - for (i = 91; i <= 8191; i+= 2) { - if (cbrt_tab_dbl[i] == 1) { - cbrt_val = i * cbrt(i); - for (j = i; j < 1<<13; j += i) - cbrt_tab_dbl[j] *= cbrt_val; - } - } +av_cold void ff_cbrt_tableinit_fixed(void); +av_cold void ff_cbrt_tableinit(void); - for (i = 0; i < 1<<13; i++) - cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]); - } -} #endif /* CONFIG_HARDCODED_TABLES */ #endif /* AVCODEC_CBRT_TABLEGEN_H */ diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c deleted file mode 100644 index 7dcab91..0000000 --- a/libavcodec/cbrt_tablegen_template.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Generate a header file for hardcoded AAC cube-root table - * - * Copyright (c) 2010 Reimar Döffinger <reimar.doeffin...@gmx.de> - * - * This file is part of FFmpeg. 
- * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <stdlib.h> -#define CONFIG_HARDCODED_TABLES 0 -#include "libavutil/tablegen.h" -#include "cbrt_tablegen.h" -#include "tableprint.h" - -int main(void) -{ - AAC_RENAME(cbrt_tableinit)(); - - write_fileheader(); - - WRITE_ARRAY("static const", uint32_t, cbrt_tab); - - return 0; -} -- 2.7.2 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel