ffmpeg | branch: master | Lynne <d...@lynne.ee> | Fri Feb  9 15:21:37 2024 
+0100| [9af87828bd787e09724b86d233ead75d6589ae79] | committer: Lynne

x86/tx_init: propely indicate the extended available transform sizes

Forgot to do this with the previous commit.

Actually makes the assembly being used.

Still the fastest FFT in the world, 15% faster than FFTW on the
largest available size.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9af87828bd787e09724b86d233ead75d6589ae79
---

 libavutil/x86/tx_float_init.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index d3c0beb50f..84ec1122f6 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -270,15 +270,15 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] 
= {
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
     TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE 
| FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX,  0, 
AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
+    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX,  0, 
AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 288, b8_i2, fma3,  FMA3,  0, 
AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,
+    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3,  FMA3,  0, 
AV_CPU_FLAG_AVXSLOW),
+    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
-    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW),
 
 #if HAVE_AVX2_EXTERNAL
@@ -287,11 +287,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] 
= {
     TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384, factor_init, avx2, AVX2,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW),
 
-    TX_DEF(fft_sr,    FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0,
+    TX_DEF(fft_sr,    FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-    TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2,
+    TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
            AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-    TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, 
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+    TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2, 
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
            AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
 
     TX_DEF(fft_pfa_15xM, FFT, 60, TX_LEN_UNLIMITED, 15, 2, 320, fft_pfa_init, 
avx2, AVX2,

_______________________________________________
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

Reply via email to