Re: [FFmpeg-devel] [PATCH] lavu/tx: add support for double precision FFT and MDCT

2019-08-02 Thread Lynne
Aug 1, 2019, 4:54 PM by d...@lynne.ee:

> Jul 27, 2019, 7:29 PM by d...@lynne.ee:
>
>> Simply moves and templates the actual transforms to support an
>> additional data type.
>> Unlike the float version, which is equal to or better than libfftw3f,
>> double precision output is bit identical with libfftw3.
>>
>
> Planning to push attached version soon.
> Just some minor changes (moved the radix permute to tx_priv, added if guards).
>

Pushed.
Still working on the SIMD.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] lavu/tx: add support for double precision FFT and MDCT

2019-08-01 Thread Lynne
Jul 27, 2019, 7:29 PM by d...@lynne.ee:

> Simply moves and templates the actual transforms to support an
> additional data type.
> Unlike the float version, which is equal to or better than libfftw3f,
> double precision output is bit identical with libfftw3.
>

Planning to push attached version soon.
Just some minor changes (moved the radix permute to tx_priv, added if guards).

From 3ea4284c2bc65cb3aee0f00e4c3e5ef5b75c3508 Mon Sep 17 00:00:00 2001
From: Lynne 
Date: Sat, 27 Jul 2019 18:54:20 +0100
Subject: [PATCH v2] lavu/tx: add support for double precision FFT and MDCT

Simply moves and templates the actual transforms to support an
additional data type.
Unlike the float version, which is equal to or better than libfftw3f,
double precision output is bit identical with libfftw3.
---
 doc/APIchanges  |   3 +
 libavutil/Makefile  |   2 +
 libavutil/tx.c  | 691 +---
 libavutil/tx.h  |  12 +
 libavutil/tx_double.c   |  21 ++
 libavutil/tx_float.c|  21 ++
 libavutil/tx_priv.h | 105 ++
 libavutil/tx_template.c | 643 +
 libavutil/version.h |   2 +-
 9 files changed, 824 insertions(+), 676 deletions(-)
 create mode 100644 libavutil/tx_double.c
 create mode 100644 libavutil/tx_float.c
 create mode 100644 libavutil/tx_priv.h
 create mode 100644 libavutil/tx_template.c

diff --git a/doc/APIchanges b/doc/APIchanges
index 07331b16e7..6603a8229e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
 
 API changes, most recent first:
 
+2019-07-27 - xx - lavu 56.33.100 - tx.h
+  Add AV_TX_DOUBLE_FFT and AV_TX_DOUBLE_MDCT
+
  8< - FFmpeg 4.2 was cut here  8< -
 
 2019-06-21 - a30e44098a - lavu 56.30.100 - frame.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 8a7a44e4b5..57e6e3d7e8 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -161,6 +161,8 @@ OBJS = adler32.o\
xtea.o   \
tea.o\
tx.o \
+   tx_float.o   \
+   tx_double.o
 
 OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA)  += hwcontext_d3d11va.o
diff --git a/libavutil/tx.c b/libavutil/tx.c
index 93f6e489d3..83e36f88cc 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -1,10 +1,4 @@
 /*
- * Copyright (c) 2019 Lynne 
- * Power of two FFT:
- * Copyright (c) 2008 Loren Merritt
- * Copyright (c) 2002 Fabrice Bellard
- * Partly based on libdjbfft by D. J. Bernstein
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -22,576 +16,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include 
-#include "tx.h"
-#include "thread.h"
-#include "mem.h"
-#include "avassert.h"
-
-typedef float FFTSample;
-typedef AVComplexFloat FFTComplex;
-
-struct AVTXContext {
-int n;  /* Nptwo part */
-int m;  /* Ptwo part */
-
-FFTComplex *exptab; /* MDCT exptab */
-FFTComplex *tmp;/* Temporary buffer needed for all compound transforms */
-int*pfatab; /* Input/Output mapping for compound transforms */
-int*revtab; /* Input mapping for power of two transforms */
-};
-
-#define FFT_NAME(x) x
-
-#define COSTABLE(size) \
-static DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
-
-static FFTSample * const FFT_NAME(ff_cos_tabs)[18];
-
-COSTABLE(16);
-COSTABLE(32);
-COSTABLE(64);
-COSTABLE(128);
-COSTABLE(256);
-COSTABLE(512);
-COSTABLE(1024);
-COSTABLE(2048);
-COSTABLE(4096);
-COSTABLE(8192);
-COSTABLE(16384);
-COSTABLE(32768);
-COSTABLE(65536);
-COSTABLE(131072);
-
-static av_cold void init_ff_cos_tabs(int index)
-{
-int m = 1 << index;
-double freq = 2*M_PI/m;
-FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
-for(int i = 0; i <= m/4; i++)
-tab[i] = cos(i*freq);
-for(int i = 1; i < m/4; i++)
-tab[m/2 - i] = tab[i];
-}
-
-typedef struct CosTabsInitOnce {
-void (*func)(void);
-AVOnce control;
-} CosTabsInitOnce;
-
-#define INIT_FF_COS_TABS_FUNC(index, size) \
-static av_cold void init_ff_cos_tabs_ ## size (void)   \
-{  \
-init_ff_cos_tabs(index);   \
-}
-
-INIT_FF_COS_TABS_FUNC(4, 16)
-INIT_FF_COS_TABS_FUNC(5, 32)
-INIT_FF_COS_TABS_FUNC(6, 64)
-INIT_FF_COS_TABS_FUNC(7, 128)
-INIT_FF_COS_TABS_FUNC(8, 256)
-INIT_FF_COS_TABS_FUNC(9, 512)
-INIT_FF_COS_TABS_FUNC(10, 1024)
-INIT_FF_COS_TABS_FUNC(11, 2048)
-INIT_FF_COS_TABS_FUNC(12, 

[FFmpeg-devel] [PATCH] lavu/tx: add support for double precision FFT and MDCT

2019-07-27 Thread Lynne
Simply moves and templates the actual transforms to support an
additional data type.
Unlike the float version, which is equal to or better than libfftw3f,
double precision output is bit identical with libfftw3.

From 121dc2f176f2d03be10c11db33b956310eb1f5db Mon Sep 17 00:00:00 2001
From: Lynne 
Date: Sat, 27 Jul 2019 18:54:20 +0100
Subject: [PATCH] lavu/tx: add support for double precision FFT and MDCT

Simply moves and templates the actual transforms to support an
additional data type.
Unlike the float version, which is equal to or better than libfftw3f,
double precision output is bit identical with libfftw3.
---
 doc/APIchanges  |   3 +
 libavutil/Makefile  |   2 +
 libavutil/tx.c  | 673 +---
 libavutil/tx.h  |  12 +
 libavutil/tx_double.c   |  21 ++
 libavutil/tx_float.c|  21 ++
 libavutil/tx_priv.h |  82 +
 libavutil/tx_template.c | 641 ++
 libavutil/version.h |   2 +-
 9 files changed, 795 insertions(+), 662 deletions(-)
 create mode 100644 libavutil/tx_double.c
 create mode 100644 libavutil/tx_float.c
 create mode 100644 libavutil/tx_priv.h
 create mode 100644 libavutil/tx_template.c

diff --git a/doc/APIchanges b/doc/APIchanges
index 07331b16e7..6603a8229e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
 
 API changes, most recent first:
 
+2019-07-27 - xx - lavu 56.33.100 - tx.h
+  Add AV_TX_DOUBLE_FFT and AV_TX_DOUBLE_MDCT
+
  8< - FFmpeg 4.2 was cut here  8< -
 
 2019-06-21 - a30e44098a - lavu 56.30.100 - frame.h
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 8a7a44e4b5..57e6e3d7e8 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -161,6 +161,8 @@ OBJS = adler32.o\
xtea.o   \
tea.o\
tx.o \
+   tx_float.o   \
+   tx_double.o
 
 OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA)  += hwcontext_d3d11va.o
diff --git a/libavutil/tx.c b/libavutil/tx.c
index 93f6e489d3..81d09202d9 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -1,10 +1,4 @@
 /*
- * Copyright (c) 2019 Lynne 
- * Power of two FFT:
- * Copyright (c) 2008 Loren Merritt
- * Copyright (c) 2002 Fabrice Bellard
- * Partly based on libdjbfft by D. J. Bernstein
- *
  * This file is part of FFmpeg.
  *
  * FFmpeg is free software; you can redistribute it and/or
@@ -22,576 +16,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include 
-#include "tx.h"
-#include "thread.h"
-#include "mem.h"
-#include "avassert.h"
-
-typedef float FFTSample;
-typedef AVComplexFloat FFTComplex;
-
-struct AVTXContext {
-int n;  /* Nptwo part */
-int m;  /* Ptwo part */
-
-FFTComplex *exptab; /* MDCT exptab */
-FFTComplex *tmp;/* Temporary buffer needed for all compound transforms */
-int*pfatab; /* Input/Output mapping for compound transforms */
-int*revtab; /* Input mapping for power of two transforms */
-};
-
-#define FFT_NAME(x) x
-
-#define COSTABLE(size) \
-static DECLARE_ALIGNED(32, FFTSample, FFT_NAME(ff_cos_##size))[size/2]
-
-static FFTSample * const FFT_NAME(ff_cos_tabs)[18];
-
-COSTABLE(16);
-COSTABLE(32);
-COSTABLE(64);
-COSTABLE(128);
-COSTABLE(256);
-COSTABLE(512);
-COSTABLE(1024);
-COSTABLE(2048);
-COSTABLE(4096);
-COSTABLE(8192);
-COSTABLE(16384);
-COSTABLE(32768);
-COSTABLE(65536);
-COSTABLE(131072);
-
-static av_cold void init_ff_cos_tabs(int index)
-{
-int m = 1 << index;
-double freq = 2*M_PI/m;
-FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
-for(int i = 0; i <= m/4; i++)
-tab[i] = cos(i*freq);
-for(int i = 1; i < m/4; i++)
-tab[m/2 - i] = tab[i];
-}
-
-typedef struct CosTabsInitOnce {
-void (*func)(void);
-AVOnce control;
-} CosTabsInitOnce;
-
-#define INIT_FF_COS_TABS_FUNC(index, size) \
-static av_cold void init_ff_cos_tabs_ ## size (void)   \
-{  \
-init_ff_cos_tabs(index);   \
-}
-
-INIT_FF_COS_TABS_FUNC(4, 16)
-INIT_FF_COS_TABS_FUNC(5, 32)
-INIT_FF_COS_TABS_FUNC(6, 64)
-INIT_FF_COS_TABS_FUNC(7, 128)
-INIT_FF_COS_TABS_FUNC(8, 256)
-INIT_FF_COS_TABS_FUNC(9, 512)
-INIT_FF_COS_TABS_FUNC(10, 1024)
-INIT_FF_COS_TABS_FUNC(11, 2048)
-INIT_FF_COS_TABS_FUNC(12, 4096)
-INIT_FF_COS_TABS_FUNC(13, 8192)
-INIT_FF_COS_TABS_FUNC(14, 16384)
-INIT_FF_COS_TABS_FUNC(15, 32768)
-INIT_FF_COS_TABS_FUNC(16, 65536)
-INIT_FF_COS_TABS_FUNC(17,