Re: [FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of the ff_fft_init in fft_template
Michael Niedermayer 于2019年1月4日周五 上午3:01写道: > > On Wed, Dec 26, 2018 at 04:15:27PM +0800, Steven Liu wrote: > > Before patch: > > init nbits = 17, get 1 samples, average cost: 16175 us > > After patch: > > init nbits = 17, get 1 samples, average cost: 14989 us > > > > Signed-off-by: Steven Liu > > --- > > libavcodec/fft_template.c | 46 > > +++--- > > 1 file changed, 35 insertions(+), 11 deletions(-) > > should be ok Pushed Thanks > > thx > > [...] > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > The real ebay dictionary, page 1 > "Used only once"- "Some unspecified defect prevented a second use" > "In good condition" - "Can be repaird by experienced expert" > "As is" - "You wouldnt want it even if you were payed for it, if you knew ..." > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of the ff_fft_init in fft_template
On Wed, Dec 26, 2018 at 04:15:27PM +0800, Steven Liu wrote: > Before patch: > init nbits = 17, get 1 samples, average cost: 16175 us > After patch: > init nbits = 17, get 1 samples, average cost: 14989 us > > Signed-off-by: Steven Liu > --- > libavcodec/fft_template.c | 46 +++--- > 1 file changed, 35 insertions(+), 11 deletions(-) should be ok thx [...] -- Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB The real ebay dictionary, page 1 "Used only once"- "Some unspecified defect prevented a second use" "In good condition" - "Can be repaird by experienced expert" "As is" - "You wouldnt want it even if you were payed for it, if you knew ..." signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of the ff_fft_init in fft_template
Steven Liu 于2018年12月26日周三 下午4:15写道: > > Before patch: > init nbits = 17, get 1 samples, average cost: 16175 us > After patch: > init nbits = 17, get 1 samples, average cost: 14989 us > > Signed-off-by: Steven Liu > --- > libavcodec/fft_template.c | 46 +++--- > 1 file changed, 35 insertions(+), 11 deletions(-) > > diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c > index 762c014bc8..20a62e4290 100644 > --- a/libavcodec/fft_template.c > +++ b/libavcodec/fft_template.c > @@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int > inverse) > if (s->fft_permutation == FF_FFT_PERM_AVX) { > fft_perm_avx(s); > } else { > -for(i=0; i<n; i++) { > -int k; > -j = i; > -if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) > -j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); > -k = -split_radix_permutation(i, n, s->inverse) & (n-1); > -if (s->revtab) > -s->revtab[k] = j; > -if (s->revtab32) > -s->revtab32[k] = j; > -} > +#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\ > +for(i = 0; i < n; i++) {\ > +int k;\ > +j = i;\ > +j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\ > +k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ > +s->revtab##num[k] = j;\ > +} \ > +} while(0); > + > +#define PROCESS_FFT_PERM_DEFAULT(num) do {\ > +for(i = 0; i < n; i++) {\ > +int k;\ > +j = i;\ > +k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ > +s->revtab##num[k] = j;\ > +} \ > +} while(0); > + > +#define SPLIT_RADIX_PERMUTATION(num) do { \ > +if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\ > +PROCESS_FFT_PERM_SWAP_LSBS(num) \ > +} else {\ > +PROCESS_FFT_PERM_DEFAULT(num) \ > +}\ > +} while(0); > + > +if (s->revtab) > +SPLIT_RADIX_PERMUTATION() > +if (s->revtab32) > +SPLIT_RADIX_PERMUTATION(32) > + > +#undef PROCESS_FFT_PERM_DEFAULT > +#undef PROCESS_FFT_PERM_SWAP_LSBS > +#undef SPLIT_RADIX_PERMUTATION > } > > return 0; > -- > 2.15.2 (Apple Git-101.1) > > > > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel ping ___ 
ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of the ff_fft_init in fft_template
Before patch: init nbits = 17, get 1 samples, average cost: 16175 us After patch: init nbits = 17, get 1 samples, average cost: 14989 us Signed-off-by: Steven Liu --- libavcodec/fft_template.c | 46 +++--- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c index 762c014bc8..20a62e4290 100644 --- a/libavcodec/fft_template.c +++ b/libavcodec/fft_template.c @@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (s->fft_permutation == FF_FFT_PERM_AVX) { fft_perm_avx(s); } else { -for(i=0; i<n; i++) { -int k; -j = i; -if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) -j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); -k = -split_radix_permutation(i, n, s->inverse) & (n-1); -if (s->revtab) -s->revtab[k] = j; -if (s->revtab32) -s->revtab32[k] = j; -} +#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\ +for(i = 0; i < n; i++) {\ +int k;\ +j = i;\ +j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\ +k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ +s->revtab##num[k] = j;\ +} \ +} while(0); + +#define PROCESS_FFT_PERM_DEFAULT(num) do {\ +for(i = 0; i < n; i++) {\ +int k;\ +j = i;\ +k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ +s->revtab##num[k] = j;\ +} \ +} while(0); + +#define SPLIT_RADIX_PERMUTATION(num) do { \ +if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\ +PROCESS_FFT_PERM_SWAP_LSBS(num) \ +} else {\ +PROCESS_FFT_PERM_DEFAULT(num) \ +}\ +} while(0); + +if (s->revtab) +SPLIT_RADIX_PERMUTATION() +if (s->revtab32) +SPLIT_RADIX_PERMUTATION(32) + +#undef PROCESS_FFT_PERM_DEFAULT +#undef PROCESS_FFT_PERM_SWAP_LSBS +#undef SPLIT_RADIX_PERMUTATION } return 0; -- 2.15.2 (Apple Git-101.1) ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel