Re: [FFmpeg-devel] [PATCH] avcodec/x86/vvc/vvcdsp_init: fix unresolved external symbol on ARCH_X86_32

2024-02-07 Thread Wang Bin
Andreas Rheinhardt  于2024年2月5日周一 20:04写道:

> toq...@outlook.com:
> > From: Wu Jianhua 
> >
> > Signed-off-by: Wu Jianhua 
> > ---
> >  libavcodec/x86/vvc/vvcdsp_init.c | 78 
> >  1 file changed, 40 insertions(+), 38 deletions(-)
> >
> > diff --git a/libavcodec/x86/vvc/vvcdsp_init.c
> b/libavcodec/x86/vvc/vvcdsp_init.c
> > index 909ef9f56b..8ee4074350 100644
> > --- a/libavcodec/x86/vvc/vvcdsp_init.c
> > +++ b/libavcodec/x86/vvc/vvcdsp_init.c
> > @@ -31,6 +31,7 @@
> >  #include "libavcodec/vvc/vvcdsp.h"
> >  #include "libavcodec/x86/h26x/h2656dsp.h"
> >
> > +#if ARCH_X86_64
> >  #define FW_PUT(name, depth, opt) \
> >  static void ff_vvc_put_ ## name ## _ ## depth ## _##opt(int16_t *dst,
> const uint8_t *src, ptrdiff_t srcstride, \
> >   int height, const
> int8_t *hf, const int8_t *vf, int width)\
> > @@ -204,51 +205,52 @@ AVG_FUNCS(16, 12, avx2)
> >  c->inter.avg= bf(avg, bd, opt);
>  \
> >  c->inter.w_avg  = bf(w_avg, bd, opt);
>  \
> >  } while (0)
> > +#endif
> >
> >  void ff_vvc_dsp_init_x86(VVCDSPContext *const c, const int bd)
> >  {
> > +#if ARCH_X86_64
> >  const int cpu_flags = av_get_cpu_flags();
> >
> > -if (ARCH_X86_64) {
> > -if (bd == 8) {
> > -if (EXTERNAL_SSE4(cpu_flags)) {
> > -MC_LINK_SSE4(8);
> > -}
> > -if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > -MC_LINKS_AVX2(8);
> > -}
> > -} else if (bd == 10) {
> > -if (EXTERNAL_SSE4(cpu_flags)) {
> > -MC_LINK_SSE4(10);
> > -}
> > -if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > -MC_LINKS_AVX2(10);
> > -MC_LINKS_16BPC_AVX2(10);
> > -}
> > -} else if (bd == 12) {
> > -if (EXTERNAL_SSE4(cpu_flags)) {
> > -MC_LINK_SSE4(12);
> > -}
> > -if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > -MC_LINKS_AVX2(12);
> > -MC_LINKS_16BPC_AVX2(12);
> > -}
> > +if (bd == 8) {
> > +if (EXTERNAL_SSE4(cpu_flags)) {
> > +MC_LINK_SSE4(8);
> >  }
> > +if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > +MC_LINKS_AVX2(8);
> > +}
> > +} else if (bd == 10) {
> > +if (EXTERNAL_SSE4(cpu_flags)) {
> > +MC_LINK_SSE4(10);
> > +}
> > +if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > +MC_LINKS_AVX2(10);
> > +MC_LINKS_16BPC_AVX2(10);
> > +}
> > +} else if (bd == 12) {
> > +if (EXTERNAL_SSE4(cpu_flags)) {
> > +MC_LINK_SSE4(12);
> > +}
> > +if (EXTERNAL_AVX2_FAST(cpu_flags)) {
> > +MC_LINKS_AVX2(12);
> > +MC_LINKS_16BPC_AVX2(12);
> > +}
> > +}
> >
> > -if (EXTERNAL_AVX2(cpu_flags)) {
> > -switch (bd) {
> > -case 8:
> > -AVG_INIT(8, avx2);
> > -break;
> > -case 10:
> > -AVG_INIT(10, avx2);
> > -break;
> > -case 12:
> > -AVG_INIT(12, avx2);
> > -break;
> > -default:
> > -break;
> > -}
> > +if (EXTERNAL_AVX2(cpu_flags)) {
> > +switch (bd) {
> > +case 8:
> > +AVG_INIT(8, avx2);
> > +break;
> > +case 10:
> > +AVG_INIT(10, avx2);
> > +break;
> > +case 12:
> > +AVG_INIT(12, avx2);
> > +break;
> > +default:
> > +break;
> >  }
> >  }
> > +#endif
> >  }
>
> Are really all of these functions unavailable for 32bit?
>
> - Andreas
>
>
http://fate.ffmpeg.org/log.cgi?time=20240207055809&slot=x86_32-msvc14-dll-md-windows-native&log=compile
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v24 6/9] avformat/mov_muxer: Extended MOV muxer to handle EVC video content

2023-06-27 Thread Wang Bin
James Almer  于2023年6月19日周一 10:43写道:

> On 6/18/2023 11:27 PM, Wang Bin wrote:
> > - Changes in mov_write_video_tag function to handle EVC elementary stream
> >> - Provided structure EVCDecoderConfigurationRecord that specifies the
> >> decoder configuration information for ISO/IEC 23094-1 video content
> >>
> >> Signed-off-by: Dawid Kozinski 
> >> ---
> >>   libavformat/Makefile|   2 +-
> >>   libavformat/evc.c   | 422 
> >>   libavformat/evc.h   |  44 +
> >>   libavformat/isom_tags.c |   2 +
> >>   libavformat/movenc.c|  33 
> >>   5 files changed, 502 insertions(+), 1 deletion(-)
> >>   create mode 100644 libavformat/evc.c
> >>   create mode 100644 libavformat/evc.h
> >>
> >> diff --git a/libavformat/Makefile b/libavformat/Makefile
> >> index 6e4231fda2..d3503196e3 100644
> >> --- a/libavformat/Makefile
> >> +++ b/libavformat/Makefile
> >> @@ -364,7 +364,7 @@ OBJS-$(CONFIG_MOV_DEMUXER)   += mov.o
> >> mov_chan.o mov_esds.o \
> >>   OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o
> hevc.o
> >> vpcc.o \
> >>   movenchint.o mov_chan.o
> rtp.o
> >> \
> >>   movenccenc.o movenc_ttml.o
> >> rawutils.o \
> >> -dovi_isom.o
> >> +dovi_isom.o evc.o
> >>   OBJS-$(CONFIG_MP2_MUXER) += rawenc.o
> >>   OBJS-$(CONFIG_MP3_DEMUXER)   += mp3dec.o replaygain.o
> >>   OBJS-$(CONFIG_MP3_MUXER) += mp3enc.o rawenc.o
> id3v2enc.o
> >>
> >
> > This breaks msvc build. golomb_tab.o is required in
> > OBJS-$(CONFIG_MOV_MUXER). otherwise i get this error
> >
> > 2023-06-18T12:48:08.5213414Z LD   libavformat/avformat-60.dll
> > 2023-06-18T12:48:08.6503045Z LINK : warning LNK4044: unrecognized
> > option '/-icf=safe'; ignored
> > 2023-06-18T12:48:08.8081443ZCreating library
> > libavformat/avformat.lib and object libavformat/avformat.exp
> > 2023-06-18T12:48:08.8219530Z evc.o : error LNK2001: unresolved
> > external symbol ff_golomb_vlc_len
> > 2023-06-18T12:48:08.8267361Z evc.o : error LNK2001: unresolved
> > external symbol ff_ue_golomb_vlc_code
> > 2023-06-18T12:48:09.0122434Z libavformat\avformat-60.dll : fatal error
> > LNK1120: 2 unresolved externals
> > 2023-06-18T12:48:09.0517997Z make: ***
> > [/d/a/avbuild/avbuild/ffmpeg-***/ffbuild/library.mak:119:
> > libavformat/avformat-60.dll] Error 96
> >
> >
> > full build log:
> >
> https://github.com/wang-bin/avbuild/actions/runs/5303646918/jobs/9599433665
>
> Should be fixed.
>

Broken again, in commit d0fc1b3. Why not add golomb_tab.o to the Makefile?

2023-06-27T02:21:02.4793190Z Undefined symbols for architecture x86_64:
2023-06-27T02:21:02.4793570Z   "_ff_golomb_vlc_len", referenced from:
2023-06-27T02:21:02.4793850Z   _get_ue_golomb_31 in evc.o
2023-06-27T02:21:02.4822020Z   "_ff_ue_golomb_vlc_code", referenced from:
2023-06-27T02:21:02.4822390Z   _get_ue_golomb_31 in evc.o
2023-06-27T02:21:02.4849070Z ld: symbol(s) not found for architecture x86_64


Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v24 6/9] avformat/mov_muxer: Extended MOV muxer to handle EVC video content

2023-06-18 Thread Wang Bin
- Changes in mov_write_video_tag function to handle EVC elementary stream
> - Provided structure EVCDecoderConfigurationRecord that specifies the
> decoder configuration information for ISO/IEC 23094-1 video content
>
> Signed-off-by: Dawid Kozinski 
> ---
>  libavformat/Makefile|   2 +-
>  libavformat/evc.c   | 422 
>  libavformat/evc.h   |  44 +
>  libavformat/isom_tags.c |   2 +
>  libavformat/movenc.c|  33 
>  5 files changed, 502 insertions(+), 1 deletion(-)
>  create mode 100644 libavformat/evc.c
>  create mode 100644 libavformat/evc.h
>
> diff --git a/libavformat/Makefile b/libavformat/Makefile
> index 6e4231fda2..d3503196e3 100644
> --- a/libavformat/Makefile
> +++ b/libavformat/Makefile
> @@ -364,7 +364,7 @@ OBJS-$(CONFIG_MOV_DEMUXER)   += mov.o
> mov_chan.o mov_esds.o \
>  OBJS-$(CONFIG_MOV_MUXER) += movenc.o av1.o avc.o hevc.o
> vpcc.o \
>  movenchint.o mov_chan.o rtp.o
> \
>  movenccenc.o movenc_ttml.o
> rawutils.o \
> -dovi_isom.o
> +dovi_isom.o evc.o
>  OBJS-$(CONFIG_MP2_MUXER) += rawenc.o
>  OBJS-$(CONFIG_MP3_DEMUXER)   += mp3dec.o replaygain.o
>  OBJS-$(CONFIG_MP3_MUXER) += mp3enc.o rawenc.o id3v2enc.o
>

This breaks the MSVC build. golomb_tab.o is required in
OBJS-$(CONFIG_MOV_MUXER); otherwise I get this error:

2023-06-18T12:48:08.5213414Z LD libavformat/avformat-60.dll
2023-06-18T12:48:08.6503045Z LINK : warning LNK4044: unrecognized
option '/-icf=safe'; ignored
2023-06-18T12:48:08.8081443ZCreating library
libavformat/avformat.lib and object libavformat/avformat.exp
2023-06-18T12:48:08.8219530Z evc.o : error LNK2001: unresolved
external symbol ff_golomb_vlc_len
2023-06-18T12:48:08.8267361Z evc.o : error LNK2001: unresolved
external symbol ff_ue_golomb_vlc_code
2023-06-18T12:48:09.0122434Z libavformat\avformat-60.dll : fatal error
LNK1120: 2 unresolved externals
2023-06-18T12:48:09.0517997Z make: ***
[/d/a/avbuild/avbuild/ffmpeg-***/ffbuild/library.mak:119:
libavformat/avformat-60.dll] Error 96


full build log:
https://github.com/wang-bin/avbuild/actions/runs/5303646918/jobs/9599433665
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] vulkan build fix

2023-06-08 Thread Wang Bin
2023-06-08T02:04:18.3926210Z CC libavcodec/vulkan_decode.o
2023-06-08T02:04:18.5384903Z src/libavcodec/vulkan_decode.c:148:26:
error: incompatible pointer to integer conversion assigning to
'VkImageView' (aka 'unsigned long long') from 'void *'
[-Wint-conversion]
2023-06-08T02:04:18.5388615Z vkpic->img_view_ref  = NULL;
2023-06-08T02:04:18.5391395Z  ^ 


0001-vulkan-fix-msvc-arm32-build.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] Bump major version of swresample

2023-02-27 Thread Wang Bin
James Almer  于2023年2月27日周一 20:00写道:

> On 2/27/2023 7:03 AM, Michael Niedermayer wrote:
> > On Sat, Feb 25, 2023 at 12:03:02AM +0800, Wang Bin wrote:
> >>
> >
> >>   version_major.h |2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >> a87056c2fe65d68b2cf5d1de06be28ea40c69b73
> 0001-Bump-major-version-of-swresample.patch
> >>  From e3e6a3833f2fba743ee9c05962e804e9e570dd75 Mon Sep 17 00:00:00 2001
> >> From: wang-bin 
> >> Date: Fri, 24 Feb 2023 23:54:51 +0800
> >> Subject: [PATCH] Bump major version of swresample
> >>
> >> ---
> >>   libswresample/version_major.h | 2 +-
> >>   1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/libswresample/version_major.h
> b/libswresample/version_major.h
> >> index 7f265c2073..dd13f2bbe3 100644
> >> --- a/libswresample/version_major.h
> >> +++ b/libswresample/version_major.h
> >> @@ -26,6 +26,6 @@
> >>* Libswresample version macros
> >>*/
> >>
> >> -#define LIBSWRESAMPLE_VERSION_MAJOR   4
> >> +#define LIBSWRESAMPLE_VERSION_MAJOR   5
> >
> > No opinion if this should be changed now before 6.0 or not
> > but if its done it should be done on master and release/6.0 at the same
> time
> > and LIBSWRESAMPLE_VERSION_MINOR needs to be reset too while
> > LIBSWRESAMPLE_VERSION_MINOR needs to be +1 on master compared to
> release/6.0
> >
> > Opinions from others are welcome here. I am not a user of the releases, so
> > it's hard for me to really guess which way is better. It's a little messy to
> > change now
> >
> > thx
>
> I don't think it's a good idea to do it now. No API was removed from it
> so leaving the major as is should be fine.
>

Currently there is no API change, and not even an ABI change. But AVFrame is
used in the public API swr_convert_frame, so AVFrame ABI changes may break
swresample binary compatibility without any swresample code change. The public
APIs of all other modules except postproc and avutil also depend on structs
from another module, so it's better to bump the major version of all modules.

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] Bump major version of swresample

2023-02-27 Thread Wang Bin
>
>
> There are no major changes since the last bump. Is it an option to keep the
> current major version?
>
>
libpostproc changes less
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] FFmpeg 6.0

2023-02-26 Thread Wang Bin
On Fri, Feb 10, 2023 at 06:47:03PM +0100, Michael Niedermayer wrote:
> > Hi all
> >
> > i plan to branch off release/6.0 from master in the next days
> > If theres something blocking and i should wait, please reply here
> >
> > 6.0 release will be maybe 1 week after the branch point
> > once it has branched all important fixes should be backported of course
>
> I intend to make the 6.0 release soon from release/6.0
> if theres anything you want in, backport soon!
>
> thx
>

We forgot to bump swresample major version.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] Bump major version of swresample

2023-02-24 Thread Wang Bin



0001-Bump-major-version-of-swresample.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] lavc/videotoolboxenc: add hevc main42210 and p210

2023-02-11 Thread Wang Bin



0001-lavc-videotoolboxenc-add-hevc-main42210-and-p210.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] patch for ccaption decoder

2023-02-11 Thread Wang Bin
wrong decode() return value


0001-avcodec-ccaption_dec-return-the-number-of-bytes-deco.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-11-14 Thread Wang, Bin
> >> On 11/14/2022 10:30 AM, Wang, Bin wrote:
> >>>> By using xmm# you're not taking into account any x86inc SWAPing, so
> >>>> this is using xmm0 and xmm1 where the single scalar float input
> >>>> arguments reside (at least on unix64), instead of xm0 and xm1
> >>>> (xmm16 and xmm17) where the broadcasted scalars were stored.
> >>>> This, again, only worked by chance on unix64 because you're using
> >>>> scalar fmadd, and shouldn't work at all on win64.
> >>>>
> >>>> Also, all these as is are being encoded as VEX, not EVEX, but it
> >>>> should be fine leaving them untouched instead of using xm#, since
> >>>> they will be shorter (five bytes instead of six for some) by using
> >>>> the lower,
> >> non callee-saved regs.
> >>>
> >>> Thanks for the help. I'm not familiar with WIN64 asm. So what I need
> >>> to do is
> >> change the WIN64 swap from:
> >>> SWAP xmm0, xmm2
> >>> SWAP xmm1, xmm3
> >>> To:
> >>> VBROADCASTSS m0, xmm2
> >>> VBROADCASTSS m1, xmm3
> >>>
> >>> Is that correct?
> >>
> >> Yes, that will ultimately broadcast the two scalars in xmm2 and xmm3
> >> to
> >> zmm16 and zmm17.
> >> After that what you need to do is either change the fmaddss
> >> instruction to use
> >> xm0 and xm1 macros instead of xmm0 and xmm1 (so xmm16 and xmm17
> with
> >> EVEX encoding is used), or much like the broadcast above use xmm2 and
> >> xmm3 explicitly on win64, so it remains VEX encoded.
> >
> > So, to fix the issue, does this 2 changes looks good for you?
> > First change the WIN64 swap from:
> > SWAP xmm0, xmm2
> > SWAP xmm1, xmm3
> > To:
> > VBROADCASTSS m0, xmm2
> > VBROADCASTSS m1, xmm3
> >
> > Second change the fmaddss from:
> > fmaddss   xmm4, xmm4, xmm0, xmm1
> > To:
> > fmaddss   xmm4, xmm4, xm0, xm1
> 
> Yes.

I appreciate your help; I have submitted a new patch here:
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20221114143551.9740-1-bin.w...@intel.com/

> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org
> with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-11-14 Thread Wang, Bin



> -Original Message-
> From: ffmpeg-devel  On Behalf Of James
> Almer
> Sent: Monday, November 14, 2022 9:36 PM
> To: ffmpeg-devel@ffmpeg.org
> Subject: Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add
> sobel filter optimization and unit test with intel AVX512 VNNI
> 
> On 11/14/2022 10:30 AM, Wang, Bin wrote:
> >> By using xmm# you're not taking into account any x86inc SWAPing, so
> >> this is using xmm0 and xmm1 where the single scalar float input
> >> arguments reside (at least on unix64), instead of xm0 and xm1 (xmm16
> >> and xmm17) where the broadcasted scalars were stored.
> >> This, again, only worked by chance on unix64 because you're using
> >> scalar fmadd, and shouldn't work at all on win64.
> >>
> >> Also, all these as is are being encoded as VEX, not EVEX, but it
> >> should be fine leaving them untouched instead of using xm#, since
> >> they will be shorter (five bytes instead of six for some) by using the 
> >> lower,
> non callee-saved regs.
> >
> > Thanks for the help. I'm not familiar with WIN64 asm. So what I need to do 
> > is
> change the WIN64 swap from:
> > SWAP xmm0, xmm2
> > SWAP xmm1, xmm3
> > To:
> > VBROADCASTSS m0, xmm2
> > VBROADCASTSS m1, xmm3
> >
> > Is that correct?
> 
> Yes, that will ultimately broadcast the two scalars in xmm2 and xmm3 to
> zmm16 and zmm17.
> After that what you need to do is either change the fmaddss instruction to use
> xm0 and xm1 macros instead of xmm0 and xmm1 (so xmm16 and xmm17 with
> EVEX encoding is used), or much like the broadcast above use xmm2 and xmm3
> explicitly on win64, so it remains VEX encoded.

So, to fix the issue, do these two changes look good to you?
First change the WIN64 swap from:
SWAP xmm0, xmm2
SWAP xmm1, xmm3
To:
VBROADCASTSS m0, xmm2
VBROADCASTSS m1, xmm3

Second change the fmaddss from:
fmaddss   xmm4, xmm4, xmm0, xmm1
To:
fmaddss   xmm4, xmm4, xm0, xm1


> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org
> with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-11-14 Thread Wang, Bin
> By using xmm# you're not taking into account any x86inc SWAPing, so this is
> using xmm0 and xmm1 where the single scalar float input arguments reside (at
> least on unix64), instead of xm0 and xm1 (xmm16 and xmm17) where the
> broadcasted scalars were stored.
> This, again, only worked by chance on unix64 because you're using scalar 
> fmadd,
> and shouldn't work at all on win64.
> 
> Also, all these as is are being encoded as VEX, not EVEX, but it should be 
> fine
> leaving them untouched instead of using xm#, since they will be shorter (five
> bytes instead of six for some) by using the lower, non callee-saved regs.

Thanks for the help. I'm not familiar with WIN64 asm. So what I need to do is 
change the WIN64 swap from:
SWAP xmm0, xmm2
SWAP xmm1, xmm3
To:
VBROADCASTSS m0, xmm2
VBROADCASTSS m1, xmm3

Is that correct?

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with 
subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-11-13 Thread Wang, Bin
-Original Message-
From: ffmpeg-devel  On Behalf Of James Almer
Sent: Monday, November 14, 2022 10:43 AM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v7] libavfilter/x86/vf_convolution: add 
sobel filter optimization and unit test with intel AVX512 VNNI

On 11/4/2022 5:29 AM, bin.wang-at-intel@ffmpeg.org wrote:
> +%macro FILTER_SOBEL 0
> +%if UNIX64
> +cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, 
> +c3, c4, c5, c6, c7, c8, r, x %else cglobal filter_sobel, 4, 15, 7, 
> +dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, 
> +c8, r, x %endif %if WIN64
> +SWAP xmm0, xmm2
> +SWAP xmm1, xmm3
> +mov  r2q, matrixmp
> +mov  r3q, ptrmp
> +DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, 
> +c7, c8, r, x %endif
> +movsxdifnidn widthq, widthd
> +VBROADCASTSS m0, xmm0
> +VBROADCASTSS m1, xmm1

> + This and every other xmm# case should instead be xm#, to ensure the 
> swapping is taken into account.

Sorry, I don't understand your point. Could you please explain why I have to 
use xm# to ensure the swapping is taken into account (does SWAP with xmm# not 
work in WIN64 asm)? And how should I do it?

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-10-25 Thread Wang, Bin
-Original Message-
From: Wang, Bin 
Sent: Tuesday, September 20, 2022 6:33 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2136

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |  74 +
 libavfilter/vf_convolution.c  |  91 +++-
 libavfilter/x86/vf_convolution.asm| 147 ++
 libavfilter/x86/vf_convolution_init.c |  18 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 ++
 8 files changed, 360 insertions(+), 78 deletions(-)  create mode 100644 
tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..e44bfb5da8 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -21,6 +21,7 @@
 #ifndef AVFILTER_CONVOLUTION_H
 #define AVFILTER_CONVOLUTION_H
 #include "avfilter.h"
+#include "libavutil/intreadwrite.h"
 
 enum MatrixMode {
 MATRIX_SQUARE,
@@ -61,4 +62,77 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s, int depth, int 
+nb_planes);
+
+static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+  int x, int w, int y, int h, int bpc) {
+int i;
+
+for (i = 0; i < 9; i++) {
+int xoff = FFABS(x + ((i % 3) - 1));
+int yoff = FFABS(y + (i / 3) - 1);
+
+xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+c[i] = src + xoff * bpc + yoff * stride;
+}
+}
+
+static void filter_sobel(uint8_t *dst, int width,
+ float scale, float delta, const int *const matrix,
+ const uint8_t *c[], int peak, int radius,
+ int dstride, int stride, int size) {
+const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+const uint8_t *c3 = c[3], *c5 = c[5];
+const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 +
+ c6[x] *  1 + c7[x] *  2 + c8[x] *  1;
+float sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
+ c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
+
+dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
+}
+}
+
+static void filter16_sobel(uint8_t *dstp, int width,
+   float scale, float delta, const int *const matrix,
+   const uint8_t *c[], int peak, int radius,
+   int dstride, int stride, int size) {
+uint16_t *dst = (uint16_t *)dstp;
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = AV_RN16A([0][2 * x]) * -1 + AV_RN16A([1][2 * x]) * -2 
+ AV_RN16A([2][2 * x]) * -1 +
+ AV_RN16A([6][2 * x]) *  1 + AV_RN16A([7][2 * x]) *  2 
+ AV_RN16A([8][2 * x]) *  1;
+float sumb = AV_RN16A([0][2 * x]) * -1 + AV_RN16A([2][2 * x]) *  1 
+ AV_RN16A([3][2 * x]) * -2 +
+ AV_RN16A([5][2 * x]) *  2 + AV_RN16A([6][2 *
+ x]) * -1 + AV_RN16A([8][2 * x]) *  1;
+
+dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, 
peak);
+}
+}
+
+static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, 
+int nb_planes) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if ARCH_X86_64
+ff_sobel_init_x86(s, depth, nb_planes); #endif }
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..7762fa2a05 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -139,24 +139,6 @@ static void filter16_roberts(uint8_t *dstp, int width,
 }
 }
 
-static void filter16_sobel(uint8_t *dstp, int width,
-   float scale, float delta, const int *const matrix,
-   const uint8_t *c[], int peak, int radius,
-   int dstride, int stride, int size)
-{
-uint16_t *dst = (uint16_t *)dstp;
-int x;
-
-for (x = 0; x < width; x++) {
-float suma = AV_RN16A([0][2 * x]) * -1 + AV_RN16A([1][2

Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-10-17 Thread Wang, Bin
-Original Message-
From: ffmpeg-devel  On Behalf Of Wang, Bin
Sent: Monday, September 26, 2022 4:56 PM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add 
sobel filter optimization and unit test with intel AVX512 VNNI

-Original Message-
From: Wang, Bin 
Sent: Tuesday, September 20, 2022 6:33 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2136

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |  74 +
 libavfilter/vf_convolution.c  |  91 +++-
 libavfilter/x86/vf_convolution.asm| 147 ++
 libavfilter/x86/vf_convolution_init.c |  18 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 ++
 8 files changed, 360 insertions(+), 78 deletions(-)  create mode 100644 
tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..e44bfb5da8 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -21,6 +21,7 @@
 #ifndef AVFILTER_CONVOLUTION_H
 #define AVFILTER_CONVOLUTION_H
 #include "avfilter.h"
+#include "libavutil/intreadwrite.h"
 
 enum MatrixMode {
 MATRIX_SQUARE,
@@ -61,4 +62,77 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s, int depth, int 
+nb_planes);
+
+static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+  int x, int w, int y, int h, int bpc) {
+int i;
+
+for (i = 0; i < 9; i++) {
+int xoff = FFABS(x + ((i % 3) - 1));
+int yoff = FFABS(y + (i / 3) - 1);
+
+xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+c[i] = src + xoff * bpc + yoff * stride;
+}
+}
+
+static void filter_sobel(uint8_t *dst, int width,
+ float scale, float delta, const int *const matrix,
+ const uint8_t *c[], int peak, int radius,
+ int dstride, int stride, int size) {
+const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+const uint8_t *c3 = c[3], *c5 = c[5];
+const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 +
+ c6[x] *  1 + c7[x] *  2 + c8[x] *  1;
+float sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
+ c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
+
+dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
+}
+}
+
+static void filter16_sobel(uint8_t *dstp, int width,
+   float scale, float delta, const int *const matrix,
+   const uint8_t *c[], int peak, int radius,
+   int dstride, int stride, int size) {
+uint16_t *dst = (uint16_t *)dstp;
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = AV_RN16A([0][2 * x]) * -1 + AV_RN16A([1][2 * x]) * -2 
+ AV_RN16A([2][2 * x]) * -1 +
+ AV_RN16A([6][2 * x]) *  1 + AV_RN16A([7][2 * x]) *  2 
+ AV_RN16A([8][2 * x]) *  1;
+float sumb = AV_RN16A([0][2 * x]) * -1 + AV_RN16A([2][2 * x]) *  1 
+ AV_RN16A([3][2 * x]) * -2 +
+ AV_RN16A([5][2 * x]) *  2 + AV_RN16A([6][2 *
+ x]) * -1 + AV_RN16A([8][2 * x]) *  1;
+
+dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, 
peak);
+}
+}
+
+static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, 
+int nb_planes) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if ARCH_X86_64
+ff_sobel_init_x86(s, depth, nb_planes); #endif }
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..7762fa2a05 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -139,24 +139,6 @@ static void filter16_roberts(uint8_t *dstp, int width,
 }
 }
 
-static void filter16_sobel(uint8_t *dstp, int width,
-   float scale, float delta, const int *const matrix,
- 

Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-10-07 Thread Wang, Bin
-Original Message-
From: ffmpeg-devel  On Behalf Of Wang, Bin
Sent: Monday, September 26, 2022 4:56 PM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add 
sobel filter optimization and unit test with intel AVX512 VNNI

-Original Message-
From: Wang, Bin 
Sent: Tuesday, September 20, 2022 6:33 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2136

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |  74 +
 libavfilter/vf_convolution.c  |  91 +++-
 libavfilter/x86/vf_convolution.asm| 147 ++
 libavfilter/x86/vf_convolution_init.c |  18 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 ++
 8 files changed, 360 insertions(+), 78 deletions(-)  create mode 100644 
tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..e44bfb5da8 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -21,6 +21,7 @@
 #ifndef AVFILTER_CONVOLUTION_H
 #define AVFILTER_CONVOLUTION_H
 #include "avfilter.h"
+#include "libavutil/intreadwrite.h"
 
 enum MatrixMode {
 MATRIX_SQUARE,
@@ -61,4 +62,77 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s, int depth, int 
+nb_planes);
+
+static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+  int x, int w, int y, int h, int bpc) {
+int i;
+
+for (i = 0; i < 9; i++) {
+int xoff = FFABS(x + ((i % 3) - 1));
+int yoff = FFABS(y + (i / 3) - 1);
+
+xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+c[i] = src + xoff * bpc + yoff * stride;
+}
+}
+
+static void filter_sobel(uint8_t *dst, int width,
+ float scale, float delta, const int *const matrix,
+ const uint8_t *c[], int peak, int radius,
+ int dstride, int stride, int size) {
+const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+const uint8_t *c3 = c[3], *c5 = c[5];
+const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 +
+ c6[x] *  1 + c7[x] *  2 + c8[x] *  1;
+float sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
+ c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
+
+dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
+}
+}
+
+static void filter16_sobel(uint8_t *dstp, int width,
+   float scale, float delta, const int *const matrix,
+   const uint8_t *c[], int peak, int radius,
+   int dstride, int stride, int size) {
+uint16_t *dst = (uint16_t *)dstp;
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -2 
+ AV_RN16A(&c[2][2 * x]) * -1 +
+ AV_RN16A(&c[6][2 * x]) *  1 + AV_RN16A(&c[7][2 * x]) *  2 
+ AV_RN16A(&c[8][2 * x]) *  1;
+float sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 
+ AV_RN16A(&c[3][2 * x]) * -2 +
+ AV_RN16A(&c[5][2 * x]) *  2 + AV_RN16A(&c[6][2 *
+ x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
+
+dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, 
peak);
+}
+}
+
+static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, 
+int nb_planes) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if ARCH_X86_64
+ff_sobel_init_x86(s, depth, nb_planes); #endif }
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..7762fa2a05 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -139,24 +139,6 @@ static void filter16_roberts(uint8_t *dstp, int width,
 }
 }
 
-static void filter16_sobel(uint8_t *dstp, int width,
-   float scale, float delta, const int *const matrix,
- 

Re: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-09-26 Thread Wang, Bin
-Original Message-
From: Wang, Bin  
Sent: Tuesday, September 20, 2022 6:33 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v5] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2136

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |  74 +
 libavfilter/vf_convolution.c  |  91 +++-
 libavfilter/x86/vf_convolution.asm| 147 ++
 libavfilter/x86/vf_convolution_init.c |  18 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 ++
 8 files changed, 360 insertions(+), 78 deletions(-)  create mode 100644 
tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..e44bfb5da8 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -21,6 +21,7 @@
 #ifndef AVFILTER_CONVOLUTION_H
 #define AVFILTER_CONVOLUTION_H
 #include "avfilter.h"
+#include "libavutil/intreadwrite.h"
 
 enum MatrixMode {
 MATRIX_SQUARE,
@@ -61,4 +62,77 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s, int depth, int 
+nb_planes);
+
+static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int 
stride,
+  int x, int w, int y, int h, int bpc) {
+int i;
+
+for (i = 0; i < 9; i++) {
+int xoff = FFABS(x + ((i % 3) - 1));
+int yoff = FFABS(y + (i / 3) - 1);
+
+xoff = xoff >= w ? 2 * w - 1 - xoff : xoff;
+yoff = yoff >= h ? 2 * h - 1 - yoff : yoff;
+
+c[i] = src + xoff * bpc + yoff * stride;
+}
+}
+
+static void filter_sobel(uint8_t *dst, int width,
+ float scale, float delta, const int *const matrix,
+ const uint8_t *c[], int peak, int radius,
+ int dstride, int stride, int size) {
+const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2];
+const uint8_t *c3 = c[3], *c5 = c[5];
+const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8];
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 +
+ c6[x] *  1 + c7[x] *  2 + c8[x] *  1;
+float sumb = c0[x] * -1 + c2[x] *  1 + c3[x] * -2 +
+ c5[x] *  2 + c6[x] * -1 + c8[x] *  1;
+
+dst[x] = av_clip_uint8(sqrtf(suma*suma + sumb*sumb) * scale + delta);
+}
+}
+
+static void filter16_sobel(uint8_t *dstp, int width,
+   float scale, float delta, const int *const matrix,
+   const uint8_t *c[], int peak, int radius,
+   int dstride, int stride, int size) {
+uint16_t *dst = (uint16_t *)dstp;
+int x;
+
+for (x = 0; x < width; x++) {
+float suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -2 
+ AV_RN16A(&c[2][2 * x]) * -1 +
+ AV_RN16A(&c[6][2 * x]) *  1 + AV_RN16A(&c[7][2 * x]) *  2 
+ AV_RN16A(&c[8][2 * x]) *  1;
+float sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) *  1 
+ AV_RN16A(&c[3][2 * x]) * -2 +
+ AV_RN16A(&c[5][2 * x]) *  2 + AV_RN16A(&c[6][2 * 
+ x]) * -1 + AV_RN16A(&c[8][2 * x]) *  1;
+
+dst[x] = av_clip(sqrtf(suma*suma + sumb*sumb) * scale + delta, 0, 
peak);
+}
+}
+
+static av_unused void ff_sobel_init(ConvolutionContext *s, int depth, 
+int nb_planes) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if ARCH_X86_64
+ff_sobel_init_x86(s, depth, nb_planes); #endif }
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..7762fa2a05 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -139,24 +139,6 @@ static void filter16_roberts(uint8_t *dstp, int width,
 }
 }
 
-static void filter16_sobel(uint8_t *dstp, int width,
-   float scale, float delta, const int *const matrix,
-   const uint8_t *c[], int peak, int radius,
-   int dstride, int stride, int size)
-{
-uint16_t *dst = (uint16_t *)dstp;
-int x;
-
-for (x = 0; x < width; x++) {
-float suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2

Re: [FFmpeg-devel] [PATCH] lavc/pthread_frame: always transfer stashed hwaccel state

2022-09-22 Thread Wang Bin
Anton Khirnov  于2022年9月19日周一 21:22写道:

> Fixes assertion failures after avcodec_flush_buffers(), where
> stashed hwaccel state is present, but prev_thread is NULL.
>
> Found-by: Wang Bin 
> ---
>  libavcodec/pthread_frame.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
> index 066269621d..f8fddc5e4d 100644
> --- a/libavcodec/pthread_frame.c
> +++ b/libavcodec/pthread_frame.c
> @@ -459,14 +459,14 @@ static int submit_packet(PerThreadContext *p,
> AVCodecContext *user_avctx,
>  pthread_mutex_unlock(>mutex);
>  return err;
>  }
> -
> -/* transfer hwaccel state stashed from previous thread, if any */
> -av_assert0(!p->avctx->hwaccel);
> -FFSWAP(const AVHWAccel*, p->avctx->hwaccel,
>  fctx->stash_hwaccel);
> -FFSWAP(void*,p->avctx->hwaccel_context,
>  fctx->stash_hwaccel_context);
> -FFSWAP(void*,p->avctx->internal->hwaccel_priv_data,
> fctx->stash_hwaccel_priv);
>  }
>
> +/* transfer the stashed hwaccel state, if any */
> +av_assert0(!p->avctx->hwaccel);
> +FFSWAP(const AVHWAccel*, p->avctx->hwaccel,
>  fctx->stash_hwaccel);
> +FFSWAP(void*,p->avctx->hwaccel_context,
>  fctx->stash_hwaccel_context);
> +FFSWAP(void*,p->avctx->internal->hwaccel_priv_data,
> fctx->stash_hwaccel_priv);
> +
>  av_packet_unref(p->avpkt);
>  ret = av_packet_ref(p->avpkt, avpkt);
>  if (ret < 0) {
> --
> 2.35.1
>
>
The patch works as expected. I've tested all hwaccels. Thanks.

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-09-20 Thread Wang, Bin
Thanks for the review. Based on the comments, patch v5 has been sent out: 
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220920103305.7902-1-bin.w...@intel.com/



Changes:

  1.  Remove redundant zero-initializations in asm code
  2.  Pass depth and nb_planes as parameters to ff_sobel_init_x86()
  3.  Filter-agnostic initialization was moved to the beginning of param_init()
  4.  I also noticed the duplicate configurations, but I think it may not be 
suitable to change other code paths in a sobel patch, so I only changed the 
sobel code path.
  5.  In order to move sobel initialization to convolution.h, I have to move 3 
functions to this header file. They are setup_3x3(), filter_sobel() and 
filter16_sobel()



Further advice is welcome!



---

libavfilter/convolution.h |  74 +

libavfilter/vf_convolution.c  |  91 +++-

libavfilter/x86/vf_convolution.asm| 147 ++

libavfilter/x86/vf_convolution_init.c |  18 

tests/checkasm/Makefile   |   1 +

tests/checkasm/checkasm.c |   3 +

tests/checkasm/checkasm.h |   1 +

tests/checkasm/vf_convolution.c   | 103 ++

8 files changed, 360 insertions(+), 78 deletions(-)  create mode 100644 
tests/checkasm/vf_convolution.c







Thanks

Bin

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v4] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-09-14 Thread Wang, Bin
-Original Message-
From: Wang, Bin  
Sent: Wednesday, September 7, 2022 2:27 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v4] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2136

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |   2 +
 libavfilter/vf_convolution.c  |  23 
 libavfilter/x86/vf_convolution.asm| 150 ++
 libavfilter/x86/vf_convolution_init.c |  18 
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 ++
 8 files changed, 301 insertions(+)
 create mode 100644 tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..4520ad13e0 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -61,4 +61,6 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s); void 
+ff_convolution_init(ConvolutionContext *s, const char *filter_name);
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..0eeaaa9fc3 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -874,6 +874,9 @@ static int param_init(AVFilterContext *ctx)
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++)
 s->filter[p] = filter16_sobel;
+#if CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
 } else if (!strcmp(ctx->filter->name, "kirsch")) {
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++) @@ -887,6 +890,26 @@ static int 
param_init(AVFilterContext *ctx)
 return 0;
 }
 
+void ff_convolution_init(ConvolutionContext *s, const char 
+*filter_name) {
+if (!strcmp(filter_name, "sobel")) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if 
+CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
+}
+}
+
 static int config_input(AVFilterLink *inlink)  {
 AVFilterContext *ctx = inlink->dst; diff --git 
a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index 754d4d1064..a95d5ad499 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -22,6 +22,18 @@
 
 SECTION_RODATA
 half:   dd 0.5
+data_p1: dd  1
+data_n1: dd -1
+data_p2: dd  2
+data_n2: dd -2
+
+ALIGN 64
+sobel_perm: db  0, 16, 32, 48,  1, 17, 33, 49,  2, 18, 34, 50,  3, 19, 35, 51
+db  4, 20, 36, 52,  5, 21, 37, 53,  6, 22, 38, 54,  7, 23, 39, 55
+db  8, 24, 40, 56,  9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59
+db 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 
+47, 63
+sobel_mulA: db -1,  1, -2,  2
+sobel_mulB: db  1, -1,  2, -2
 
 SECTION .text
 
@@ -154,3 +166,141 @@ cglobal filter_3x3, 4, 15, 7, dst, width, rdiv, bias, 
matrix, ptr, c0, c1, c2, c  INIT_XMM sse4
 FILTER_3X3
 %endif
+
+%macro SOBEL_MUL 2
+movzx ptrd, byte [c%1q + xq]
+imul  ptrd, [%2]
+add   rd, ptrd
+%endmacro
+
+%macro SOBEL_ADD 1
+movzx ptrd, byte [c%1q + xq]
+add   rd, ptrd
+%endmacro
+
+; void filter_sobel_avx512(uint8_t *dst, int width,
+;  float scale, float delta, const int *const matrix,
+;  const uint8_t *c[], int peak, int radius,
+;  int dstride, int stride)
+%macro FILTER_SOBEL 0
+%if UNIX64
+cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, 
+c3, c4, c5, c6, c7, c8, r, x %else cglobal filter_sobel, 4, 15, 7, dst, 
+width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, 
+x %endif %if WIN64
+SWAP xmm0, xmm2
+SWAP xmm1, xmm3
+mov  r2q, matrixmp
+mov  r3q, ptrmp
+DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, 
+c7, c8, r, x %endif
+movsxdifnidn widthq, widthd
+VBROADCASTSS m0, xmm0
+VBROADCASTSS m1, xmm1
+pxor  m6, m6
+mov   c0q, [ptrq + 0*gprsize]
+mov   c1q, [ptrq + 1*gprsize]
+mov   c2q, [ptrq + 2*gprsize]
+mov   c3q, [ptrq + 3*gprsize]
+mov   c4q, [ptrq + 4*gprsize]
+mov   c5q, [ptrq + 5*gprsize]
+mov   c6q, [ptrq + 6*gp

[FFmpeg-devel] multithreading hwaccel is broken on 5.1 and master branch

2022-09-12 Thread Wang Bin
Commit cc867f2c09d2b69cee8a0eccd62aff002cbbfe11 breaks hwaccel. The
assertion av_assert0(!p->parent->stash_hwaccel) fails when seeking, which
makes video players unusable. videotoolbox has another crash when starting
to decode. The crash can be reproduced easily in ffmpeg:
./ffmpeg -stream_loop -1 -an -hwaccel vaapi -i test.mp4 -f null - >/dev/null

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/2] lavc/pthread_frame: avoid leaving stale hwaccel state in worker threads

2022-09-12 Thread Wang Bin
Wang Bin  于2022年9月12日周一 10:02写道:

>
>>  av_packet_unref(p->avpkt);
>> @@ -655,6 +670,14 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
>>  async_lock(p->parent);
>>  }
>>
>> +/* save hwaccel state for passing to the next thread;
>> + * this is done here so that this worker thread can wipe its own
>> hwaccel
>> + * state after decoding, without requiring synchronization */
>> +av_assert0(!p->parent->stash_hwaccel);
>> +p->parent->stash_hwaccel = avctx->hwaccel;
>> +p->parent->stash_hwaccel_context = avctx->hwaccel_context;
>> +p->parent->stash_hwaccel_priv=
>> avctx->internal->hwaccel_priv_data;
>>
>
> Assertion failure when seeking. Step to reproduce:
> ./ffmpeg -stream_loop -1 -an -hwaccel vaapi -i test.mp4 -f null >/dev/null
>
> Regards
>
>

The change breaks hwaccel on all platforms. videotoolbox has another crash.

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/2] lavc/pthread_frame: avoid leaving stale hwaccel state in worker threads

2022-09-11 Thread Wang Bin
>
>
>  av_packet_unref(p->avpkt);
> @@ -655,6 +670,14 @@ void ff_thread_finish_setup(AVCodecContext *avctx) {
>  async_lock(p->parent);
>  }
>
> +/* save hwaccel state for passing to the next thread;
> + * this is done here so that this worker thread can wipe its own
> hwaccel
> + * state after decoding, without requiring synchronization */
> +av_assert0(!p->parent->stash_hwaccel);
> +p->parent->stash_hwaccel = avctx->hwaccel;
> +p->parent->stash_hwaccel_context = avctx->hwaccel_context;
> +p->parent->stash_hwaccel_priv= avctx->internal->hwaccel_priv_data;
>

Assertion failure when seeking. Step to reproduce:
./ffmpeg -stream_loop -1 -an -hwaccel vaapi -i test.mp4 -f null >/dev/null

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v3] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-09-07 Thread Wang, Bin
-Original Message-
From: ffmpeg-devel  On Behalf Of Henrik Gramner
Sent: Tuesday, September 6, 2022 9:02 PM
To: FFmpeg development discussions and patches 
Subject: Re: [FFmpeg-devel] [PATCH v3] libavfilter/x86/vf_convolution: add 
sobel filter optimization and unit test with intel AVX512 VNNI

On Tue, Aug 23, 2022 at 10:43 AM  wrote:
> +.loop1:
> +pxor m4, m4
> +pxor m5, m5
> +
> +;Gx
> +SOBEL_MUL_16 0, data_n1, 4
> +SOBEL_MUL_16 1, data_n2, 4
> +SOBEL_MUL_16 2, data_n1, 4
> +SOBEL_ADD_16 6, 4
> +SOBEL_MUL_16 7, data_p2, 4
> +SOBEL_ADD_16 8, 4
> +
> [...]
> +
> +;Gy
> +SOBEL_MUL_16 0, data_n1, 5
> +SOBEL_ADD_16 2, 5
> +SOBEL_MUL_16 3, data_n2, 5
> +SOBEL_MUL_16 5, data_p2, 5
> +SOBEL_MUL_16 6, data_n1, 5
> +SOBEL_ADD_16 8, 5

This could probably be improved by doing something along the lines of 
(untested, but you get the idea):

ALIGN 64
sobel_perm: db  0, 16, 32, 48,  1, 17, 33, 49,  2, 18, 34, 50,  3, 19, 35, 51
db  4, 20, 36, 52,  5, 21, 37, 53,  6, 22, 38, 54,  7, 23, 39, 55
db  8, 24, 40, 56,  9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59
db 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63
sobel_mulA: db -1,  1, -2,  2
sobel_mulB: db  1, -1,  2, -2

[...]

mova  m6, [sobel_perm]
.loop1:
movu xm3, [c2q + xq]
pmovzxbd  m5, [c0q + xq]
vinserti32x4 ym3, [c6q + xq], 1
pmovzxbd  m4, [c8q + xq]
vinserti32x4  m2, m3, [c1q + xq], 2
vinserti32x4  m3, [c5q + xq], 2
vinserti32x4  m2, [c7q + xq], 3
vinserti32x4  m3, [c3q + xq], 3
vpermbm2, m6, m2
psubd m4, m5
vpermbm3, m6, m3
mova  m5, m4
vpdpbusd  m4, m2, [sobel_mulA] {1to16}
vpdpbusd  m5, m3, [sobel_mulB] {1to16}

> +mulps m4, m0   ; sum *= scale
> +addps m4, m1   ; sum += delta

fmaddps m4, m4, m0, m1

> +vpmovusdb xmm4, m4
> +movdqu[dstq + xq], xmm4

vpmovusdb [dstq + xq], m4

> +mulss xmm5, xmm5; b1 * b1
> +addss xmm4, xmm5

fmaddss xmm4, xmm5, xmm5, xmm4

> +mulss xmm4, xmm0 ; sum *= rdiv
> +addss xmm4, xmm1 ; sum += bias

fmaddss xmm4, xmm4, xmm0, xmm1



> +
> +cvtdq2ps  m4, m4
> +mulps m4, m4
> +
> +;Gy
> +SOBEL_MUL_16 0, data_n1, 5
> +SOBEL_ADD_16 2, 5
> +SOBEL_MUL_16 3, data_n2, 5
> +SOBEL_MUL_16 5, data_p2, 5
> +SOBEL_MUL_16 6, data_n1, 5
> +SOBEL_ADD_16 8, 5
> +
> +cvtdq2psm5, m5
> +VFMADD231PS m4, m5, m5
> +
> +sqrtpsm4, m4
> +mulps m4, m0   ; sum *= scale
> +addps m4, m1   ; sum += delta
> +cvttps2dq m4, m4
> +vpmovusdb xmm4, m4
> +movdqu[dstq + xq], xmm4
> +
> +add xq, mmsize/4
> +cmp xq, widthq
> +jl .loop1
> +
> +add widthq, rq
> +cmp xq, widthq
> +jge .end
> +
> +.loop2:
> +xor  rd, rd
> +pxor m4, m4
> +
> +;Gx
> +SOBEL_MUL 0, data_n1
> +SOBEL_MUL 1, data_n2
> +SOBEL_MUL 2, data_n1
> +SOBEL_ADD 6
> +SOBEL_MUL 7, data_p2
> +SOBEL_ADD 8
> +
> +cvtsi2ss xmm4, rd
> +mulssxmm4, xmm4
> +
> +xor rd, rd
> +;Gy
> +SOBEL_MUL 0, data_n1
> +SOBEL_ADD 2
> +SOBEL_MUL 3, data_n2
> +SOBEL_MUL 5, data_p2
> +SOBEL_MUL 6, data_n1
> +SOBEL_ADD 8
> +
> +cvtsi2ss  xmm5, rd
> +mulss xmm5, xmm5; b1 * b1
> +addss xmm4, xmm5
> +
> +sqrtpsxmm4, xmm4
> +mulss xmm4, xmm0 ; sum *= rdiv
> +addss xmm4, xmm1 ; sum += bias
> +cvttps2dq xmm4, xmm4 ; trunc to integer
> +packssdw  xmm4, xmm4
> +packuswb  xmm4, xmm4
> +movd  rd, xmm4
> +mov   [dstq + xq], rb
> +
> +add xq, 1
> +cmp xq, widthq
> +jl .loop2
> +.end:
> +RET
> +%endmacro
> +
> +%if ARCH_X86_64
> +%if HAVE_AVX512ICL_EXTERNAL
> +INIT_ZMM avx512icl
> +FILTER_SOBEL
> +%endif
> +%endif
> diff --git a/libavfilter/x86/vf_convolution_init.c 
> b/libavfilter/x86/vf_convolution_init.c
> index b78a47d02b..52a3d28991 100644
> --- a/libavfilter/x86/vf_convolution_init.c
> +++ b/libavfilter/x86/vf_convolution_init.c
> @@ -29,6 +29,11 @@ void ff_filter_3x3_sse4(uint8_t *dst, int width,
>  const uint8_t *c[], int peak, int radius,
>  int dstride, int stride, int size);
>
> +void ff_filter_sobel_avx512icl(uint8_t *dst, int width,
> + float scale, float delta, const int *const matrix,
> + const uint8_t *c[], int peak, int radius,
> + int dstride, int stride, int size);
> +
>  av_cold void ff_convolution_init_x86(ConvolutionContext *s)  {  #if 
> ARCH_X86_64 @@ -44,3 +49,16 @@ av_cold void 
> ff_convolution_init_x86(ConvolutionContext *s)
>  }
>  #endif
>  }
> +
> +av_cold void ff_sobel_init_x86(ConvolutionContext *s) { #if 
> +ARCH_X86_64
> +int cpu_flags = 

Re: [FFmpeg-devel] [PATCH v3] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-09-06 Thread Wang, Bin
-Original Message-
From: Wang, Bin  
Sent: Tuesday, August 23, 2022 4:23 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v3] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2470

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |   2 +
 libavfilter/vf_convolution.c  |  23 
 libavfilter/x86/vf_convolution.asm| 162 ++
 libavfilter/x86/vf_convolution_init.c |  18 +++
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 
 8 files changed, 313 insertions(+)
 create mode 100644 tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..4520ad13e0 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -61,4 +61,6 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s); void 
+ff_convolution_init(ConvolutionContext *s, const char *filter_name);
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..0eeaaa9fc3 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -874,6 +874,9 @@ static int param_init(AVFilterContext *ctx)
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++)
 s->filter[p] = filter16_sobel;
+#if CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
 } else if (!strcmp(ctx->filter->name, "kirsch")) {
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++) @@ -887,6 +890,26 @@ static int 
param_init(AVFilterContext *ctx)
 return 0;
 }
 
+void ff_convolution_init(ConvolutionContext *s, const char 
+*filter_name) {
+if (!strcmp(filter_name, "sobel")) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if 
+CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
+}
+}
+
 static int config_input(AVFilterLink *inlink)  {
 AVFilterContext *ctx = inlink->dst; diff --git 
a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index 754d4d1064..8ad2452418 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -22,6 +22,10 @@
 
 SECTION_RODATA
 half:   dd 0.5
+data_p1: dd  1
+data_n1: dd -1
+data_p2: dd  2
+data_n2: dd -2
 
 SECTION .text
 
@@ -154,3 +158,161 @@ cglobal filter_3x3, 4, 15, 7, dst, width, rdiv, bias, 
matrix, ptr, c0, c1, c2, c  INIT_XMM sse4
 FILTER_3X3
 %endif
+
+
+%macro SOBEL_MUL_16 3
+movd xmm2, [%2]
+VPBROADCASTD m2, xmm2
+movdqu xmm3, [c%1q + xq]
+vpmovzxbd m3, xmm3
+vpdpbusd  m%3, m3, m2
+%endmacro
+
+%macro SOBEL_ADD_16 2
+movdqu xmm3, [c%1q + xq]
+vpmovzxbd m3, xmm3
+vpaddd  m%2, m3
+%endmacro
+
+
+%macro SOBEL_MUL 2
+movzx ptrd, byte [c%1q + xq]
+imul  ptrd, [%2]
+add   rd, ptrd
+%endmacro
+
+%macro SOBEL_ADD 1
+movzx ptrd, byte [c%1q + xq]
+add   rd, ptrd
+%endmacro
+
+; void filter_sobel_avx512(uint8_t *dst, int width,
+;  float scale, float delta, const int *const matrix,
+;  const uint8_t *c[], int peak, int radius,
+;  int dstride, int stride)
+%macro FILTER_SOBEL 0
+%if UNIX64
+cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, 
+c3, c4, c5, c6, c7, c8, r, x %else cglobal filter_sobel, 4, 15, 7, dst, 
+width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, 
+x %endif %if WIN64
+SWAP xmm0, xmm2
+SWAP xmm1, xmm3
+mov  r2q, matrixmp
+mov  r3q, ptrmp
+DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, 
+c7, c8, r, x %endif
+movsxdifnidn widthq, widthd
+VBROADCASTSS m0, xmm0
+VBROADCASTSS m1, xmm1
+pxor  m6, m6
+mov   c0q, [ptrq + 0*gprsize]
+mov   c1q, [ptrq + 1*gprsize]
+mov   c2q, [ptrq + 2*gprsize]
+mov   c3q, [ptrq + 3*gprsize]
+mov   c4q, [ptrq + 4*gprsize]
+mov   c5q, [ptrq + 5*gprsize]
+mov   c6q, [ptrq + 6*gprsize]
+mov   c7q, [ptrq + 7*gprsize]
+mov   c8q, [ptrq + 8*gprsize]
+
+xor   xq, xq
+cmp   widthq,

[FFmpeg-devel] [PATCH v2] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-08-18 Thread Wang, Bin
Any comments on this patch V2?



-Original Message-
From: Wang, Bin  
Sent: Monday, August 15, 2022 1:39 PM
To: ffmpeg-devel@ffmpeg.org
Cc: Wang, Bin 
Subject: [FFmpeg-devel] [PATCH v2] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

From: bwang30 

This commit enabled assembly code with intel AVX512 VNNI and added unit test 
for sobel filter

sobel_c: 4537
sobel_avx512icl 2470

Signed-off-by: bwang30 
---
 libavfilter/convolution.h |   2 +
 libavfilter/vf_convolution.c  |  23 
 libavfilter/x86/vf_convolution.asm| 162 ++
 libavfilter/x86/vf_convolution_init.c |  18 +++
 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 +
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vf_convolution.c   | 103 
 8 files changed, 313 insertions(+)
 create mode 100644 tests/checkasm/vf_convolution.c

diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h index 
88aabe9a20..4520ad13e0 100644
--- a/libavfilter/convolution.h
+++ b/libavfilter/convolution.h
@@ -61,4 +61,6 @@ typedef struct ConvolutionContext {  } ConvolutionContext;
 
 void ff_convolution_init_x86(ConvolutionContext *s);
+void ff_sobel_init_x86(ConvolutionContext *s); void 
+ff_convolution_init(ConvolutionContext *s, const char *filter_name);
 #endif
diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index 
9a9c099e6d..f27385a7fb 100644
--- a/libavfilter/vf_convolution.c
+++ b/libavfilter/vf_convolution.c
@@ -874,6 +874,9 @@ static int param_init(AVFilterContext *ctx)
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++)
 s->filter[p] = filter16_sobel;
+#if CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
 } else if (!strcmp(ctx->filter->name, "kirsch")) {
 if (s->depth > 8)
 for (p = 0; p < s->nb_planes; p++) @@ -887,6 +890,26 @@ static int 
param_init(AVFilterContext *ctx)
 return 0;
 }
 
+void ff_convolution_init(ConvolutionContext *s, const char 
+*filter_name) {
+if (!strcmp(filter_name, "sobel")) {
+for (int i = 0; i < 4; i++) {
+s->filter[i] = filter_sobel;
+s->copy[i] = !((1 << i) & s->planes);
+s->size[i] = 3;
+s->setup[i] = setup_3x3;
+s->rdiv[i] = s->scale;
+s->bias[i] = s->delta;
+}
+if (s->depth > 8)
+for (int i = 0; i < 4; i++)
+s->filter[i] = filter16_sobel; #if 
+CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
+ff_sobel_init_x86(s);
+#endif
+}
+}
+
 static int config_input(AVFilterLink *inlink)  {
 AVFilterContext *ctx = inlink->dst; diff --git 
a/libavfilter/x86/vf_convolution.asm b/libavfilter/x86/vf_convolution.asm
index 754d4d1064..59c807b218 100644
--- a/libavfilter/x86/vf_convolution.asm
+++ b/libavfilter/x86/vf_convolution.asm
@@ -22,6 +22,10 @@
 
 SECTION_RODATA
 half:   dd 0.5
+data_p1: dd  1
+data_n1: dd -1
+data_p2: dd  2
+data_n2: dd -2
 
 SECTION .text
 
@@ -154,3 +158,161 @@ cglobal filter_3x3, 4, 15, 7, dst, width, rdiv, bias, 
matrix, ptr, c0, c1, c2, c  INIT_XMM sse4
 FILTER_3X3
 %endif
+
+
+%macro SOBEL_MUL_16 3
+movd xmm2, [%2]
+VPBROADCASTD m2, xmm2
+movdqu xmm3, [c%1q + xq]
+vpmovzxbd m3, xmm3
+vpdpbusd  m%3, m3, m2
+%endmacro
+
+%macro SOBEL_ADD_16 2
+movdqu xmm3, [c%1q + xq]
+vpmovzxbd m3, xmm3
+vpaddd  m%2, m3
+%endmacro
+
+
+%macro SOBEL_MUL 2
+movzx ptrd, byte [c%1q + xq]
+imul  ptrd, [%2]
+add   rd, ptrd
+%endmacro
+
+%macro SOBEL_ADD 1
+movzx ptrd, byte [c%1q + xq]
+add   rd, ptrd
+%endmacro
+
+; void filter_sobel_avx512(uint8_t *dst, int width,
+;  float scale, float delta, const int *const matrix,
+;  const uint8_t *c[], int peak, int radius,
+;  int dstride, int stride)
+%macro FILTER_SOBEL 0
+%if UNIX64
+cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, 
+c3, c4, c5, c6, c7, c8, r, x %else cglobal filter_sobel, 4, 15, 7, dst, 
+width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, c8, r, 
+x %endif %if WIN64
+SWAP xmm0, xmm2
+SWAP xmm1, xmm3
+mov  r2q, matrixmp
+mov  r3q, ptrmp
+DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, 
+c7, c8, r, x %endif
+movsxdifnidn widthq, widthd
+VBROADCASTSS m0, xmm0
+VBROADCASTSS m1, xmm1
+pxor  m6, m6
+mov   c0q, [ptrq + 0*gprsize]
+mov   c1q, [ptrq + 1*gprsize]
+mov   c2q, [ptrq + 2*gprsize]
+mov   c3q, [ptrq + 3*gprsize]
+mov   c4q, [ptrq + 4*gprsize]
+mov   c5q, [ptrq + 5*gprsize]
+mov   c6q, [ptrq + 6*gprsize]
+mov   c7q, [ptrq + 7*gprsize]
+mov   c8q, [ptrq + 8*gprsize]
+
+xor

Re: [FFmpeg-devel] [PATCH] libavfilter/x86/vf_convolution: add sobel filter optimization and unit test with intel AVX512 VNNI

2022-08-15 Thread Wang, Bin
-Original Message-
From: ffmpeg-devel  On Behalf Of Andreas 
Rheinhardt
Sent: Friday, August 12, 2022 5:11 PM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] libavfilter/x86/vf_convolution: add sobel 
filter optimization and unit test with intel AVX512 VNNI

bin.wang-at-intel@ffmpeg.org:
> From: bwang30 
> 
> This commit enabled assembly code with intel AVX512 VNNI and added 
> unit test for sobel filter
> 
> sobel_c: 4537
> sobel_avx512icl 2470
> 
> Signed-off-by: bwang30 
> ---
>  libavfilter/convolution.h |   2 +
>  libavfilter/vf_convolution.c  |   8 ++
>  libavfilter/x86/vf_convolution.asm| 162 ++
>  libavfilter/x86/vf_convolution_init.c |  18 +++
>  tests/checkasm/Makefile   |   1 +
>  tests/checkasm/checkasm.c |   3 +
>  tests/checkasm/checkasm.h |   1 +
>  tests/checkasm/vf_convolution.c   | 116 ++
>  8 files changed, 311 insertions(+)
>  create mode 100644 tests/checkasm/vf_convolution.c
> 
> diff --git a/libavfilter/convolution.h b/libavfilter/convolution.h 
> index 88aabe9a20..143b0fb2d9 100644
> --- a/libavfilter/convolution.h
> +++ b/libavfilter/convolution.h
> @@ -61,4 +61,6 @@ typedef struct ConvolutionContext {  } 
> ConvolutionContext;
>  
>  void ff_convolution_init_x86(ConvolutionContext *s);
> +void ff_sobel_init_x86(ConvolutionContext *s); int 
> +ff_filter_param_init(AVFilterContext *ctx);
>  #endif
> diff --git a/libavfilter/vf_convolution.c 
> b/libavfilter/vf_convolution.c index 9a9c099e6d..98aa952258 100644
> --- a/libavfilter/vf_convolution.c
> +++ b/libavfilter/vf_convolution.c
> @@ -874,6 +874,9 @@ static int param_init(AVFilterContext *ctx)
>  if (s->depth > 8)
>  for (p = 0; p < s->nb_planes; p++)
>  s->filter[p] = filter16_sobel;
> +#if CONFIG_CONVOLUTION_FILTER && ARCH_X86_64
> +ff_sobel_init_x86(s);
> +#endif
>  } else if (!strcmp(ctx->filter->name, "kirsch")) {
>  if (s->depth > 8)
>  for (p = 0; p < s->nb_planes; p++) @@ -887,6 +890,11 @@ 
> static int param_init(AVFilterContext *ctx)
>  return 0;
>  }
>  
> +int ff_filter_param_init(AVFilterContext *ctx) {
> +return param_init(ctx);
> +}
> +
>  static int config_input(AVFilterLink *inlink)  {
>  AVFilterContext *ctx = inlink->dst; diff --git 
> a/libavfilter/x86/vf_convolution.asm 
> b/libavfilter/x86/vf_convolution.asm
> index 754d4d1064..59c807b218 100644
> --- a/libavfilter/x86/vf_convolution.asm
> +++ b/libavfilter/x86/vf_convolution.asm
> @@ -22,6 +22,10 @@
>  
>  SECTION_RODATA
>  half:   dd 0.5
> +data_p1: dd  1
> +data_n1: dd -1
> +data_p2: dd  2
> +data_n2: dd -2
>  
>  SECTION .text
>  
> @@ -154,3 +158,161 @@ cglobal filter_3x3, 4, 15, 7, dst, width, rdiv, 
> bias, matrix, ptr, c0, c1, c2, c  INIT_XMM sse4
>  FILTER_3X3
>  %endif
> +
> +
> +%macro SOBEL_MUL_16 3
> +movd xmm2, [%2]
> +VPBROADCASTD m2, xmm2
> +movdqu xmm3, [c%1q + xq]
> +vpmovzxbd m3, xmm3
> +vpdpbusd  m%3, m3, m2
> +%endmacro
> +
> +%macro SOBEL_ADD_16 2
> +movdqu xmm3, [c%1q + xq]
> +vpmovzxbd m3, xmm3
> +vpaddd  m%2, m3
> +%endmacro
> +
> +
> +%macro SOBEL_MUL 2
> +movzx ptrd, byte [c%1q + xq]
> +imul  ptrd, [%2]
> +add   rd, ptrd
> +%endmacro
> +
> +%macro SOBEL_ADD 1
> +movzx ptrd, byte [c%1q + xq]
> +add   rd, ptrd
> +%endmacro
> +
> +; void filter_sobel_avx512(uint8_t *dst, int width,
> +;  float scale, float delta, const int *const matrix,
> +;  const uint8_t *c[], int peak, int radius,
> +;  int dstride, int stride)
> +%macro FILTER_SOBEL 0
> +%if UNIX64
> +cglobal filter_sobel, 4, 15, 7, dst, width, matrix, ptr, c0, c1, c2, 
> +c3, c4, c5, c6, c7, c8, r, x %else cglobal filter_sobel, 4, 15, 7, 
> +dst, width, rdiv, bias, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, c7, 
> +c8, r, x %endif %if WIN64
> +SWAP xmm0, xmm2
> +SWAP xmm1, xmm3
> +mov  r2q, matrixmp
> +mov  r3q, ptrmp
> +DEFINE_ARGS dst, width, matrix, ptr, c0, c1, c2, c3, c4, c5, c6, 
> +c7, c8, r, x %endif
> +movsxdifnidn widthq, widthd
> +VBROADCASTSS m0, xmm0
> +VBROADCASTSS m1, xmm1
> +pxor  m6, m6
> +mov   c0q, [ptrq + 0*gprsize]
> +mov   c1q, [ptrq + 1*gprsize]
> +mov   c2q, [ptrq + 2*gprsize]
> +mov   c3q, [ptrq + 3*gprsize]
> +mov   c4q, [ptrq + 4*gprsize]
> +mov   c5q, [ptrq + 5*gprsize]
> +mov   c6q, [ptrq + 6*gprsize]
> +mov   c7q, [ptrq + 7*gprsize]
> +mov   c8q, [ptrq + 8*gprsize]
> +
> +xor   xq, xq
> +cmp   widthq, mmsize/4
> +jl .loop2
> +
> +mov   rq, widthq
> +and   rq, mmsize/4-1
> +sub   widthq, rq
> +
> +.loop1:
> +pxor m4, m4 
> +pxor m5, m5 
> +
> +;Gx
> +SOBEL_MUL_16 0, data_n1, 4
> +SOBEL_MUL_16 1, data_n2, 4
> +SOBEL_MUL_16 2, data_n1, 4
> +SOBEL_ADD_16 6, 4
> +SOBEL_MUL_16 7, data_p2, 4
> +

Re: [FFmpeg-devel] Weird cross platform support in ffmpeg

2022-05-31 Thread Wang Bin
Nicolas George  于2022年5月31日周二 22:40写道:

> Александр (12022-05-31):
> > I analyzed problem a little bit. Only gcc/clang allows to use this trick.
> > Msvc compiler forbidens such code (even with different enabled
> > optimizations like remove unused references, whole program optimization
> > etc) I have made patch, which uses preprocessor #if directive instead. If
> > it will be helpful, I can share the patch.
>
> FATE shows FFmpeg builds fine on several MSVC instances, so no, we do
> not need such a patch at this time.
>
> I have asked you to show on ffmpeg-users the exact thing you are
> attempting to do, you have neglected to do that. It is obvious to me
> that you are trying to tweak the build process in a way that is not
> supported: stop.
>
>
If MSVC whole program optimization is enabled (compiler flag /GL), DCE will
not work.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 03/15] lavu/videotoolbox: add 422 and 444 pixel format mappings

2021-11-15 Thread Wang Bin
Ridley Combs  于2021年11月16日周二 上午11:03写道:

>
>
> > On Nov 15, 2021, at 19:35, Wang Bin  wrote:
> >
> >>
> >> +#if HAVE_KCVPIXELFORMATTYPE_422YPCBCR10BIPLANARVIDEORANGE
> >> +{ kCVPixelFormatType_422YpCbCr10BiPlanarVideoRange, false,
> >> AV_PIX_FMT_NV20 },
> >> +{ kCVPixelFormatType_422YpCbCr10BiPlanarFullRange,  true,
> >> AV_PIX_FMT_NV20 },
> >> +#endif
> >>
> >
> > It's p210, not nv20.
>
> I didn't add a P210 format (since that would've been equivalent to the
> existing NV20), only P410/P216/P416.


P210 != NV20. The lower 6 bits of P210 are zeros and must be shifted away.


> I guess I could add P210 as an alias with appropriate enum values and
> macros?


Not an alias. Add a new one like p010, p410 etc.


> In which case maybe defining the rest of the group (P008 for NV12, P208
> for NV16, P408 for NV24) would be worthwhile.
>
>
Maybe.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 03/15] lavu/videotoolbox: add 422 and 444 pixel format mappings

2021-11-15 Thread Wang Bin
>
> +#if HAVE_KCVPIXELFORMATTYPE_422YPCBCR10BIPLANARVIDEORANGE
> +{ kCVPixelFormatType_422YpCbCr10BiPlanarVideoRange, false,
> AV_PIX_FMT_NV20 },
> +{ kCVPixelFormatType_422YpCbCr10BiPlanarFullRange,  true,
> AV_PIX_FMT_NV20 },
> +#endif
>

It's p210, not nv20.

Regards
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/3] nvenc: use runtime api version to support old drivers

2020-07-15 Thread Wang Bin
Timo Rothenpieler  于2020年7月15日周三 下午11:16写道:

> On 15.07.2020 16:34, wangbin wrote:
> > From: wang-bin 
> >
> > There are reserved bit fields in nvEncoderAPI.h structs, so driver's abi
> > is stable. Requesting runtime version of these structs should work. I've
> > compared 7.0~9.x headers, and confirmed on some devices.
> > ---
> >   libavcodec/nvenc.c | 72 ++
> >   libavcodec/nvenc.h |  3 ++
> >   2 files changed, 44 insertions(+), 31 deletions(-)
> >
> > diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
> > index c6740c1842..ac35cb9f48 100644
> > --- a/libavcodec/nvenc.c
> > +++ b/libavcodec/nvenc.c
> > @@ -193,14 +193,26 @@ static void
> nvenc_print_driver_requirement(AVCodecContext *avctx, int level)
> >   av_log(avctx, level, "The minimum required Nvidia driver for nvenc
> is %s or newer\n", minver);
> >   }
> >
> > +static inline uint32_t struct_ver_rt(NvencContext* ctx, uint32_t
> struct_ver)
> > +{
> > +return ((uint32_t)ctx->apiver_rt | ((struct_ver)<<16) | (0x7 <<
> 28));
> > +}
>
> I'm really not a fan of hard-coding magic numbers, that can change in
> the future, into the ffmpeg implementation.
>
> > +static inline uint32_t api_ver(uint32_t major_ver, uint32_t minor_ver)
> > +{
> > +return major_ver | (minor_ver << 24);
> > +}
> > +
> >   static av_cold int nvenc_load_libraries(AVCodecContext *avctx)
> >   {
> >   NvencContext *ctx= avctx->priv_data;
> >   NvencDynLoadFunctions *dl_fn = >nvenc_dload_funcs;
> >   NVENCSTATUS err;
> >   uint32_t nvenc_max_ver;
> > +uint32_t nvenc_max_major;
> > +uint32_t nvenc_max_minor;
> > +uint32_t func_ver = NV_ENCODE_API_FUNCTION_LIST_VER;
> >   int ret;
> > -
> >   ret = cuda_load_functions(_fn->cuda_dl, avctx);
> >   if (ret < 0)
> >   return ret;
> > @@ -214,19 +226,17 @@ static av_cold int
> nvenc_load_libraries(AVCodecContext *avctx)
> >   err =
> dl_fn->nvenc_dl->NvEncodeAPIGetMaxSupportedVersion(_max_ver);
> >   if (err != NV_ENC_SUCCESS)
> >   return nvenc_print_error(avctx, err, "Failed to query nvenc
> max version");
> > +nvenc_max_major = nvenc_max_ver >> 4;
> > +nvenc_max_minor = nvenc_max_ver & 0xf;
> > +av_log(avctx, AV_LOG_VERBOSE, "nvenc build version: %d.%d, runtime
> version: %d.%d\n", NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
> nvenc_max_major, nvenc_max_minor);
> >
> > -av_log(avctx, AV_LOG_VERBOSE, "Loaded Nvenc version %d.%d\n",
> nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
> > -
> > -if ((NVENCAPI_MAJOR_VERSION << 4 | NVENCAPI_MINOR_VERSION) >
> nvenc_max_ver) {
> > -av_log(avctx, AV_LOG_ERROR, "Driver does not support the
> required nvenc API version. "
> > -   "Required: %d.%d Found: %d.%d\n",
> > -   NVENCAPI_MAJOR_VERSION, NVENCAPI_MINOR_VERSION,
> > -   nvenc_max_ver >> 4, nvenc_max_ver & 0xf);
> > -nvenc_print_driver_requirement(avctx, AV_LOG_ERROR);
> > -return AVERROR(ENOSYS);
> > -}
> > +ctx->apiver_rt = api_ver(nvenc_max_major, nvenc_max_minor); /*
> NVENCAPI_VERSION */
> > +ctx->config_ver_rt = struct_ver_rt(ctx, 7) | (1<<31); /*
> NV_ENC_CONFIG_VER */
> > +if (ctx->apiver_rt < api_ver(8, 1))
> > +ctx->config_ver_rt = struct_ver_rt(ctx, 6) | (1<<31);
> > +func_ver = struct_ver_rt(ctx, 2);
> >
> > -dl_fn->nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
> > +dl_fn->nvenc_funcs.version = func_ver;
> >
> >   err =
> dl_fn->nvenc_dl->NvEncodeAPICreateInstance(_fn->nvenc_funcs);
> >   if (err != NV_ENC_SUCCESS)
> > @@ -267,8 +277,8 @@ static av_cold int nvenc_open_session(AVCodecContext
> *avctx)
> >   NV_ENCODE_API_FUNCTION_LIST *p_nvenc =
> >nvenc_dload_funcs.nvenc_funcs;
> >   NVENCSTATUS ret;
> >
> > -params.version= NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
> > -params.apiVersion = NVENCAPI_VERSION;
> > +params.version= struct_ver_rt(ctx, 1); //
> NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER
> > +params.apiVersion = ctx->apiver_rt;
> >   if (ctx->d3d11_device) {
> >   params.device = ctx->d3d11_device;
> >   params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
> &

Re: [FFmpeg-devel] [PATCH 3/3] nvenc: check sdk 10.0 ptrs at runtime

2020-07-15 Thread Wang Bin
Timo Rothenpieler  于2020年7月15日周三 下午11:19写道:

> On 15.07.2020 16:34, wangbin wrote:
> > From: wang-bin 
> >
> > ---
> >   libavcodec/nvenc.c | 11 ++-
> >   1 file changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
> > index cec59f02f3..c421c292c8 100644
> > --- a/libavcodec/nvenc.c
> > +++ b/libavcodec/nvenc.c
> > @@ -1248,11 +1248,12 @@ static av_cold int
> nvenc_setup_encoder(AVCodecContext *avctx)
> >   #ifdef NVENC_HAVE_NEW_PRESETS
> >   ctx->init_encode_params.tuningInfo = ctx->tuning_info;
> >
> > -nv_status =
> p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder,
> > -ctx->init_encode_params.encodeGUID,
> > -ctx->init_encode_params.presetGUID,
> > -ctx->init_encode_params.tuningInfo,
> > -_config);
> > +if (p_nvenc->nvEncGetEncodePresetConfigEx)
> > +nv_status =
> p_nvenc->nvEncGetEncodePresetConfigEx(ctx->nvencoder,
> > +ctx->init_encode_params.encodeGUID,
> > +ctx->init_encode_params.presetGUID,
> > +ctx->init_encode_params.tuningInfo,
> > +_config);
> >   #endif
> >   } else {
> >   #ifdef NVENC_HAVE_NEW_PRESETS
> >
>
> Same here. Just not calling that function will lead to weird behaviour
> down the chain.
> Needs proper handling plus error message.
>
> This will also be the case for a lot of failure cases where non-existent
> parameters for the old version are passed to the older driver.
> Like, newer profiles, entire new options, ...
>
> If a new parameter is not zero, or a new parameter value is set, check
driver version and, on old drivers, give a warning that a new driver is
required.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] nvenc: support old drivers

2020-07-15 Thread Wang Bin



0003-nvenc-check-sdk-10.0-ptrs-at-runtime.patch
Description: Binary data


0002-nvenc-check-runtime-9.1-function-ptrs-before-use.patch
Description: Binary data


0001-nvenc-use-runtime-api-version-to-support-old-drivers.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH] libavcodec: vp8 neon optimizations for aarch64

2019-02-14 Thread Wang Bin
>
> 2019-02-09 16:46 GMT+01:00, Wang Bin :
> >>
> >> 2019-02-05 0:59 GMT+01:00, James Almer :
> >> > On 1/31/2019 4:23 PM, Carl Eugen Hoyos wrote:
> >> >> 2019-01-31 17:04 GMT+01:00, Magnus Röös :
> >> >>> Partial port of the ARM Neon for aarch64.
> >> >>
> >> >> Reproduced a >20% speedup for fate-vp8 and applied.
> >> >>
> >> >> Thank you, Carl Eugen
> >> >
> >> > This seems to have broken builds with Clang on aarch64. GCC seems
> fine.
> >> >
> >> >
> >>
> http://fate.ffmpeg.org/report.cgi?time=20190204212326=armv8-linux-clang-7
> >>
> >> Yes.
> >>
> >> Do you know if Martin plans to push his patch?
> >> I can do it but I cannot test MS.
> >>
> >
> > This breaks android(clang), ios and msvc.
> Please test Martin's patches or "--extra-cflags=-fno-integrated-as",
> see ticket #7728.


The patch in ticket #7728 works for ios and clang-cl; armasm and android
clang w/o -fno-integrated-as are still broken.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] libavcodec: vp8 neon optimizations for aarch64

2019-02-09 Thread Wang Bin
>
> 2019-02-05 0:59 GMT+01:00, James Almer :
> > On 1/31/2019 4:23 PM, Carl Eugen Hoyos wrote:
> >> 2019-01-31 17:04 GMT+01:00, Magnus Röös :
> >>> Partial port of the ARM Neon for aarch64.
> >>
> >> Reproduced a >20% speedup for fate-vp8 and applied.
> >>
> >> Thank you, Carl Eugen
> >
> > This seems to have broken builds with Clang on aarch64. GCC seems fine.
> >
> >
> http://fate.ffmpeg.org/report.cgi?time=20190204212326=armv8-linux-clang-7
>
> Yes.
>
> Do you know if Martin plans to push his patch?
> I can do it but I cannot test MS.
>

This breaks android(clang), ios and msvc.
msvc:
https://ci.appveyor.com/project/wang-bin/avbuild/builds/22246378/job/swwuj2t4djg2pec7
ios: https://travis-ci.org/wang-bin/avbuild/jobs/490977431
android: https://travis-ci.org/wang-bin/avbuild/jobs/490977437
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavfi/atempo: fix range check if tempo is set by command

2018-10-03 Thread Wang Bin
Nicolas George  于2018年10月3日周三 下午11:05写道:

> wbse...@gmail.com (2018-10-03):
> > From: wang-bin 
> >
> > ---
> >  libavfilter/af_atempo.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> >
> > diff --git a/libavfilter/af_atempo.c b/libavfilter/af_atempo.c
> > index 52f15f2769..1a004212a7 100644
> > --- a/libavfilter/af_atempo.c
> > +++ b/libavfilter/af_atempo.c
> > @@ -331,9 +331,10 @@ static int yae_set_tempo(AVFilterContext *ctx,
> const char *arg_tempo)
> >  return AVERROR(EINVAL);
> >  }
> >
> > -if (tempo < 0.5 || tempo > 2.0) {
> > -av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [0.5, 2.0]
> range\n",
> > -   tempo);
>
> > +const AVOption *o = av_opt_find(>class, "tempo", NULL, 0,
> AV_OPT_SEARCH_FAKE_OBJ);
> > +if (tempo < o->min || tempo > o->max) {
>
> The option is defined in the very same file and the min and max values
> are hardcoded there. There is no need for a fragile lookup.
>
>
Fewer mistakes. For example, the author forgot to change this line. This
function is not frequently called.


> > +av_log(ctx, AV_LOG_ERROR, "Tempo value %f exceeds [%.1f, %.1f]
> range\n",
> > +   tempo, o->min, o->max);
> >  return AVERROR(EINVAL);
> >  }
> >
> > @@ -439,8 +440,8 @@ static int yae_load_data(ATempoContext *atempo,
> >  return 0;
> >  }
> >
>
> > -// samples are not expected to be skipped, unless tempo is greater
> than 2:
> > -av_assert0(read_size <= atempo->ring || atempo->tempo > 2.0);
> > +// samples are not expected to be skipped:
> > +av_assert0(read_size <= atempo->ring);
>
> Looks unrelated. Can you explain?
>

av_opt_set or avfilter_process_command already returns error if out of
range. So no need to check it again.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/6] avcodec/nvdec: avoid needless copy of output frame

2018-05-09 Thread Wang Bin
>
>
>
> -offset += cpy.Height;
> +frame->buf[1] = av_buffer_create((uint8_t *)unmap_data,
> sizeof(*unmap_data),
> + nvdec_unmap_mapped_frame,
> (void*)devptr,
> + AV_BUFFER_FLAG_READONLY);
> +if (!frame->buf[1]) {
> +ret = AVERROR(ENOMEM);
> +goto copy_fail;
>  }
>
>
If AVFrame.buf[i] is non-NULL, then buf[j] must be non-NULL for all j < i,
see libavutil/frame.h. So either the comment in frame.h or your
implementation needs to be changed.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avutil/hwcontext_cuda: add AVCUDAFramesContext and AVCUDAFramesContext.flags

2018-05-07 Thread Wang Bin
2018-05-08 5:10 GMT+08:00 Timo Rothenpieler <t...@rothenpieler.org>:

> Frames can be mapped from nvdec/cuvid, not needing any actual memory
> allocation, but all other features of the hw_frames_ctx.
> Hence the dummy-mode, which does not allocate any (notable amounts of)
> memory but otherwise behaves the exact same.
> ---
>  doc/APIchanges |  3 +++
>  libavutil/hwcontext_cuda.c | 10 ++
>  libavutil/hwcontext_cuda.h | 18 +-
>  libavutil/version.h|  2 +-
>  4 files changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/doc/APIchanges b/doc/APIchanges
> index ede5b186ae..82ec888fd8 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -15,6 +15,9 @@ libavutil: 2017-10-21
>
>  API changes, most recent first:
>
> +2018-05-xx - xx - lavu 56.19.100 - hwcontext.h
> +  Add AVCUDAFramesContext and AVCUDAFramesContext.flags.
> +
>  2018-04-xx - xx - lavu 56.18.100 - pixdesc.h
>Add AV_PIX_FMT_FLAG_ALPHA to AV_PIX_FMT_PAL8.
>
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 37827a770c..0d867ef0f5 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -83,6 +83,7 @@ static void cuda_buffer_free(void *opaque, uint8_t *data)
>  static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
>  {
>  AVHWFramesContext *ctx = opaque;
> +AVCUDAFramesContext *frctx = ctx->hwctx;
>  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
>  CudaFunctions  *cu = hwctx->internal->cuda_dl;
>
> @@ -97,6 +98,10 @@ static AVBufferRef *cuda_pool_alloc(void *opaque, int
> size)
>  return NULL;
>  }
>
> +// A lot of places expect the pointer to be !=NULL, so make minimum
> allocation instead.
> +if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
> +size = 1;
> +
>  err = cu->cuMemAlloc(, size);
>  if (err != CUDA_SUCCESS)
>  goto fail;
> @@ -161,6 +166,7 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
>
>  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>  {
> +AVCUDAFramesContext *frctx = ctx->hwctx;
>  int aligned_width;
>  int width_in_bytes = ctx->width;
>
> @@ -210,6 +216,9 @@ static int cuda_get_buffer(AVHWFramesContext *ctx,
> AVFrame *frame)
>  frame->width  = ctx->width;
>  frame->height = ctx->height;
>
> +if (frctx->flags & AV_CUDA_HWFRAMES_DUMMY_MODE)
> +frame->data[0] = frame->data[1] = frame->data[2] = NULL;
> +
>  return 0;
>  }
>
> @@ -402,6 +411,7 @@ const HWContextType ff_hwcontext_type_cuda = {
>  .name = "CUDA",
>
>  .device_hwctx_size= sizeof(AVCUDADeviceContext),
> +.frames_hwctx_size= sizeof(AVCUDAFramesContext),
>  .frames_priv_size = sizeof(CUDAFramesContext),
>
>  .device_create= cuda_device_create,
> diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
> index 12dae8449e..388d6f8f1c 100644
> --- a/libavutil/hwcontext_cuda.h
> +++ b/libavutil/hwcontext_cuda.h
> @@ -45,7 +45,23 @@ typedef struct AVCUDADeviceContext {
>  } AVCUDADeviceContext;
>
>  /**
> - * AVHWFramesContext.hwctx is currently not used
> + * This struct is allocated as AVHWFramesContext.hwctx
>   */
> +typedef struct AVCUDAFramesContext {
> +/**
> + * Special implementation-specific flags.
> + *
> + * Must be set by the user before calling av_hwframe_ctx_init().
> + */
> +int flags;
> +} AVCUDAFramesContext;
> +
> +/**
> + * No actual allocation will happen, but otherwise behaves like normal.
> + *
> + * This is to be used if a AVHWFramesContext is required, but the actual
> + * allocation has to happen outside of it.
> + */
> +#define AV_CUDA_HWFRAMES_DUMMY_MODE (1 << 0)
>
>  #endif /* AVUTIL_HWCONTEXT_CUDA_H */
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 5185454d9b..84409b1d69 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>   */
>
>  #define LIBAVUTIL_VERSION_MAJOR  56
> -#define LIBAVUTIL_VERSION_MINOR  18
> +#define LIBAVUTIL_VERSION_MINOR  19
>  #define LIBAVUTIL_VERSION_MICRO 100
>
>  #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
> --
>

I think the flag should be added in avcodec, maybe AVCodecContext.flags2,
because
1. This flag is used by the cuda decoder only, and does not work for avfilter
and other cuda apis.
2. Other hw codecs may support referencing the decoded memory instead of
copying it, for example, mmal decoder. see
https://github.com/wang-bin/FFmpeg/commit/74390ec836743dd337c92dd5da5f4f9ff638316b#diff-26a23a40f16babe0068c7340c6160472
3. the flag can be turned on/off at any time
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] configure: fix clang-cl detection

2018-04-18 Thread Wang Bin
2018-04-18 16:27 GMT+08:00 Timo Rothenpieler <t...@rothenpieler.org>:

> On 18.04.2018 10:05, Wang Bin wrote:
> >>
> >>
> >> -elif $_cc -nologo- 2>&1 | grep -q Microsoft; then
> >> +elif $_cc -nologo- 2>&1 | grep -q Microsoft || $_cc -v 2>&1 | grep
> -q
> >> clang && $_cc -? > /dev/null 2>&1; then
> >>  _type=msvc
> >>  _ident=$($_cc 2>&1 | head -n1)
> >>  _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 |
> >> awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if
> >> (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
> >>
> >>
> > This breaks msvc build because msys's link.exe is tested instead of
> mslink
> > script
>
> Sounds more like an issue with your build environment to me? Make sure
> the msvc build tools take precedence in your PATH.
>
>
I build on windows. My environment is correct. $PATH always starts with
msys2 dirs. This is why mslink exists.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] configure: fix clang-cl detection

2018-04-18 Thread Wang Bin
>
>
> -elif $_cc -nologo- 2>&1 | grep -q Microsoft; then
> +elif $_cc -nologo- 2>&1 | grep -q Microsoft || $_cc -v 2>&1 | grep -q
> clang && $_cc -? > /dev/null 2>&1; then
>  _type=msvc
>  _ident=$($_cc 2>&1 | head -n1)
>  _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< 2>&1 |
> awk '\''/including/ { sub(/^.*file: */, ""); gsub(/\\/, "/"); if
> (!match($$0, / /)) print "$@:", $$0 }'\'' > $(@:.o=.d)'
>
>
This breaks msvc build because msys's link.exe is tested instead of mslink
script
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 13/14] mmal: add option copy_frame to support retrieving sw frames w/o copy

2017-12-16 Thread Wang Bin
2017-12-16 19:47 GMT+08:00 wm4 <nfx...@googlemail.com>:
> On Sat, 16 Dec 2017 13:48:05 +0800
> Wang Bin <wbse...@gmail.com> wrote:
>
>> 2017-12-16 2:50 GMT+08:00 wm4 <nfx...@googlemail.com>:
>> > On Fri, 15 Dec 2017 15:05:50 +0800
>> > wbse...@gmail.com wrote:
>> >
>> >> From: wang-bin <wbse...@gmail.com>
>> >>
>> >> mmal buffer->data is already in host memory. AFAIK decoders implemented 
>> >> in omx must
>> >> be configured to output frames to either memory or something directly 
>> >> used by renderer,
>> >> for example mediacodec surface, mmal buffer and omxil eglimage.
>> >> test result: big buck bunny 1080p fps increases from about 100 to 110 if 
>> >> copy_frame is
>> >> turned off
>> >> ---
>> >>  libavcodec/mmaldec.c | 31 +++
>> >>  1 file changed, 23 insertions(+), 8 deletions(-)
>> >>
>> >> diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
>> >> index c1cfb09283..9cd6c6558f 100644
>> >> --- a/libavcodec/mmaldec.c
>> >> +++ b/libavcodec/mmaldec.c
>> >> @@ -69,6 +69,7 @@ typedef struct MMALDecodeContext {
>> >>  AVClass *av_class;
>> >>  int extra_buffers;
>> >>  int extra_decoder_buffers;
>> >> +int copy_frame;
>> >>
>> >>  MMAL_COMPONENT_T *decoder;
>> >>  MMAL_QUEUE_T *queue_decoded_frames;
>> >> @@ -139,7 +140,6 @@ static int ffmmal_set_ref(AVFrame *frame, FFPoolRef 
>> >> *pool,
>> >>  atomic_fetch_add_explicit(>pool->refcount, 1, 
>> >> memory_order_relaxed);
>> >>  mmal_buffer_header_acquire(buffer);
>> >>
>> >> -frame->format = AV_PIX_FMT_MMAL;
>> >>  frame->data[3] = (uint8_t *)ref->buffer;
>> >>  return 0;
>> >>  }
>> >> @@ -650,20 +650,34 @@ static int ffmal_copy_frame(AVCodecContext *avctx,  
>> >> AVFrame *frame,
>> >>
>> >>  if ((ret = ffmmal_set_ref(frame, ctx->pool_out, buffer)) < 0)
>> >>  goto done;
>> >> +frame->format = AV_PIX_FMT_MMAL;
>> >>  } else {
>> >>  int w = FFALIGN(avctx->width, 32);
>> >>  int h = FFALIGN(avctx->height, 16);
>> >>  uint8_t *src[4];
>> >>  int linesize[4];
>> >>
>> >> -if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> >> -goto done;
>> >> +if (ctx->copy_frame) {
>> >> +if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> >> +goto done;
>> >>
>> >> -av_image_fill_arrays(src, linesize,
>> >> - buffer->data + 
>> >> buffer->type->video.offset[0],
>> >> - avctx->pix_fmt, w, h, 1);
>> >> -av_image_copy(frame->data, frame->linesize, src, linesize,
>> >> -  avctx->pix_fmt, avctx->width, avctx->height);
>> >> +av_image_fill_arrays(src, linesize,
>> >> +buffer->data + 
>> >> buffer->type->video.offset[0],
>> >> +avctx->pix_fmt, w, h, 1);
>> >> +av_image_copy(frame->data, frame->linesize, src, linesize,
>> >> +avctx->pix_fmt, avctx->width, avctx->height);
>> >> +} else {
>> >> +if ((ret = ff_decode_frame_props(avctx, frame)) < 0)
>> >> +goto done;
>> >> +/* buffer->type->video.offset/pitch[i]; is always 0 */
>> >> +av_image_fill_arrays(src, linesize,
>> >> +buffer->data + 
>> >> buffer->type->video.offset[0],
>> >> +avctx->pix_fmt, w, h, 1);
>> >> +if ((ret = ffmmal_set_ref(frame, ctx->pool_out, buffer)) < 0)
>> >> +goto done;
>> >> +memcpy(frame->data, src, sizeof(src));
>> >> +memcpy(frame->linesize, linesize, sizeof(linesize));
>> >> +}
>> >>  }
>> >>
>> >>  frame->pts = buffer->pts == MMAL_TIME_UNKNOWN ? AV_N

Re: [FFmpeg-devel] [PATCH 07/14] mediacodec: check whether cropping is set before use

2017-12-16 Thread Wang Bin
2017-12-16 17:12 GMT+08:00 Matthieu Bouron <matthieu.bou...@gmail.com>:
> On Sat, Dec 16, 2017 at 01:40:18PM +0800, Wang Bin wrote:
>> > > +width = s->crop_right + 1 - s->crop_left;
>> > > +else
>> > > +width = s->width;
>> > >
>> > >  av_log(avctx, AV_LOG_INFO,
>> > >  "Output crop parameters top=%d bottom=%d left=%d right=%d, "
>> >
>> > On which device does this happen ?
>>
>>
>> None of my devices have such problem. It happens if replace jni by ndk
>> mediacodec functions(maybe another patch later). original code:
>> https://github.com/aosp-mirror/platform_frameworks_base/blob/master/media/java/android/media/MediaCodec.java#L190
>
> OK. I will soon apply the patch.
>
> I'm however not in favor of replacing the MediaCodec jni code by its ndk
> counterpart now as it would drop compatibility with Android 4.4.

Just load libmediandk.so and resolve symbols at runtime, and fall back
to jni if the library does not exist, i.e. on android < 5.0
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 14/14] videotoolbox: remove opengl compatibility attribute

2017-12-16 Thread Wang Bin
2017-12-16 15:33 GMT+08:00 Xiaolei Yu <dreifachst...@gmail.com>:
> On 12/16/2017 02:07 PM, Wang Bin wrote:
>> 2017-12-16 2:52 GMT+08:00 wm4 <nfx...@googlemail.com>:
>>> On Fri, 15 Dec 2017 15:02:44 +0800
>>> wbse...@gmail.com wrote:
>>>
>>>> From: wang-bin <wbse...@gmail.com>
>>>>
>>>> 1. a cvpixelbuffer backed by iosurface can always be converted to an 
>>>> opengl texture, using CGLTexImageIOSurface2D for macOS, and undocumented 
>>>> api texImageIOSurface(which is internally used by public api 
>>>> CVOpenGLESTextureCacheCreateTextureFromImage) for iOS4.0+.
>>>> 2. enabling the attribute can slow down decoding speed a lot. I tested 
>>>> many video clips on my macbook air. for example: ffmpeg -ss 00:00:00 -t 
>>>> 00:03:00 -hwaccel videotoolbox -an -i big_buck_bunny_1080p_h264.mov -f 
>>>> null ->/dev/null, result with the attribute
>>>> enabled: frame= 2082 fps= 85 q=-0.0 Lsize=N/A time=00:03:00.00 bitrate=N/A 
>>>> speed=7.34x
>>>> disabled: frame= 2031 fps=104 q=-0.0 Lsize=N/A time=00:03:00.00 
>>>> bitrate=N/A speed=9.22x
>>>> ---
>>>>  libavcodec/videotoolbox.c | 5 -
>>>>  1 file changed, 5 deletions(-)
>>>>
>>>> diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
>>>> index 9d2f0afa20..24631684d7 100644
>>>> --- a/libavcodec/videotoolbox.c
>>>> +++ b/libavcodec/videotoolbox.c
>>>> @@ -664,11 +664,6 @@ static CFDictionaryRef 
>>>> videotoolbox_buffer_attributes_create(int width,
>>>>  CFDictionarySetValue(buffer_attributes, 
>>>> kCVPixelBufferIOSurfacePropertiesKey, io_surface_properties);
>>>>  CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
>>>>  CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
>>>> -#if TARGET_OS_IPHONE
>>>> -CFDictionarySetValue(buffer_attributes, 
>>>> kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
>>>> -#else
>>>> -CFDictionarySetValue(buffer_attributes, 
>>>> kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey, kCFBooleanTrue);
>>>> -#endif
>>>>
>>>>  CFRelease(io_surface_properties);
>>>>  CFRelease(cv_pix_fmt);
>>>
>>> Does this have a negative effect on compatibility or performance? (In
>>> both cases I'm asking about the case when actually using GL rendering.)
>>
>> Disabling the attribute improves performance in my tests. I can not
>> find any document about these keys. What i know is the decoded
>> cvpixelbuffer is backed by iosurface, and the api to create texture
>> from iosurface is available since macOS10.6
>
> Maybe you can add a flag to make them optional?
> They are documented at:
> https://developer.apple.com/documentation/corevideo/kcvpixelbufferopenglescompatibilitykey
> https://developer.apple.com/documentation/corevideo/kcvpixelbufferiosurfaceopengltexturecompatibilitykey
>
> Things may still work without them but I would like to follow the 
> documentation whenever possible. And I think iOS DOES require the flag for 
> those buffers to be consumed through GLES.
>

Your links explain nothing. iOS does not need it as mentioned in the
patch. I tested on iphone4s+iOS9 and some new devices.

> Pure speculation here. GPU texture units usually require special memory 
> layouts that may be suboptimal for the decoder. When these constraints are 
> not met either the driver has to perform the conversion or you pay the 
> penalty at access time.

Not sure about memory layout. The fact is on mac the performance is
much better without it. No difference on ios.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 14/14] videotoolbox: remove opengl compatibility attribute

2017-12-15 Thread Wang Bin
2017-12-16 2:52 GMT+08:00 wm4 <nfx...@googlemail.com>:
> On Fri, 15 Dec 2017 15:02:44 +0800
> wbse...@gmail.com wrote:
>
>> From: wang-bin <wbse...@gmail.com>
>>
>> 1. a cvpixelbuffer backed by iosurface can always be converted to an opengl 
>> texture, using CGLTexImageIOSurface2D for macOS, and undocumented api 
>> texImageIOSurface(which is internally used by public api 
>> CVOpenGLESTextureCacheCreateTextureFromImage) for iOS4.0+.
>> 2. enabling the attribute can slow down decoding speed a lot. I tested many 
>> video clips on my macbook air. for example: ffmpeg -ss 00:00:00 -t 00:03:00 
>> -hwaccel videotoolbox -an -i big_buck_bunny_1080p_h264.mov -f null 
>> ->/dev/null, result with the attribute
>> enabled: frame= 2082 fps= 85 q=-0.0 Lsize=N/A time=00:03:00.00 bitrate=N/A 
>> speed=7.34x
>> disabled: frame= 2031 fps=104 q=-0.0 Lsize=N/A time=00:03:00.00 bitrate=N/A 
>> speed=9.22x
>> ---
>>  libavcodec/videotoolbox.c | 5 -
>>  1 file changed, 5 deletions(-)
>>
>> diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
>> index 9d2f0afa20..24631684d7 100644
>> --- a/libavcodec/videotoolbox.c
>> +++ b/libavcodec/videotoolbox.c
>> @@ -664,11 +664,6 @@ static CFDictionaryRef 
>> videotoolbox_buffer_attributes_create(int width,
>>  CFDictionarySetValue(buffer_attributes, 
>> kCVPixelBufferIOSurfacePropertiesKey, io_surface_properties);
>>  CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
>>  CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
>> -#if TARGET_OS_IPHONE
>> -CFDictionarySetValue(buffer_attributes, 
>> kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
>> -#else
>> -CFDictionarySetValue(buffer_attributes, 
>> kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey, kCFBooleanTrue);
>> -#endif
>>
>>  CFRelease(io_surface_properties);
>>  CFRelease(cv_pix_fmt);
>
> Does this have a negative effect on compatibility or performance? (In
> both cases I'm asking about the case when actually using GL rendering.)

Disabling the attribute improves performance in my tests. I cannot
find any documentation about these keys. What I know is that the decoded
cvpixelbuffer is backed by an iosurface, and the API to create a texture
from an iosurface has been available since macOS 10.6.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 13/14] mmal: add option copy_frame to support retrieving sw frames w/o copy

2017-12-15 Thread Wang Bin
2017-12-16 2:50 GMT+08:00 wm4 <nfx...@googlemail.com>:
> On Fri, 15 Dec 2017 15:05:50 +0800
> wbse...@gmail.com wrote:
>
>> From: wang-bin <wbse...@gmail.com>
>>
>> mmal buffer->data is already in host memory. AFAIK decoders implemented in 
>> omx must
>> be configured to output frames to either memory or something directly used 
>> by renderer,
>> for example mediacodec surface, mmal buffer and omxil eglimage.
>> test result: big buck bunny 1080p fps increases from about 100 to 110 if 
>> copy_frame is
>> turned off
>> ---
>>  libavcodec/mmaldec.c | 31 +++
>>  1 file changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
>> index c1cfb09283..9cd6c6558f 100644
>> --- a/libavcodec/mmaldec.c
>> +++ b/libavcodec/mmaldec.c
>> @@ -69,6 +69,7 @@ typedef struct MMALDecodeContext {
>>  AVClass *av_class;
>>  int extra_buffers;
>>  int extra_decoder_buffers;
>> +int copy_frame;
>>
>>  MMAL_COMPONENT_T *decoder;
>>  MMAL_QUEUE_T *queue_decoded_frames;
>> @@ -139,7 +140,6 @@ static int ffmmal_set_ref(AVFrame *frame, FFPoolRef 
>> *pool,
>>  atomic_fetch_add_explicit(&ref->pool->refcount, 1, 
>> memory_order_relaxed);
>>  mmal_buffer_header_acquire(buffer);
>>
>> -frame->format = AV_PIX_FMT_MMAL;
>>  frame->data[3] = (uint8_t *)ref->buffer;
>>  return 0;
>>  }
>> @@ -650,20 +650,34 @@ static int ffmal_copy_frame(AVCodecContext *avctx,  
>> AVFrame *frame,
>>
>>  if ((ret = ffmmal_set_ref(frame, ctx->pool_out, buffer)) < 0)
>>  goto done;
>> +frame->format = AV_PIX_FMT_MMAL;
>>  } else {
>>  int w = FFALIGN(avctx->width, 32);
>>  int h = FFALIGN(avctx->height, 16);
>>  uint8_t *src[4];
>>  int linesize[4];
>>
>> -if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> -goto done;
>> +if (ctx->copy_frame) {
>> +if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
>> +goto done;
>>
>> -av_image_fill_arrays(src, linesize,
>> - buffer->data + buffer->type->video.offset[0],
>> - avctx->pix_fmt, w, h, 1);
>> -av_image_copy(frame->data, frame->linesize, src, linesize,
>> -  avctx->pix_fmt, avctx->width, avctx->height);
>> +av_image_fill_arrays(src, linesize,
>> +buffer->data + 
>> buffer->type->video.offset[0],
>> +avctx->pix_fmt, w, h, 1);
>> +av_image_copy(frame->data, frame->linesize, src, linesize,
>> +avctx->pix_fmt, avctx->width, avctx->height);
>> +} else {
>> +if ((ret = ff_decode_frame_props(avctx, frame)) < 0)
>> +goto done;
>> +/* buffer->type->video.offset/pitch[i]; is always 0 */
>> +av_image_fill_arrays(src, linesize,
>> +buffer->data + 
>> buffer->type->video.offset[0],
>> +avctx->pix_fmt, w, h, 1);
>> +if ((ret = ffmmal_set_ref(frame, ctx->pool_out, buffer)) < 0)
>> +goto done;
>> +memcpy(frame->data, src, sizeof(src));
>> +memcpy(frame->linesize, linesize, sizeof(linesize));
>> +}
>>  }
>>
>>  frame->pts = buffer->pts == MMAL_TIME_UNKNOWN ? AV_NOPTS_VALUE : 
>> buffer->pts;
>> @@ -842,6 +856,7 @@ AVHWAccel ff_wmv3_mmal_hwaccel = {
>>  static const AVOption options[]={
>>  {"extra_buffers", "extra buffers", offsetof(MMALDecodeContext, 
>> extra_buffers), AV_OPT_TYPE_INT, {.i64 = 10}, 0, 256, 0},
>>  {"extra_decoder_buffers", "extra MMAL internal buffered frames", 
>> offsetof(MMALDecodeContext, extra_decoder_buffers), AV_OPT_TYPE_INT, {.i64 = 
>> 10}, 0, 256, 0},
>> +{"copy_frame", "copy deocded data to avframe", 
>> offsetof(MMALDecodeContext, copy_frame), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 
>> 256, 0},
>>  {NULL}
>>  };
>>
>
> Didn't check too closely what exactly the patch does, but adding an
> option for it sounds very wrong. The user select in the get_format
> callback whether a GPU surface is output (MMAL pixfmt), or software.

Avoid copying data from mmal buffer->data to avframe data. Instead,
just fill strides and address of each plane in avframe, and add a
reference to mmal buffer.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 07/14] mediacodec: check whether cropping is set before use

2017-12-15 Thread Wang Bin
> > +width = s->crop_right + 1 - s->crop_left;
> > +else
> > +width = s->width;
> >
> >  av_log(avctx, AV_LOG_INFO,
> >  "Output crop parameters top=%d bottom=%d left=%d right=%d, "
>
> On which device does this happen ?


None of my devices have this problem. It happens if the JNI calls are replaced
by the NDK mediacodec functions (maybe in another patch later). Original code:
https://github.com/aosp-mirror/platform_frameworks_base/blob/master/media/java/android/media/MediaCodec.java#L190
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mmaldec: use zero-copy for AV_PIX_FMT_MMAL

2017-11-02 Thread Wang Bin


获取 Outlook for Android


From: ffmpeg-devel  on behalf of wm4 

Sent: Monday, September 4, 2017 6:04:22 PM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/mmaldec: use zero-copy for 
AV_PIX_FMT_MMAL

On Sun,  3 Sep 2017 15:44:36 +0300
Yevhen Kyriukha  wrote:

> Signed-off-by: Yevhen Kyriukha 
> ---
>  libavcodec/mmaldec.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
> index 0b1195dc3e..19ca6ce7e7 100644
> --- a/libavcodec/mmaldec.c
> +++ b/libavcodec/mmaldec.c
> @@ -295,6 +295,8 @@ static int ffmal_update_format(AVCodecContext *avctx)
>  goto fail;
>
>  if (avctx->pix_fmt == AV_PIX_FMT_MMAL) {
> +if ((status = mmal_port_parameter_set_boolean(decoder->output[0], 
> MMAL_PARAMETER_ZERO_COPY, 1)))
> +goto fail;
>  format_out->encoding = MMAL_ENCODING_OPAQUE;
>  } else {
>  format_out->encoding_variant = format_out->encoding = 
> MMAL_ENCODING_I420;
> @@ -332,7 +334,8 @@ static int ffmal_update_format(AVCodecContext *avctx)
>  FFMAX(decoder->output[0]->buffer_size_min, 
> decoder->output[0]->buffer_size_recommended);
>  decoder->output[0]->buffer_num =
>  FFMAX(decoder->output[0]->buffer_num_min, 
> decoder->output[0]->buffer_num_recommended) + ctx->extra_buffers;
> -ctx->pool_out->pool = mmal_pool_create(decoder->output[0]->buffer_num,
> +ctx->pool_out->pool = mmal_port_pool_create(decoder->output[0],
> +   decoder->output[0]->buffer_num,
> decoder->output[0]->buffer_size);
>  if (!ctx->pool_out->pool) {
>  ret = AVERROR(ENOMEM);

Why the change, and what are the implications? (The commit message
should contain this.)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mmaldec: use zero-copy for AV_PIX_FMT_MMAL

2017-11-02 Thread Wang Bin


获取 Outlook for Android<https://aka.ms/ghei36>



发件人: Wang Bin
发送时间: 11月2日星期四 16:40
主题: Re: [FFmpeg-devel] [PATCH] avcodec/mmaldec: use zero-copy for 
AV_PIX_FMT_MMAL
收件人: FFmpeg development discussions and patches




获取 Outlook for Android<https://aka.ms/ghei36>

From: ffmpeg-devel <ffmpeg-devel-boun...@ffmpeg.org> on behalf of wm4 
<nfx...@googlemail.com>
Sent: Monday, September 4, 2017 6:04:22 PM
To: ffmpeg-devel@ffmpeg.org
Subject: Re: [FFmpeg-devel] [PATCH] avcodec/mmaldec: use zero-copy for 
AV_PIX_FMT_MMAL

On Sun,  3 Sep 2017 15:44:36 +0300
Yevhen Kyriukha <kirg...@gmail.com> wrote:

> Signed-off-by: Yevhen Kyriukha <kirg...@gmail.com>
> ---
>  libavcodec/mmaldec.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
> index 0b1195dc3e..19ca6ce7e7 100644
> --- a/libavcodec/mmaldec.c
> +++ b/libavcodec/mmaldec.c
> @@ -295,6 +295,8 @@ static int ffmal_update_format(AVCodecContext *avctx)
>  goto fail;
>
>  if (avctx->pix_fmt == AV_PIX_FMT_MMAL) {
> +if ((status = mmal_port_parameter_set_boolean(decoder->output[0], 
> MMAL_PARAMETER_ZERO_COPY, 1)))
> +goto fail;
>  format_out->encoding = MMAL_ENCODING_OPAQUE;
>  } else {
>  format_out->encoding_variant = format_out->encoding = 
> MMAL_ENCODING_I420;
> @@ -332,7 +334,8 @@ static int ffmal_update_format(AVCodecContext *avctx)
>  FFMAX(decoder->output[0]->buffer_size_min, 
> decoder->output[0]->buffer_size_recommended);
>  decoder->output[0]->buffer_num =
>  FFMAX(decoder->output[0]->buffer_num_min, 
> decoder->output[0]->buffer_num_recommended) + ctx->extra_buffers;
> -ctx->pool_out->pool = mmal_pool_create(decoder->output[0]->buffer_num,
> +ctx->pool_out->pool = mmal_port_pool_create(decoder->output[0],
> +   decoder->output[0]->buffer_num,
> decoder->output[0]->buffer_size);
>  if (!ctx->pool_out->pool) {
>  ret = AVERROR(ENOMEM);

Why the change, and what are the implications? (The commit message
should contain this.)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] configure: clang -Oz for small size build to reduce size further

2017-03-13 Thread Wang Bin
http://stackoverflow.com/questions/15548023/clang-optimization-levels

-Oz disables -slp-vectorizer and -vectorize-loops
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] configure: clang -Oz for small size build to reduce size further

2017-03-12 Thread Wang Bin
How to benchmark codecs?
Here is my configuration and result size.  The total size of -Oz static or
dynamic libraries is about 1M smaller.

configure --disable-debug --enable-shared --enable-runtime-cpudetect
--enable-hwaccels --enable-avresample --disable-postproc
--install_name_dir=@rpath --enable-pic
--extra-cflags="-mmacosx-version-min=10.7"
--extra-ldflags="-mmacosx-version-min=10.7 -Wl,-rpath,@loader_path
-Wl,-rpath,@loader_path/../Frameworks -Wl,-rpath,@loader_path/lib
-Wl,-rpath,@loader_path/../lib" --enable-small

-Os
-rwxr-xr-x  1 501  20  10545284  3 10 11:11 ./libavcodec.57.64.101.dylib
-rw-r--r--  1 501  20  13434432  3 10 11:11 ./libavcodec.a
-rwxr-xr-x  1 501  20  57784  3 10 11:11 ./libavdevice.57.1.100.dylib
-rw-r--r--  1 501  20  53464  3 10 11:11 ./libavdevice.a
-rwxr-xr-x  1 501  20  1564396  3 10 11:11 ./libavfilter.6.65.100.dylib
-rw-r--r--  1 501  20  2268040  3 10 11:11 ./libavfilter.a
-rwxr-xr-x  1 501  20  1774664  3 10 11:11 ./libavformat.57.56.101.dylib
-rw-r--r--  1 501  20  2823792  3 10 11:11 ./libavformat.a
-rwxr-xr-x  1 501  20  117700  3 10 11:11 ./libavresample.3.1.0.dylib
-rw-r--r--  1 501  20  162760  3 10 11:11 ./libavresample.a
-rwxr-xr-x  1 501  20  298520  3 10 11:11 ./libavutil.55.34.101.dylib
-rw-r--r--  1 501  20  395512  3 10 11:11 ./libavutil.a
-rwxr-xr-x  1 501  20  106968  3 10 11:11 ./libswresample.2.3.100.dylib
-rw-r--r--  1 501  20  153400  3 10 11:11 ./libswresample.a
-rwxr-xr-x  1 501  20  497460  3 10 11:11 ./libswscale.4.2.100.dylib
-rw-r--r--  1 501  20  614184  3 10 11:11 ./libswscale.a

-Oz
-rwxr-xr-x  1 501  20  9665732  3 10 11:39 ./libavcodec.57.64.101.dylib
-rw-r--r--  1 501  20  12597816  3 10 11:39 ./libavcodec.a
-rwxr-xr-x  1 501  20  57736  3 10 11:39 ./libavdevice.57.1.100.dylib
-rw-r--r--  1 501  20  53376  3 10 11:39 ./libavdevice.a
-rwxr-xr-x  1 501  20  1515268  3 10 11:39 ./libavfilter.6.65.100.dylib
-rw-r--r--  1 501  20  2223024  3 10 11:39 ./libavfilter.a
-rwxr-xr-x  1 501  20  1733824  3 10 11:39 ./libavformat.57.56.101.dylib
-rw-r--r--  1 501  20  2787728  3 10 11:39 ./libavformat.a
-rwxr-xr-x  1 501  20  113652  3 10 11:39 ./libavresample.3.1.0.dylib
-rw-r--r--  1 501  20  160920  3 10 11:39 ./libavresample.a
-rwxr-xr-x  1 501  20  294424  3 10 11:39 ./libavutil.55.34.101.dylib
-rw-r--r--  1 501  20  390624  3 10 11:39 ./libavutil.a
-rwxr-xr-x  1 501  20  102864  3 10 11:39 ./libswresample.2.3.100.dylib
-rw-r--r--  1 501  20  148336  3 10 11:39 ./libswresample.a
-rwxr-xr-x  1 501  20  481020  3 10 11:39 ./libswscale.4.2.100.dylib
-rw-r--r--  1 501  20  599000  3 10 11:39 ./libswscale.a
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] configure: clang -Oz for small size build to reduce size further

2017-03-09 Thread Wang Bin



0001-configure-clang-Oz-for-small-size-build-to-reduce-si.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc/videotoolbox: set kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey

2017-03-03 Thread Wang Bin
From 011d03c4d2b6b138de539dcf5019169781ee7fb2 Mon Sep 17 00:00:00 2001
From: wang-bin <wbse...@gmail.com>
Date: Fri, 3 Mar 2017 18:10:54 +0800
Subject: [PATCH] avcodec/videotoolbox: set
 kCVPixelBufferOpenGLESCompatibilityKey for iOS

kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey is not available in
iOS
---
 libavcodec/videotoolbox.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/videotoolbox.c b/libavcodec/videotoolbox.c
index 824f2d8e68..9199b02055 100644
--- a/libavcodec/videotoolbox.c
+++ b/libavcodec/videotoolbox.c
@@ -31,6 +31,7 @@
 #include "bytestream.h"
 #include "h264dec.h"
 #include "mpegvideo.h"
+#include <TargetConditionals.h>

 #ifndef kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
 #  define
kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder
CFSTR("RequireHardwareAcceleratedVideoDecoder")
@@ -477,7 +478,11 @@ static CFDictionaryRef
videotoolbox_buffer_attributes_create(int width,
 CFDictionarySetValue(buffer_attributes,
kCVPixelBufferIOSurfacePropertiesKey, io_surface_properties);
 CFDictionarySetValue(buffer_attributes, kCVPixelBufferWidthKey, w);
 CFDictionarySetValue(buffer_attributes, kCVPixelBufferHeightKey, h);
+#if TARGET_OS_IPHONE
+CFDictionarySetValue(buffer_attributes,
kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
+#else
 CFDictionarySetValue(buffer_attributes,
kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey, kCFBooleanTrue);
+#endif

 CFRelease(io_surface_properties);
 CFRelease(cv_pix_fmt);
-- 
2.11.1
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc/videotoolbox: set kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey

2017-03-03 Thread Wang Bin
>
> So what happens on iOS with the current code?


Compile error. VLC uses kCVPixelBufferOpenGLESCompatibilityKey for iOS and
kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey for OSX. Should I send
a patch?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc/videotoolbox: set kCVPixelBufferIOSurfaceOpenGLTextureCompatibilityKey

2017-03-02 Thread Wang Bin
It's macOS only. For iOS, we can use kCVPixelBufferOpenGLESCompatibilityKey
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] mac, ios clock_gettime patch

2017-02-04 Thread Wang Bin
>
> So how is it possible that clock_gettime() must not be defined to allow
> starting the executable, but this object has to be?


Because I build ffmpeg with cc and linker flag "-mmacosx-version-min=10.7"
to support lower os versions. If no such flag, the target os is the sdk
version you use, e.g. 10.x. Then the symbol is not compiled as a weak
symbol and it must be resolved at runtime.
You can see all weak symbols using `nm -mg libavcodec.dylib |grep -E
"undefined.*weak"`
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] mac, ios clock_gettime patch

2017-02-04 Thread Wang Bin
>
> dyld: Symbol not found: _kVTCompressionPropertyKey_H264EntropyMode


What system? kVTCompressionPropertyKey_H264EntropyMode is available since
macOS 10.9 and iOS 10.8. If targeting an older system, we have to define its
value
like kVTVideoDecoderSpecification_RequireHardwareAcceleratedVideoDecoder in
videotoolbox.c
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] mac, ios clock_gettime patch

2017-01-08 Thread Wang Bin
ping

2017-01-03 14:26 GMT+08:00 Wang Bin <wbse...@gmail.com>:

> If ffmpeg is build with iOS 10 sdk, program will crash on iOS 9. This
> patch fixes the crash.
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] mac, ios clock_gettime patch

2017-01-02 Thread Wang Bin
If ffmpeg is built with the iOS 10 SDK, the program will crash on iOS 9. This
patch fixes the crash.


0001-check-clock_gettime-at-runtime-for-apple-platforms.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] add missing hw pixel format in mediacodec decoder

2016-11-24 Thread Wang Bin
Sorry, please ignore this patch. I forgot to add a suffix "_mediacodec" in
codec name.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] add missing hw pixel format in mediacodec decoder

2016-11-24 Thread Wang Bin
this attachment is not base64 encoded


0001-add-missing-hw-pixel-format-in-mediacodec-decoder.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] add missing hw pixel format in mediacodec decoder

2016-11-24 Thread Wang Bin
This fixes AV_PIX_FMT_MEDIACODEC not appearing in get_format.


0001-add-missing-hw-pixel-format-in-mediacodec-decoder.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] FFmpeg 3.1 name

2016-06-23 Thread Wang Bin
What about choosing a scientist who was born in FFmpeg release month?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH][RFC] avcodec: disallow hwaccel with frame threads

2015-10-22 Thread Wang Bin
VLC is using frame threading with hwaccel
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] winrt: multithreading support

2015-10-14 Thread Wang Bin



0001-winrt-multithreading-support.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] winrt: multithreading support

2015-10-09 Thread Wang Bin
I will update the patch next week
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] winrt: multithreading support

2015-10-01 Thread Wang Bin
Seems that the function style macro is always evaluated. So gcc gives an
error at WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) if
WINAPI_FAMILY_PARTITION is not defined. Now I copy the macros from
configure.


0001-winrt-multithreading-support.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] winrt: multithreading support

2015-09-30 Thread Wang Bin
>
> with mingw64:
> compat/w32pthreads.h:40:55: error: missing binary operator before token "("


Which toolchain? What host os? Usually I use the toolchain from
https://sourceforge.net/projects/mingw-w64/files/Toolchains%20targetting%20Win32/Personal%20Builds/mingw-builds/
I can try mingw about 24h later on windows. I tested on osx with
i686-w64-mingw32-gcc 4.9 just now and no such error.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] winrt: multithreading support

2015-09-29 Thread wang-bin
---
 compat/w32pthreads.h | 11 +++
 configure|  4 
 libavutil/cpu.c  | 11 ++-
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/compat/w32pthreads.h b/compat/w32pthreads.h
index 87e816f..9828e8a 100644
--- a/compat/w32pthreads.h
+++ b/compat/w32pthreads.h
@@ -37,7 +37,13 @@
 
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
+#if defined(WINAPI_FAMILY) && 
!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define TARGET_OS_WINRT
+#endif
+#ifndef TARGET_OS_WINRT
 #include <process.h>
+#endif
+
 
 #include "libavutil/attributes.h"
 #include "libavutil/common.h"
@@ -82,8 +88,13 @@ static av_unused int pthread_create(pthread_t *thread, const 
void *unused_attr,
 {
 thread->func   = start_routine;
 thread->arg= arg;
+#ifndef TARGET_OS_WINRT
 thread->handle = (void*)_beginthreadex(NULL, 0, win32thread_worker, thread,
0, NULL);
+#else
+thread->handle = (void*)CreateThread(NULL, 0, win32thread_worker, thread,
+   0, NULL);
+#endif
 return !thread->handle;
 }
 
diff --git a/configure b/configure
index 01f1797..4a7cdff 100755
--- a/configure
+++ b/configure
@@ -5189,6 +5189,10 @@ check_type "vdpau/vdpau.h" "VdpPictureInfoHEVC"
 if ! disabled w32threads && ! enabled pthreads; then
 check_func_headers "windows.h process.h" _beginthreadex &&
 enable w32threads || disable w32threads
+if ! enabled w32threads; then
+check_func_headers "windows.h" CreateThread &&
+enable w32threads || disable w32threads
+fi
 fi
 
 # check for some common methods of building with pthread support
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 780368d..5968bc5 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -30,8 +30,11 @@
 #endif
 #include <sched.h>
 #endif
-#if HAVE_GETPROCESSAFFINITYMASK
+#if HAVE_WINDOWS_H
 #include <windows.h>
+#if defined(WINAPI_FAMILY) && 
!WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+#define TARGET_OS_WINRT
+#endif
 #endif
 #if HAVE_SYSCTL
 #if HAVE_SYS_PARAM_H
@@ -253,6 +256,9 @@ int av_cpu_count(void)
 static volatile int printed;
 
 int nb_cpus = 1;
+#ifdef TARGET_OS_WINRT
+SYSTEM_INFO sysinfo;
+#endif
 #if HAVE_SCHED_GETAFFINITY && defined(CPU_COUNT)
 cpu_set_t cpuset;
 
@@ -274,6 +280,9 @@ int av_cpu_count(void)
 nb_cpus = sysconf(_SC_NPROC_ONLN);
 #elif HAVE_SYSCONF && defined(_SC_NPROCESSORS_ONLN)
 nb_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+#elif defined(TARGET_OS_WINRT)
+GetNativeSystemInfo(&sysinfo);
+nb_cpus = sysinfo.dwNumberOfProcessors;
 #endif
 
 if (!printed) {
-- 
2.1.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel