Re: [FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-03 Thread Fu, Ting


> -----Original Message-----
> From: ffmpeg-devel  On Behalf Of
> Michael Niedermayer
> Sent: Tuesday, December 3, 2019 04:11 PM
> To: FFmpeg development discussions and patches 
> Subject: Re: [FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3
> version
> 
> On Mon, Dec 02, 2019 at 11:12:42AM +0800, Ting Fu wrote:
> > Tested using this command:
> > /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
> > -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null
> >
> > The fps increase from 389 to 640 on my local machine.
> >
> > Signed-off-by: Ting Fu 
> > ---
> >  libswscale/x86/yuv2rgb.c  |   8 +-
> >  libswscale/x86/yuv2rgb_template.c |  58 ++-
> >  libswscale/x86/yuv_2_rgb.asm  | 162 +++---
> >  3 files changed, 209 insertions(+), 19 deletions(-)
> 
> one of these patches seems to produce new warnings like:
> libswscale/x86/yuv2rgb_template.c: In function ‘yuv420_rgb15’:
> libswscale/x86/yuv2rgb_template.c:113:5: warning: passing argument 5 of
> ‘ff_yuv_420_rgb15_ssse3’ from incompatible pointer type
> 

Hi Michael,

This warning occurs because the type of one formal parameter in
ff_yuv_420_rgbXX_() was declared as uint8_t, but it is actually uint64_t.
I have corrected it in PATCH V3.
Thank you for your review; I will pay more attention to warnings in the future.

Ting Fu

> 
> 
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> The real ebay dictionary, page 2
> "100% positive feedback" - "All either got their money back or didnt complain"
> "Best seller ever, very honest" - "Seller refunded buyer after failed scam"
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-03 Thread Michael Niedermayer
On Mon, Dec 02, 2019 at 11:12:42AM +0800, Ting Fu wrote:
> Tested using this command:
> /ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
> -vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null
> 
> The fps increase from 389 to 640 on my local machine.
> 
> Signed-off-by: Ting Fu 
> ---
>  libswscale/x86/yuv2rgb.c  |   8 +-
>  libswscale/x86/yuv2rgb_template.c |  58 ++-
>  libswscale/x86/yuv_2_rgb.asm  | 162 +++---
>  3 files changed, 209 insertions(+), 19 deletions(-)

one of these patches seems to produce new warnings like:
libswscale/x86/yuv2rgb_template.c: In function ‘yuv420_rgb15’:
libswscale/x86/yuv2rgb_template.c:113:5: warning: passing argument 5 of 
‘ff_yuv_420_rgb15_ssse3’ from incompatible pointer type



[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The real ebay dictionary, page 2
"100% positive feedback" - "All either got their money back or didnt complain"
"Best seller ever, very honest" - "Seller refunded buyer after failed scam"


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-devel] [PATCH V2 2/2] libswscale/x86/yuv2rgb: add ssse3 version

2019-12-01 Thread Ting Fu
Tested using this command:
/ffmpeg -pix_fmt yuv420p -s 1920*1080 -i ArashRawYuv420.yuv \
-vcodec rawvideo -s 1920*1080 -pix_fmt rgb24 -f null /dev/null

The fps increased from 389 to 640 on my local machine.

Signed-off-by: Ting Fu 
---
 libswscale/x86/yuv2rgb.c  |   8 +-
 libswscale/x86/yuv2rgb_template.c |  58 ++-
 libswscale/x86/yuv_2_rgb.asm  | 162 +++---
 3 files changed, 209 insertions(+), 19 deletions(-)

diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c
index ed9b613cab..b83dd7089a 100644
--- a/libswscale/x86/yuv2rgb.c
+++ b/libswscale/x86/yuv2rgb.c
@@ -61,13 +61,19 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 
0x0707070707070707ULL;
 #define COMPILE_TEMPLATE_MMXEXT 1
 #endif /* HAVE_MMXEXT */
 
+//SSSE3 versions
+#if HAVE_SSSE3
+#define COMPILE_TEMPLATE_SSSE3 1
+#endif
+
 #include "yuv2rgb_template.c"
 
 av_cold SwsFunc ff_yuv2rgb_init_x86(SwsContext *c)
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags)) {
+if (EXTERNAL_MMX(cpu_flags) || EXTERNAL_MMXEXT(cpu_flags) ||
+EXTERNAL_SSSE3(cpu_flags)) {
 switch (c->dstFormat) {
 case AV_PIX_FMT_RGB32:
 if (c->srcFormat == AV_PIX_FMT_YUVA420P) {
diff --git a/libswscale/x86/yuv2rgb_template.c 
b/libswscale/x86/yuv2rgb_template.c
index efe6356f30..fe586047f0 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -40,6 +40,30 @@
 const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
 x86_reg index = -h_size / 2; \
 
+extern void ff_yuv_420_rgb24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr24_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb15_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb16_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuv_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+   const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+   const uint8_t *py_2index);
+extern void ff_yuva_420_rgb32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
+extern void ff_yuva_420_bgr32_ssse3(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
+const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
+const uint8_t *py_2index, const uint8_t 
*pa_2index);
 extern void ff_yuv_420_rgb24_mmxext(x86_reg index, uint8_t *image, const 
uint8_t *pu_index,
 const uint8_t *pv_index, const uint8_t 
*pointer_c_dither,
 const uint8_t *py_2index);
@@ -84,7 +108,12 @@ static inline int yuv420_rgb15(SwsContext *c, const uint8_t 
*src[],
 c->greenDither = ff_dither8[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb15_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb15_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -102,7 +131,12 @@ static inline int yuv420_rgb16(SwsContext *c, const 
uint8_t *src[],
 c->greenDither = ff_dither4[y   & 1];
 c->redDither   = ff_dither8[(y + 1) & 1];
 #endif
+
+#if COMPILE_TEMPLATE_SSSE3
+ff_yuv_420_rgb16_ssse3(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#else
 ff_yuv_420_rgb16_mmx(index, image, pu - index, pv - index, 
&(c->redDither), py - 2 * index);
+#endif
 }
 return srcSliceH;
 }
@@ -115,7 +149,9 @@ static inline int yuv420_rgb24(SwsContext *c, const uint8_t 
*src[],
 int