Re: [FFmpeg-devel] [PATCH 2/3] swscale: Add support for NV24 and NV42

2019-05-12 Thread Philip Langdale
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA256

On Sun, 12 May 2019 12:54:24 +0200
Michael Niedermayer  wrote:

> 
> all these + 1 in the code above look a bit suspect. Can you explain
> what they do assuming they are intended

Good catch. I should have removed them when I removed the divide by 2.
 
> [...]
> > diff --git a/libswscale/utils.c b/libswscale/utils.c
> > index df68bcc0d9..1b1f779532 100644
> > --- a/libswscale/utils.c
> > +++ b/libswscale/utils.c
> > @@ -264,6 +264,8 @@ static const FormatEntry
> > format_entries[AV_PIX_FMT_NB] = { [AV_PIX_FMT_YUVA422P12LE] = { 1,
> > 1 }, [AV_PIX_FMT_YUVA444P12BE] = { 1, 1 },
> >  [AV_PIX_FMT_YUVA444P12LE] = { 1, 1 },
> > +[AV_PIX_FMT_NV24]= { 1, 1 },
> > +[AV_PIX_FMT_NV42]= { 1, 1 },  
> 
> nitpick: this can be aligned prettier

They are aligned to the primary alignment within the list. The lines
above are exceptions to the existing pattern due to their length.

Thanks,

- --phil
-----BEGIN PGP SIGNATURE-----

iHUEARYIAB0WIQRokRbWmcX6x+Nv+3hgE8jODULZ6QUCXNgw0gAKCRBgE8jODULZ
6ZHpAPoDp3JcLdqjlWr/mo0luxS0UNHwonKuFU04Dpz8FYkw9AD8DL1DC4czUMT7
qPodwqeGY6xKpeNwf6f7OL78pHP3FAM=
=+hv7
-----END PGP SIGNATURE-----
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

Re: [FFmpeg-devel] [PATCH 2/3] swscale: Add support for NV24 and NV42

2019-05-12 Thread Michael Niedermayer
On Sat, May 11, 2019 at 11:31:56AM -0700, Philip Langdale wrote:

> For the sake of completeness, I've added NV24/NV42 support to swscale,
> but the specific use-case I noted when adding the pixel formats
> doesn't require swscale support (because it's OpenGL interop).

not sure this adds value to the commit message / would help
someone reading this commit in the future


> 
> The implementation is pretty straight-forward. Most of the existing
> NV12 codepaths work regardless of subsampling and are re-used as is.
> Where necessary I wrote the slightly different NV24 versions.
> 
> Finally, the one thing that confused me for a long time was the
> asm specific x86 path that did an explicit exclusion check for NV12.
> I replaced that with a semi-planar check and also updated the
> equivalent PPC code, which Lauri kindly checked.
> 
> Signed-off-by: Philip Langdale 
> ---
>  libswscale/input.c   |  2 +
>  libswscale/output.c  |  6 ++-
>  libswscale/ppc/swscale_altivec.c |  3 +-
>  libswscale/ppc/swscale_vsx.c |  3 +-
>  libswscale/swscale_unscaled.c| 51 
>  libswscale/utils.c   |  2 +
>  libswscale/version.h |  2 +-
>  libswscale/x86/swscale_template.c|  4 +-
>  tests/ref/fate/filter-pixfmts-copy   |  2 +
>  tests/ref/fate/filter-pixfmts-crop   |  2 +
>  tests/ref/fate/filter-pixfmts-field  |  2 +
>  tests/ref/fate/filter-pixfmts-fieldorder |  2 +
>  tests/ref/fate/filter-pixfmts-hflip  |  2 +
>  tests/ref/fate/filter-pixfmts-il |  2 +
>  tests/ref/fate/filter-pixfmts-null   |  2 +
>  tests/ref/fate/filter-pixfmts-pad|  2 +
>  tests/ref/fate/filter-pixfmts-scale  |  2 +
>  tests/ref/fate/filter-pixfmts-transpose  |  2 +
>  tests/ref/fate/filter-pixfmts-vflip  |  2 +
>  tests/ref/fate/sws-pixdesc-query |  6 +++
>  20 files changed, 92 insertions(+), 9 deletions(-)
> 
> diff --git a/libswscale/input.c b/libswscale/input.c
> index c2dc356b5d..064f8da314 100644
> --- a/libswscale/input.c
> +++ b/libswscale/input.c
> @@ -1020,9 +1020,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
>  c->chrToYV12 = uyvyToUV_c;
>  break;
>  case AV_PIX_FMT_NV12:
> +case AV_PIX_FMT_NV24:
>  c->chrToYV12 = nv12ToUV_c;
>  break;
>  case AV_PIX_FMT_NV21:
> +case AV_PIX_FMT_NV42:
>  c->chrToYV12 = nv21ToUV_c;
>  break;
>  case AV_PIX_FMT_RGB8:
> diff --git a/libswscale/output.c b/libswscale/output.c
> index d3401f0cd1..26b0ff3d48 100644
> --- a/libswscale/output.c
> +++ b/libswscale/output.c
> @@ -410,7 +410,8 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t 
> *chrFilter, int chrFilterS
>  const uint8_t *chrDither = c->chrDither8;
>  int i;
>  
> -if (dstFormat == AV_PIX_FMT_NV12)
> +if (dstFormat == AV_PIX_FMT_NV12 ||
> +dstFormat == AV_PIX_FMT_NV24)
>  for (i=0; i<chrDstW; i++) {
>  int u = chrDither[i & 7] << 12;
>  int v = chrDither[(i + 3) & 7] << 12;
> @@ -2496,7 +2497,8 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
>  } else {
>  *yuv2plane1 = yuv2plane1_8_c;
>  *yuv2planeX = yuv2planeX_8_c;
> -if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
> +if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 ||
> +dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42)
>  *yuv2nv12cX = yuv2nv12cX_c;
>  }
>  
> diff --git a/libswscale/ppc/swscale_altivec.c 
> b/libswscale/ppc/swscale_altivec.c
> index 3cd9782da4..6b8cc2c194 100644
> --- a/libswscale/ppc/swscale_altivec.c
> +++ b/libswscale/ppc/swscale_altivec.c
> @@ -247,8 +247,7 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
>  if (c->srcBpc == 8 && c->dstBpc <= 14) {
>  c->hyScale = c->hcScale = hScale_real_altivec;
>  }
> -if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
> -dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
> +if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && 
> !isSemiPlanarYUV(dstFormat) &&
>  dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != 
> AV_PIX_FMT_GRAYF32LE &&
>  !c->needAlpha) {
>  c->yuv2planeX = yuv2planeX_altivec;
> diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c
> index a617f76741..75dee5ea58 100644
> --- a/libswscale/ppc/swscale_vsx.c
> +++ b/libswscale/ppc/swscale_vsx.c
> @@ -2096,8 +2096,7 @@ av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
>   : hScale16To15_vsx;
>  }
>  }
> -if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
> -dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
> +if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && 
> !isSemiPlanarYUV(dstFormat) &&
>  dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != 
> 

[FFmpeg-devel] [PATCH 2/3] swscale: Add support for NV24 and NV42

2019-05-11 Thread Philip Langdale
For the sake of completeness, I've added NV24/NV42 support to swscale,
but the specific use-case I noted when adding the pixel formats
doesn't require swscale support (because it's OpenGL interop).

The implementation is pretty straight-forward. Most of the existing
NV12 codepaths work regardless of subsampling and are re-used as is.
Where necessary I wrote the slightly different NV24 versions.

Finally, the one thing that confused me for a long time was the
asm specific x86 path that did an explicit exclusion check for NV12.
I replaced that with a semi-planar check and also updated the
equivalent PPC code, which Lauri kindly checked.

Signed-off-by: Philip Langdale 
---
 libswscale/input.c   |  2 +
 libswscale/output.c  |  6 ++-
 libswscale/ppc/swscale_altivec.c |  3 +-
 libswscale/ppc/swscale_vsx.c |  3 +-
 libswscale/swscale_unscaled.c| 51 
 libswscale/utils.c   |  2 +
 libswscale/version.h |  2 +-
 libswscale/x86/swscale_template.c|  4 +-
 tests/ref/fate/filter-pixfmts-copy   |  2 +
 tests/ref/fate/filter-pixfmts-crop   |  2 +
 tests/ref/fate/filter-pixfmts-field  |  2 +
 tests/ref/fate/filter-pixfmts-fieldorder |  2 +
 tests/ref/fate/filter-pixfmts-hflip  |  2 +
 tests/ref/fate/filter-pixfmts-il |  2 +
 tests/ref/fate/filter-pixfmts-null   |  2 +
 tests/ref/fate/filter-pixfmts-pad|  2 +
 tests/ref/fate/filter-pixfmts-scale  |  2 +
 tests/ref/fate/filter-pixfmts-transpose  |  2 +
 tests/ref/fate/filter-pixfmts-vflip  |  2 +
 tests/ref/fate/sws-pixdesc-query |  6 +++
 20 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/libswscale/input.c b/libswscale/input.c
index c2dc356b5d..064f8da314 100644
--- a/libswscale/input.c
+++ b/libswscale/input.c
@@ -1020,9 +1020,11 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c)
 c->chrToYV12 = uyvyToUV_c;
 break;
 case AV_PIX_FMT_NV12:
+case AV_PIX_FMT_NV24:
 c->chrToYV12 = nv12ToUV_c;
 break;
 case AV_PIX_FMT_NV21:
+case AV_PIX_FMT_NV42:
 c->chrToYV12 = nv21ToUV_c;
 break;
 case AV_PIX_FMT_RGB8:
diff --git a/libswscale/output.c b/libswscale/output.c
index d3401f0cd1..26b0ff3d48 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -410,7 +410,8 @@ static void yuv2nv12cX_c(SwsContext *c, const int16_t 
*chrFilter, int chrFilterS
 const uint8_t *chrDither = c->chrDither8;
 int i;
 
-if (dstFormat == AV_PIX_FMT_NV12)
+if (dstFormat == AV_PIX_FMT_NV12 ||
+dstFormat == AV_PIX_FMT_NV24)
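 for (i=0; i<chrDstW; i++) {
 int u = chrDither[i & 7] << 12;
 int v = chrDither[(i + 3) & 7] << 12;
@@ -2496,7 +2497,8 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c,
 } else {
 *yuv2plane1 = yuv2plane1_8_c;
 *yuv2planeX = yuv2planeX_8_c;
-if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21)
+if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 ||
+dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42)
 *yuv2nv12cX = yuv2nv12cX_c;
 }
 
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index 3cd9782da4..6b8cc2c194 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -247,8 +247,7 @@ av_cold void ff_sws_init_swscale_ppc(SwsContext *c)
 if (c->srcBpc == 8 && c->dstBpc <= 14) {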
 c->hyScale = c->hcScale = hScale_real_altivec;
 }
-if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
-dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && 
!isSemiPlanarYUV(dstFormat) &&
 dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE 
&&
 !c->needAlpha) {
 c->yuv2planeX = yuv2planeX_altivec;
diff --git a/libswscale/ppc/swscale_vsx.c b/libswscale/ppc/swscale_vsx.c
index a617f76741..75dee5ea58 100644
--- a/libswscale/ppc/swscale_vsx.c
+++ b/libswscale/ppc/swscale_vsx.c
@@ -2096,8 +2096,7 @@ av_cold void ff_sws_init_swscale_vsx(SwsContext *c)
  : hScale16To15_vsx;
 }
 }
-if (!is16BPS(dstFormat) && !isNBPS(dstFormat) &&
-dstFormat != AV_PIX_FMT_NV12 && dstFormat != AV_PIX_FMT_NV21 &&
+if (!is16BPS(dstFormat) && !isNBPS(dstFormat) && 
!isSemiPlanarYUV(dstFormat) &&
 dstFormat != AV_PIX_FMT_GRAYF32BE && dstFormat != AV_PIX_FMT_GRAYF32LE 
&&
 !c->needAlpha) {
 c->yuv2planeX = yuv2planeX_vsx;
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index be04a236d8..d7cc0bd4c5 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -180,6 +180,47 @@ static int nv12ToPlanarWrapper(SwsContext *c, const 
uint8_t *src[],
 return srcSliceH;
 }
 
+static int planarToNv24Wrapper(SwsContext *c, const uint8_t *src[],
+   int srcStride[], int srcSliceY,
+   int srcSliceH, uint8_t *dstParam[],
+   int dstStride[])
+{
+uint8_t *dst = dstParam[1] + dstStride[1] * srcSliceY;
+
+copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
+  dstParam[0], dstStride[0]);
+
+if (c->dstFormat == AV_PIX_FMT_NV24)
+interleaveBytes(src[1], src[2], dst, c->chrSrcW, (srcSliceH + 1),
+srcStride[1], srcStride[2], dstStride[1]);
+else
+interleaveBytes(src[2], src[1], dst, c->chrSrcW, (srcSliceH + 1),
+srcStride[2], srcStride[1], dstStride[1]);
+
+return srcSliceH;
+}
+
+static int