[FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Philip Langdale
Signed-off-by: Philip Langdale 
---
 Changelog|   1 +
 configure|   1 +
 doc/filters.texi |  58 +
 libavfilter/Makefile |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/vf_yadif_cuda.c  | 426 +++
 libavfilter/vf_yadif_cuda.cu | 296 
 7 files changed, 784 insertions(+)
 create mode 100644 libavfilter/vf_yadif_cuda.c
 create mode 100644 libavfilter/vf_yadif_cuda.cu

diff --git a/Changelog b/Changelog
index 8430da3c6a..f92886fc2e 100644
--- a/Changelog
+++ b/Changelog
@@ -44,6 +44,7 @@ version 4.1:
 - xstack filter
 - pcm vidc decoder and encoder
 - (a)graphmonitor filter
+- yadif_cuda filter
 
 
 version 4.0:
diff --git a/configure b/configure
index 2606b885b0..f3fa0cde86 100755
--- a/configure
+++ b/configure
@@ -3482,6 +3482,7 @@ zscale_filter_deps="libzimg const_nan"
 scale_vaapi_filter_deps="vaapi"
 vpp_qsv_filter_deps="libmfx"
 vpp_qsv_filter_select="qsvvpp"
+yadif_cuda_filter_deps="cuda_sdk"
 
 # examples
 avio_dir_cmd_deps="avformat avutil"
diff --git a/doc/filters.texi b/doc/filters.texi
index 4345a4931b..5d4bfd2e8e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17943,6 +17943,64 @@ filter").
 It accepts the following parameters:
 
 
+@table @option
+
+@item mode
+The interlacing mode to adopt. It accepts one of the following values:
+
+@table @option
+@item 0, send_frame
+Output one frame for each frame.
+@item 1, send_field
+Output one frame for each field.
+@item 2, send_frame_nospatial
+Like @code{send_frame}, but it skips the spatial interlacing check.
+@item 3, send_field_nospatial
+Like @code{send_field}, but it skips the spatial interlacing check.
+@end table
+
+The default value is @code{send_frame}.
+
+@item parity
+The picture field parity assumed for the input interlaced video. It accepts one
+of the following values:
+
+@table @option
+@item 0, tff
+Assume the top field is first.
+@item 1, bff
+Assume the bottom field is first.
+@item -1, auto
+Enable automatic detection of field parity.
+@end table
+
+The default value is @code{auto}.
+If the interlacing is unknown or the decoder does not export this information,
+top field first will be assumed.
+
+@item deint
+Specify which frames to deinterlace. Accept one of the following
+values:
+
+@table @option
+@item 0, all
+Deinterlace all frames.
+@item 1, interlaced
+Only deinterlace frames marked as interlaced.
+@end table
+
+The default value is @code{all}.
+@end table
+
+@section yadif_cuda
+
+Deinterlace the input video using the @ref{yadif} algorithm, but implemented
+in CUDA so that it can work as part of a GPU accelerated pipeline with nvdec
+and/or nvenc.
+
+It accepts the following parameters:
+
+
 @table @option
 
 @item mode
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index ffbcb40806..4b78b29fad 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -408,6 +408,7 @@ OBJS-$(CONFIG_WEAVE_FILTER)  += vf_weave.o
 OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o
 OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o
 OBJS-$(CONFIG_YADIF_FILTER)  += vf_yadif.o yadif_common.o
+OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o
 OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o
 OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index d5a211bda5..c40c7e3a3c 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -389,6 +389,7 @@ extern AVFilter ff_vf_weave;
 extern AVFilter ff_vf_xbr;
 extern AVFilter ff_vf_xstack;
 extern AVFilter ff_vf_yadif;
+extern AVFilter ff_vf_yadif_cuda;
 extern AVFilter ff_vf_zmq;
 extern AVFilter ff_vf_zoompan;
 extern AVFilter ff_vf_zscale;
diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c
new file mode 100644
index 0000000000..be22344d9d
--- /dev/null
+++ b/libavfilter/vf_yadif_cuda.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (C) 2018 Philip Langdale 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include 
+#include "libavutil/avassert.h"
+#include 

Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Philip Langdale
On Thu, 1 Nov 2018 22:16:53 +0100
Hendrik Leppkes  wrote:

> One might do something like this:
> 
> NVDEC -> hwdownload -> yadif -> x264
> NVDEC -> cuda_yadif -> hwdownload -> x264
> 
> How do those compare, maybe when you replace x264 with null?

I set my baseline with NVDEC -> hwdownload -> null.

I then compared hwdownload->yadif and yadif_cuda->hwdownload with
send_frame and send_field.

* hwdownload->yadif=send_frame: 70%
* hwdownload->yadif=send_field: 56%
* yadif_cuda=send_frame->hwdownload: 88%
* yadif_cuda=send_field->hwdownload: 69%

--phil
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Hendrik Leppkes
On Thu, Nov 1, 2018 at 10:12 PM Philip Langdale  wrote:
>
> On 2018-11-01 14:05, Timo Rothenpieler wrote:
> > On 01.11.2018 21:54, Carl Eugen Hoyos wrote:
> >> 2018-10-26 17:56 GMT+02:00, Philip Langdale :
> >>
> >> Could you add some sample numbers about how fast the cuda
> >> variant is compared to cpu?
> >
> > I don't think such numbers are overly useful by themselves.
> > The primary benefit here is that it's now possible to decode,
> > deinterlace and encode all without pulling the frames out of VRAM.
> >
> > Though it would definitely be interesting. I guess hwupload +
> > yadif_cuda + hwdownload vs. normal yadif is a fair comparison?
>
> Yeah, the comparison is a bit fuzzy, because you completely
> change how you think about solving the problem depending on whether
> you have a filter available or not. But I did get some data previously.
>
> For cpu decode + cpu yadif, the yadif slowdown is ~50%
> For gpu decode + gpu yadif, the yadif slowdown is ~25%
>
> That means, the fps reported by `ffmpeg` went down by 50%/25%
> respectively. This was with null encoding.
>
> I can collect data for the up/down case, but I do think it's
> unrealistic - no one would actually do that.
>

One might do something like this:

NVDEC -> hwdownload -> yadif -> x264
NVDEC -> cuda_yadif -> hwdownload -> x264

How do those compare, maybe when you replace x264 with null?

- Hendrik
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Carl Eugen Hoyos
2018-11-01 22:12 GMT+01:00, Philip Langdale :
> On 2018-11-01 14:05, Timo Rothenpieler wrote:
>> On 01.11.2018 21:54, Carl Eugen Hoyos wrote:
>>> 2018-10-26 17:56 GMT+02:00, Philip Langdale :
>>>
>>> Could you add some sample numbers about how fast the cuda
>>> variant is compared to cpu?
>>
>> I don't think such numbers are overly useful by themselves.
>> The primary benefit here is that it's now possible to decode,
>> deinterlace and encode all without pulling the frames out of VRAM.
>>
>> Though it would definitely be interesting. I guess hwupload +
>> yadif_cuda + hwdownload vs. normal yadif is a fair comparison?
>
> Yeah, the comparison is a bit fuzzy, because you completely
> change how you think about solving the problem depending on whether
> you have a filter available or not. But I did get some data previously.
>
> For cpu decode + cpu yadif, the yadif slowdown is ~50%
> For gpu decode + gpu yadif, the yadif slowdown is ~25%

Thank you!

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Philip Langdale

On 2018-11-01 14:05, Timo Rothenpieler wrote:

On 01.11.2018 21:54, Carl Eugen Hoyos wrote:

2018-10-26 17:56 GMT+02:00, Philip Langdale :

Could you add some sample numbers about how fast the cuda
variant is compared to cpu?


I don't think such numbers are overly useful by themselves.
The primary benefit here is that it's now possible to decode,
deinterlace and encode all without pulling the frames out of VRAM.

Though it would definitely be interesting. I guess hwupload +
yadif_cuda + hwdownload vs. normal yadif is a fair comparison?


Yeah, the comparison is a bit fuzzy, because you completely
change how you think about solving the problem depending on whether
you have a filter available or not. But I did get some data previously.

For cpu decode + cpu yadif, the yadif slowdown is ~50%
For gpu decode + gpu yadif, the yadif slowdown is ~25%

That means, the fps reported by `ffmpeg` went down by 50%/25%
respectively. This was with null encoding.

I can collect data for the up/down case, but I do think it's
unrealistic - no one would actually do that.

--phil
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Timo Rothenpieler

On 01.11.2018 21:54, Carl Eugen Hoyos wrote:

2018-10-26 17:56 GMT+02:00, Philip Langdale :

Could you add some sample numbers about how fast the cuda
variant is compared to cpu?


I don't think such numbers are overly useful by themselves.
The primary benefit here is that it's now possible to decode, 
deinterlace and encode all without pulling the frames out of VRAM.


Though it would definitely be interesting. I guess hwupload + yadif_cuda 
+ hwdownload vs. normal yadif is a fair comparison?




smime.p7s
Description: S/MIME Cryptographic Signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Carl Eugen Hoyos
2018-10-26 17:56 GMT+02:00, Philip Langdale :

Could you add some sample numbers about how fast the cuda
variant is compared to cpu?

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-11-01 Thread Timo Rothenpieler
Not an expert on CUDA code but it looks sensible to me, C part looks 
good as well.


LGTM once the yadif changes have been acked.



smime.p7s
Description: S/MIME Cryptographic Signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/3] avfilter/vf_yadif_cuda: CUDA accelerated deinterlacer

2018-10-26 Thread Philip Langdale
Signed-off-by: Philip Langdale 
---
 Changelog|   1 +
 configure|   1 +
 doc/filters.texi |  58 +
 libavfilter/Makefile |   1 +
 libavfilter/allfilters.c |   1 +
 libavfilter/version.h|   2 +-
 libavfilter/vf_yadif_cuda.c  | 426 +++
 libavfilter/vf_yadif_cuda.cu | 296 
 8 files changed, 785 insertions(+), 1 deletion(-)
 create mode 100644 libavfilter/vf_yadif_cuda.c
 create mode 100644 libavfilter/vf_yadif_cuda.cu

diff --git a/Changelog b/Changelog
index de0383047e..5c053503b5 100644
--- a/Changelog
+++ b/Changelog
@@ -41,6 +41,7 @@ version :
 - decoding S12M timecode in h264
 - xstack filter
 - pcm vidc decoder and encoder
+- yadif_cuda filter
 
 
 version 4.0:
diff --git a/configure b/configure
index 01c3a1011d..5a5d0b0868 100755
--- a/configure
+++ b/configure
@@ -3481,6 +3481,7 @@ zscale_filter_deps="libzimg const_nan"
 scale_vaapi_filter_deps="vaapi"
 vpp_qsv_filter_deps="libmfx"
 vpp_qsv_filter_select="qsvvpp"
+yadif_cuda_filter_deps="cuda_sdk"
 
 # examples
 avio_dir_cmd_deps="avformat avutil"
diff --git a/doc/filters.texi b/doc/filters.texi
index 7811c25ddb..41da25081a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -17862,6 +17862,64 @@ filter").
 It accepts the following parameters:
 
 
+@table @option
+
+@item mode
+The interlacing mode to adopt. It accepts one of the following values:
+
+@table @option
+@item 0, send_frame
+Output one frame for each frame.
+@item 1, send_field
+Output one frame for each field.
+@item 2, send_frame_nospatial
+Like @code{send_frame}, but it skips the spatial interlacing check.
+@item 3, send_field_nospatial
+Like @code{send_field}, but it skips the spatial interlacing check.
+@end table
+
+The default value is @code{send_frame}.
+
+@item parity
+The picture field parity assumed for the input interlaced video. It accepts one
+of the following values:
+
+@table @option
+@item 0, tff
+Assume the top field is first.
+@item 1, bff
+Assume the bottom field is first.
+@item -1, auto
+Enable automatic detection of field parity.
+@end table
+
+The default value is @code{auto}.
+If the interlacing is unknown or the decoder does not export this information,
+top field first will be assumed.
+
+@item deint
+Specify which frames to deinterlace. Accept one of the following
+values:
+
+@table @option
+@item 0, all
+Deinterlace all frames.
+@item 1, interlaced
+Only deinterlace frames marked as interlaced.
+@end table
+
+The default value is @code{all}.
+@end table
+
+@section yadif_cuda
+
+Deinterlace the input video using the @ref{yadif} algorithm, but implemented
+in CUDA so that it can work as part of a GPU accelerated pipeline with nvdec
+and/or nvenc.
+
+It accepts the following parameters:
+
+
 @table @option
 
 @item mode
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 6729b62b44..d2957c6403 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -407,6 +407,7 @@ OBJS-$(CONFIG_WEAVE_FILTER)  += vf_weave.o
 OBJS-$(CONFIG_XBR_FILTER)+= vf_xbr.o
 OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o
 OBJS-$(CONFIG_YADIF_FILTER)  += vf_yadif.o yadif_common.o
+OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o
 OBJS-$(CONFIG_ZMQ_FILTER)+= f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)+= vf_zoompan.o
 OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index b2cb58fc38..daabb2aa65 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -388,6 +388,7 @@ extern AVFilter ff_vf_weave;
 extern AVFilter ff_vf_xbr;
 extern AVFilter ff_vf_xstack;
 extern AVFilter ff_vf_yadif;
+extern AVFilter ff_vf_yadif_cuda;
 extern AVFilter ff_vf_zmq;
 extern AVFilter ff_vf_zoompan;
 extern AVFilter ff_vf_zscale;
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 77e1a77b50..e2572d623e 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,7 +30,7 @@
 #include "libavutil/version.h"
 
 #define LIBAVFILTER_VERSION_MAJOR   7
-#define LIBAVFILTER_VERSION_MINOR  38
+#define LIBAVFILTER_VERSION_MINOR  39
 #define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c
new file mode 100644
index 0000000000..728b33076b
--- /dev/null
+++ b/libavfilter/vf_yadif_cuda.c
@@ -0,0 +1,426 @@
+/*
+ * Copyright (C) 2018 Philip Langdale 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope