Currently works only with RGBA on top of NV12. --- configure | 1 + libavfilter/Makefile | 2 + libavfilter/allfilters.c | 1 + libavfilter/opencl/overlay.cl | 73 +++++++++ libavfilter/opencl_source.h | 1 + libavfilter/vf_overlay_opencl.c | 326 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 404 insertions(+) create mode 100644 libavfilter/opencl/overlay.cl create mode 100644 libavfilter/vf_overlay_opencl.c
diff --git a/configure b/configure index 95f066a85..079217c5e 100755 --- a/configure +++ b/configure @@ -2524,6 +2524,7 @@ hqdn3d_filter_deps="gpl" interlace_filter_deps="gpl" movie_filter_deps="avcodec avformat" ocv_filter_deps="libopencv" +overlay_opencl_filter_deps="opencl" program_opencl_filter_deps="opencl" resample_filter_deps="avresample" scale_filter_deps="swscale" diff --git a/libavfilter/Makefile b/libavfilter/Makefile index f2f829d63..c5c994941 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -75,6 +75,8 @@ OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o OBJS-$(CONFIG_NULL_FILTER) += vf_null.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o +OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ + opencl/overlay.o opencl/rgbyuv.o OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o OBJS-$(CONFIG_PROGRAM_OPENCL_FILTER) += vf_program_opencl.o opencl.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index 2ac2cc48c..fe69035e2 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -98,6 +98,7 @@ void avfilter_register_all(void) REGISTER_FILTER(NULL, null, vf); REGISTER_FILTER(OCV, ocv, vf); REGISTER_FILTER(OVERLAY, overlay, vf); + REGISTER_FILTER(OVERLAY_OPENCL, overlay_opencl, vf); REGISTER_FILTER(PAD, pad, vf); REGISTER_FILTER(PIXDESCTEST, pixdesctest, vf); REGISTER_FILTER(PROGRAM_OPENCL, program_opencl, vf); diff --git a/libavfilter/opencl/overlay.cl b/libavfilter/opencl/overlay.cl new file mode 100644 index 000000000..0667767c5 --- /dev/null +++ b/libavfilter/opencl/overlay.cl @@ -0,0 +1,73 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +__kernel void overlay_nv12_rgba(__write_only image2d_t dst_y, + __write_only image2d_t dst_uv, + __read_only image2d_t src_y, + __read_only image2d_t src_uv, + __read_only image2d_t overlay_rgba, + int x_position, + int y_position) +{ + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | + CLK_FILTER_NEAREST); + int i; + + int2 overlay_size = get_image_dim(overlay_rgba); + + int2 loc_y[4]; + int2 loc_uv = (int2)(get_global_id(0), get_global_id(1)); + int2 loc_overlay = (int2)(x_position, y_position); + float4 in_y[4]; + float4 in_uv; + for (i = 0; i < 4; i++) { + loc_y[i] = 2 * loc_uv + (int2)(i & 1, !!(i & 2)); + in_y[i] = read_imagef(src_y, sampler, loc_y[i]); + } + in_uv = read_imagef(src_uv, sampler, loc_uv); + + if (loc_y[0].x < x_position || + loc_y[0].y < y_position || + loc_y[3].x >= overlay_size.x + x_position || + loc_y[3].y >= overlay_size.y + y_position) { + for (i = 0; i < 4; i++) + write_imagef(dst_y, loc_y[i], in_y[i]); + write_imagef(dst_uv, loc_uv, in_uv); + return; + } + + float4 in_yuv[4]; + float4 uval_rgb[4], oval_rgb[4]; + float4 out_rgb[4], out_yuv[4]; + float4 out_uv = 0.0f; + for (i = 0; i < 4; i++) { + in_yuv[i].x = in_y[i].x; + in_yuv[i].yz = in_uv.xy; + + uval_rgb[i] = yuv_to_rgb_input(in_yuv[i]); + oval_rgb[i] = read_imagef(overlay_rgba, sampler, loc_y[i] - loc_overlay); + + out_rgb[i] = uval_rgb[i] * (1.0f - oval_rgb[i].w) + + oval_rgb[i] * oval_rgb[i].w; + out_yuv[i] = rgb_to_yuv_output(out_rgb[i]); + + write_imagef(dst_y, loc_y[i], out_yuv[i].x); + out_uv.xy += out_yuv[i].yz; + } + write_imagef(dst_uv, loc_uv, 0.25f * out_uv); +} diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h index d66c0b182..c117d05e5 100644 --- a/libavfilter/opencl_source.h +++ b/libavfilter/opencl_source.h @@ -19,6 +19,7 @@ #ifndef AVFILTER_OPENCL_SOURCE_H #define AVFILTER_OPENCL_SOURCE_H +extern const char *ff_opencl_source_overlay; extern const char *ff_opencl_source_rgbyuv; #endif /* AVFILTER_OPENCL_SOURCE_H */ diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c new file mode 100644 index 000000000..21a0cc4e3 --- /dev/null +++ b/libavfilter/vf_overlay_opencl.c @@ -0,0 +1,326 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/buffer.h" +#include "libavutil/common.h" +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_opencl.h" +#include "libavutil/log.h" +#include "libavutil/mathematics.h" +#include "libavutil/mem.h" +#include "libavutil/pixdesc.h" +#include "libavutil/opt.h" + +#include "avfilter.h" +#include "internal.h" +#include "opencl.h" +#include "opencl_source.h" +#include "video.h" + +typedef struct OverlayOpenCLContext { + OpenCLFilterContext ocf; + + int initialised; + cl_kernel kernel; + cl_command_queue command_queue; + + AVFrame *main; + AVFrame *overlay; + AVFrame *overlay_next; + + int x_position; + int y_position; +} OverlayOpenCLContext; + + +static int overlay_opencl_load(AVFilterContext *avctx, + enum AVColorSpace colorspace) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + const char *source[4]; + int err; + + source[0] = ff_opencl_source_rgbyuv; + source[1] = ff_opencl_make_rgbyuv("input", colorspace, 0); + source[2] = ff_opencl_make_rgbyuv("output", colorspace, 1); + source[3] = ff_opencl_source_overlay; + + err = ff_opencl_filter_load_program(avctx, source, 4); + + av_freep(&source[1]); + av_freep(&source[2]); + + if (err < 0) + goto fail; + + ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, + ctx->ocf.hwctx->device_id, + 0, &cle); + if (!ctx->command_queue) { + av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->kernel = clCreateKernel(ctx->ocf.program, "overlay_nv12_rgba", &cle); + if (!ctx->kernel) { + av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + ctx->initialised = 1; + return 0; + +fail: + if (ctx->command_queue) + clReleaseCommandQueue(ctx->command_queue); + if (ctx->kernel) + clReleaseKernel(ctx->kernel); + return err; +} + +static int overlay_opencl_filter_main(AVFilterLink *inlink, AVFrame *input) +{ + AVFilterContext *avctx = inlink->dst; + OverlayOpenCLContext *ctx = avctx->priv; + + av_log(avctx, AV_LOG_DEBUG, "Filter main: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), + input->width, input->height, input->pts); + + av_assert0(!ctx->main); + ctx->main = input; + + return 0; +} + +static int overlay_opencl_filter_overlay(AVFilterLink *inlink, AVFrame *input) +{ + AVFilterContext *avctx = inlink->dst; + OverlayOpenCLContext *ctx = avctx->priv; + + av_log(avctx, AV_LOG_DEBUG, "Filter overlay: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(input->format), + input->width, input->height, input->pts); + + av_assert0(!ctx->overlay_next); + ctx->overlay_next = input; + + return 0; +} + +static int overlay_opencl_request_frame(AVFilterLink *outlink) +{ + AVFilterContext *avctx = outlink->src; + OverlayOpenCLContext *ctx = avctx->priv; + AVFrame *output; + cl_mem mem; + cl_int cle, x, y; + size_t global_work[2]; + int kernel_arg = 0; + int err; + + av_log(avctx, AV_LOG_DEBUG, "Filter request frame.\n"); + + if (!ctx->main) { + err = ff_request_frame(avctx->inputs[0]); + if (err < 0) + return err; + } + if (!ctx->main) + return AVERROR(EAGAIN); + + if (!ctx->initialised) { + err = overlay_opencl_load(avctx, ctx->main->colorspace); + if (err < 0) + return err; + } + + if (!ctx->overlay_next) { + err = ff_request_frame(avctx->inputs[1]); + if (err < 0) + return err; + } + + while (!ctx->overlay || + av_compare_ts(ctx->main->pts, + avctx->inputs[0]->time_base, + ctx->overlay_next->pts, + avctx->inputs[1]->time_base) > 0) { + av_frame_free(&ctx->overlay); + ctx->overlay = ctx->overlay_next; + ctx->overlay_next = NULL; + + err = ff_request_frame(avctx->inputs[1]); + if (err < 0) + return err; + } + + output = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!output) { + err = AVERROR(ENOMEM); + goto fail; + } + + mem = (cl_mem)output->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)output->data[1]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->main->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->main->data[1]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + mem = (cl_mem)ctx->overlay->data[0]; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_mem), &mem); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + + x = ctx->x_position; + y = ctx->y_position; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &x); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + cle = clSetKernelArg(ctx->kernel, kernel_arg++, sizeof(cl_int), &y); + if (cle != CL_SUCCESS) goto fail_kernel_arg; + + // The kernel processes a 2x2 block. + global_work[0] = output->width / 2; + global_work[1] = output->height / 2; + + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, + global_work, NULL, 0, NULL, NULL); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to enqueue " + "overlay kernel: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + cle = clFinish(ctx->command_queue); + if (cle != CL_SUCCESS) { + av_log(avctx, AV_LOG_ERROR, "Failed to finish " + "command queue: %d.\n", cle); + err = AVERROR(EIO); + goto fail; + } + + err = av_frame_copy_props(output, ctx->main); + + av_frame_free(&ctx->main); + + av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", + av_get_pix_fmt_name(output->format), + output->width, output->height, output->pts); + + return ff_filter_frame(outlink, output); +fail_kernel_arg: + av_log(avctx, AV_LOG_ERROR, "Failed to set kernel arg %d: %d.\n", + kernel_arg, cle); + err = AVERROR(EIO); +fail: + return err; +} + +static av_cold void overlay_opencl_uninit(AVFilterContext *avctx) +{ + OverlayOpenCLContext *ctx = avctx->priv; + cl_int cle; + + av_frame_free(&ctx->main); + av_frame_free(&ctx->overlay); + av_frame_free(&ctx->overlay_next); + + if (ctx->kernel) { + cle = clReleaseKernel(ctx->kernel); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "kernel: %d.\n", cle); + } + + if (ctx->command_queue) { + cle = clReleaseCommandQueue(ctx->command_queue); + if (cle != CL_SUCCESS) + av_log(avctx, AV_LOG_ERROR, "Failed to release " + "command queue: %d.\n", cle); + } + + ff_opencl_filter_uninit(avctx); +} + +#define OFFSET(x) offsetof(OverlayOpenCLContext, x) +#define FLAGS (AV_OPT_FLAG_VIDEO_PARAM) +static const AVOption overlay_opencl_options[] = { + { "x", "Overlay x position", + OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { "y", "Overlay y position", + OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS }, + { NULL }, +}; + +static const AVClass overlay_opencl_class = { + .class_name = "overlay_opencl", + .item_name = av_default_item_name, + .option = overlay_opencl_options, + .version = LIBAVUTIL_VERSION_INT, +}; + +static const AVFilterPad overlay_opencl_inputs[] = { + { + .name = "main", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + .filter_frame = &overlay_opencl_filter_main, + }, + { + .name = "overlay", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_input, + .filter_frame = &overlay_opencl_filter_overlay, + .needs_fifo = 1, + }, + { NULL } +}; + +static const AVFilterPad overlay_opencl_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .config_props = &ff_opencl_filter_config_output, + .request_frame = &overlay_opencl_request_frame, + }, + { NULL } +}; + +AVFilter ff_vf_overlay_opencl = { + .name = "overlay_opencl", + .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"), + .priv_size = sizeof(OverlayOpenCLContext), + .priv_class = &overlay_opencl_class, + .init = &ff_opencl_filter_init, + .uninit = &overlay_opencl_uninit, + .query_formats = &ff_opencl_filter_query_formats, + .inputs = overlay_opencl_inputs, + .outputs = overlay_opencl_outputs, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; -- 2.11.0 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel