Added an OpenCL lut3d video filter implementation for the current lut3d video filter.
- This OpenCL implementation currently supports only .cube LUT format and 3 interpolations - nearest,trilinear,tetrahedral (default). - Outputs match CPU version up to floating point errors. Regards, Jan Studeny On Apr 28, 2025 at 13:39 +0300, Jan Studený via ffmpeg-devel <ffmpeg-devel@ffmpeg.org>, wrote: > --- > libavfilter/Makefile | 1 + > libavfilter/allfilters.c | 1 + > libavfilter/opencl/lut3d.cl | 177 ++++++++++++++ > libavfilter/opencl_source.h | 2 + > libavfilter/vf_lut3d_opencl.c | 444 ++++++++++++++++++++++++++++++++++ > 5 files changed, 625 insertions(+) > create mode 100644 libavfilter/opencl/lut3d.cl > create mode 100644 libavfilter/vf_lut3d_opencl.c > > diff --git a/libavfilter/Makefile b/libavfilter/Makefile > index 7c0d879ec9..6524d0f91a 100644 > --- a/libavfilter/Makefile > +++ b/libavfilter/Makefile > @@ -378,6 +378,7 @@ OBJS-$(CONFIG_LUT1D_FILTER) += vf_lut3d.o > OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o > OBJS-$(CONFIG_LUT2_FILTER) += vf_lut2.o framesync.o > OBJS-$(CONFIG_LUT3D_FILTER) += vf_lut3d.o framesync.o > +OBJS-$(CONFIG_LUT3D_OPENCL_FILTER) += vf_lut3d_opencl.o opencl.o > opencl/lut3d.o > OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o > OBJS-$(CONFIG_LUTYUV_FILTER) += vf_lut.o > OBJS-$(CONFIG_MASKEDCLAMP_FILTER) += vf_maskedclamp.o framesync.o > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index 740d9ab265..72c2f48ac4 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -353,6 +353,7 @@ extern const FFFilter ff_vf_lut; > extern const FFFilter ff_vf_lut1d; > extern const FFFilter ff_vf_lut2; > extern const FFFilter ff_vf_lut3d; > +extern const FFFilter ff_vf_lut3d_opencl; > extern const FFFilter ff_vf_lutrgb; > extern const FFFilter ff_vf_lutyuv; > extern const FFFilter ff_vf_maskedclamp; > diff --git a/libavfilter/opencl/lut3d.cl b/libavfilter/opencl/lut3d.cl > new file mode 100644 > index 0000000000..16dfecdc4e > --- /dev/null > +++ b/libavfilter/opencl/lut3d.cl > @@ -0,0 +1,177 @@ > +/* > + * Copyright (c) 2025 Jan Studeny > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +typedef struct rgbvec { > + float r, g, b, a; > +} rgbvec; > + > +#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) > + > +#define NEAR(x) ((int)((x) + .5)) > +#define PREV(x) ((int)(x)) > +#define NEXT(x) (MIN((int)(x) + 1, lut_edge_size - 1)) > + > +/** > + * Get the nearest defined point > + */ > +static rgbvec interp_nearest(float4 px, __global const rgbvec *lut, int > lut_edge_size) > +{ > + int r = NEAR(px[0]); > + int g = NEAR(px[1]); > + int b = NEAR(px[2]); > + int index = r * lut_edge_size * lut_edge_size + g * lut_edge_size + b; > + return lut[index]; > +} > + > +static float lerpf(float v0, float v1, float f) > +{ > + return v0 + (v1 - v0) * f; > +} > + > +static rgbvec lerp(const rgbvec *v0, const rgbvec *v1, float f) > +{ > + rgbvec v = { > + lerpf(v0->r, v1->r, f), lerpf(v0->g, v1->g, f), lerpf(v0->b, v1->b, f) > + }; > + return v; > +} > +/** > + * Interpolate using the 8 vertices of a cube > + * @see https://en.wikipedia.org/wiki/Trilinear_interpolation > + */ > +static rgbvec interp_trilinear(float4 px, __global const rgbvec *lut, int > lut_edge_size) > +{ > + const int lutsize2 = lut_edge_size * lut_edge_size; > + const int lutsize = lut_edge_size; > + > + const int prev[] = { PREV(px[0]), PREV(px[1]), PREV(px[2]) }; > + const int next[] = { NEXT(px[0]), NEXT(px[1]), NEXT(px[2]) }; > + > + const rgbvec d = { > + px[0] - prev[0], > + px[1] - prev[1], > + px[2] - prev[2] > + }; > + > + const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]]; > + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]]; > + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]]; > + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]]; > + > + const rgbvec c00 = lerp(&c000, &c100, d.r); > + const rgbvec c10 = lerp(&c010, &c110, d.r); > + const rgbvec c01 = lerp(&c001, &c101, d.r); > + const rgbvec c11 = lerp(&c011, &c111, d.r); > + > + const rgbvec c0 = lerp(&c00, &c10, d.g); > + const rgbvec c1 = lerp(&c01, &c11, d.g); > + > + return lerp(&c0, &c1, d.b); > +} > + > +/** > + * Tetrahedral interpolation. Based on code found in Truelight Software > Library paper. > + * @see > http://www.filmlight.ltd.uk/pdf/whitepapers/FL-TL-TN-0057-SoftwareLib.pdf > + */ > + > +static rgbvec interp_tetrahedral(float4 px, __global const rgbvec *lut, int > lut_edge_size) > +{ > + const int lutsize2 = lut_edge_size*lut_edge_size; > + const int lutsize = lut_edge_size; > + const int prev[] = {PREV(px[0]), PREV(px[1]), PREV(px[2])}; > + const int next[] = {NEXT(px[0]), NEXT(px[1]), NEXT(px[2])}; > + const rgbvec d = {px[0] - prev[0], px[1] - prev[1], px[2] - prev[2]}; > + const rgbvec c000 = lut[prev[0] * lutsize2 + prev[1] * lutsize + prev[2]]; > + const rgbvec c111 = lut[next[0] * lutsize2 + next[1] * lutsize + next[2]]; > + rgbvec c; > + if (d.r > d.g) { > + if (d.g > d.b) { > + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]]; > + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + c.r = (1-d.r) * c000.r + (d.r-d.g) * c100.r + (d.g-d.b) * c110.r + (d.b) * > c111.r; > + c.g = (1-d.r) * c000.g + (d.r-d.g) * c100.g + (d.g-d.b) * c110.g + (d.b) * > c111.g; > + c.b = (1-d.r) * c000.b + (d.r-d.g) * c100.b + (d.g-d.b) * c110.b + (d.b) * > c111.b; > + } else if (d.r > d.b) { > + const rgbvec c100 = lut[next[0] * lutsize2 + prev[1] * lutsize + prev[2]]; > + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + c.r = (1-d.r) * c000.r + (d.r-d.b) * c100.r + (d.b-d.g) * c101.r + (d.g) * > c111.r; > + c.g = (1-d.r) * c000.g + (d.r-d.b) * c100.g + (d.b-d.g) * c101.g + (d.g) * > c111.g; > + c.b = (1-d.r) * c000.b + (d.r-d.b) * c100.b + (d.b-d.g) * c101.b + (d.g) * > c111.b; > + } else { > + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + const rgbvec c101 = lut[next[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + c.r = (1-d.b) * c000.r + (d.b-d.r) * c001.r + (d.r-d.g) * c101.r + (d.g) * > c111.r; > + c.g = (1-d.b) * c000.g + (d.b-d.r) * c001.g + (d.r-d.g) * c101.g + (d.g) * > c111.g; > + c.b = (1-d.b) * c000.b + (d.b-d.r) * c001.b + (d.r-d.g) * c101.b + (d.g) * > c111.b; > + } > + } else { > + if (d.b > d.g) { > + const rgbvec c001 = lut[prev[0] * lutsize2 + prev[1] * lutsize + next[2]]; > + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]]; > + c.r = (1-d.b) * c000.r + (d.b-d.g) * c001.r + (d.g-d.r) * c011.r + (d.r) * > c111.r; > + c.g = (1-d.b) * c000.g + (d.b-d.g) * c001.g + (d.g-d.r) * c011.g + (d.r) * > c111.g; > + c.b = (1-d.b) * c000.b + (d.b-d.g) * c001.b + (d.g-d.r) * c011.b + (d.r) * > c111.b; > + } else if (d.b > d.r) { > + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + const rgbvec c011 = lut[prev[0] * lutsize2 + next[1] * lutsize + next[2]]; > + c.r = (1-d.g) * c000.r + (d.g-d.b) * c010.r + (d.b-d.r) * c011.r + (d.r) * > c111.r; > + c.g = (1-d.g) * c000.g + (d.g-d.b) * c010.g + (d.b-d.r) * c011.g + (d.r) * > c111.g; > + c.b = (1-d.g) * c000.b + (d.g-d.b) * c010.b + (d.b-d.r) * c011.b + (d.r) * > c111.b; > + } else { > + const rgbvec c010 = lut[prev[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + const rgbvec c110 = lut[next[0] * lutsize2 + next[1] * lutsize + prev[2]]; > + c.r = (1-d.g) * c000.r + (d.g-d.r) * c010.r + (d.r-d.b) * c110.r + (d.b) * > c111.r; > + c.g = (1-d.g) * c000.g + (d.g-d.r) * c010.g + (d.r-d.b) * c110.g + (d.b) * > c111.g; > + c.b = (1-d.g) * c000.b + (d.g-d.r) * c010.b + (d.r-d.b) * c110.b + (d.b) * > c111.b; > + } > + } > + return c; > +} > + > +#define LUT3D_KERNEL(INTERP_FUNC) \ > +__kernel void lut3d_##INTERP_FUNC( \ > + __read_only image2d_t src, \ > + __write_only image2d_t dst, \ > + __global const rgbvec* lut, \ > + int lut_edge_size) \ > +{ \ > + const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | \ > + CLK_ADDRESS_CLAMP_TO_EDGE | \ > + CLK_FILTER_NEAREST); \ > + \ > + int2 loc = (int2)(get_global_id(0), get_global_id(1)); \ > + float4 px = read_imagef(src, sampler, loc); \ > + \ > + for (int i = 0; i < 3; i++) { \ > + px[i] *= (lut_edge_size - 1); \ > + } \ > + \ > + rgbvec lutpx = INTERP_FUNC(px, lut, lut_edge_size); \ > + \ > + write_imagef(dst, loc, (float4)(lutpx.r, lutpx.g, lutpx.b, 0.0f)); \ > +} > + > +LUT3D_KERNEL(interp_nearest) > +LUT3D_KERNEL(interp_trilinear) > +LUT3D_KERNEL(interp_tetrahedral) > diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h > index b6930fb686..d143286d21 100644 > --- a/libavfilter/opencl_source.h > +++ b/libavfilter/opencl_source.h > @@ -26,6 +26,7 @@ extern const char *ff_source_convolution_cl; > extern const char *ff_source_deshake_cl; > extern const char *ff_source_neighbor_cl; > extern const char *ff_source_nlmeans_cl; > +extern const char *ff_source_lut3d_cl; > extern const char *ff_source_overlay_cl; > extern const char *ff_source_pad_cl; > extern const char *ff_source_remap_cl; > @@ -34,4 +35,5 @@ extern const char *ff_source_transpose_cl; > extern const char *ff_source_unsharp_cl; > extern const char *ff_source_xfade_cl; > > + > #endif /* AVFILTER_OPENCL_SOURCE_H */ > diff --git a/libavfilter/vf_lut3d_opencl.c b/libavfilter/vf_lut3d_opencl.c > new file mode 100644 > index 0000000000..127a81edce > --- /dev/null > +++ b/libavfilter/vf_lut3d_opencl.c > @@ -0,0 +1,444 @@ > +/* > + * Copyright (c) 2025 Jan Studeny > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 > USA > + */ > + > +#include "config_components.h" > + > +#include "libavutil/avassert.h" > +#include "libavutil/common.h" > +#include "libavutil/imgutils.h" > +#include "libavutil/mem.h" > +#include "libavutil/opt.h" > +#include "libavutil/pixdesc.h" > +#include "libavutil/avstring.h" > + > +#include "libavutil/file_open.h" > + > +#include "avfilter.h" > +#include "filters.h" > +#include "opencl.h" > +#include "drawutils.h" > +#include "opencl_source.h" > +#include "video.h" > + > + > +#define MAX_LINE_SIZE 512 > + > +enum interp_mode { > + INTERPOLATE_NEAREST, > + INTERPOLATE_TRILINEAR, > + INTERPOLATE_TETRAHEDRAL, > + INTERPOLATE_PYRAMID, > + INTERPOLATE_PRISM, > + NB_INTERP_MODE > +}; > + > +typedef struct rgbvec { > + cl_float r, g, b, a; > +} rgbvec; > + > +#define MAX_LEVEL 256 > + > + > +typedef struct LUT3DOpenCLContext { > + OpenCLFilterContext ocf; > + > + int initialised; > + cl_kernel kernel; > + cl_command_queue command_queue; > + cl_mem lut3d_buf; > + > + struct rgbvec *lut; > + int lutsize; > + int lutsize2; > + struct rgbvec scale; > + int interpolation; ///<interp_mode > + char *file; > +} LUT3DOpenCLContext; > + > +static int allocate_3dlut(AVFilterContext *ctx, int lutsize) > +{ > + LUT3DOpenCLContext *lut3d = ctx->priv; > + if (lutsize < 2 || lutsize > MAX_LEVEL) { > + av_log(ctx, AV_LOG_ERROR, "Too large or invalid 3D LUT size\n"); > + return AVERROR(EINVAL); > + } > + > + av_freep(&lut3d->lut); > + lut3d->lut = av_malloc_array(lutsize * lutsize * lutsize, > sizeof(*lut3d->lut)); > + if (!lut3d->lut) > + return AVERROR(ENOMEM); > + > + lut3d->lutsize = lutsize; > + lut3d->lutsize2 = lutsize * lutsize; > + return 0; > +} > + > +static int set_identity_matrix(AVFilterContext *ctx, int size) > +{ > + LUT3DOpenCLContext *lut3d = ctx->priv; > + int ret, i, j, k; > + const int size2 = size * size; > + const float c = 1. / (size - 1); > + > + ret = allocate_3dlut(ctx, size); > + if (ret < 0) > + return ret; > + > + for (k = 0; k < size; k++) { > + for (j = 0; j < size; j++) { > + for (i = 0; i < size; i++) { > + struct rgbvec *vec = &lut3d->lut[k * size2 + j * size + i]; > + vec->r = k * c; > + vec->g = j * c; > + vec->b = i * c; > + } > + } > + } > + > + return 0; > +} > + > +static int skip_line(const char *p) > +{ > + while (*p && av_isspace(*p)) > + p++; > + return !*p || *p == '#'; > +} > + > +#define NEXT_LINE(loop_cond) do { \ > + if (!fgets(line, sizeof(line), f)) { \ > + av_log(ctx, AV_LOG_ERROR, "Unexpected EOF\n"); \ > + return AVERROR_INVALIDDATA; \ > + } \ > +} while (loop_cond) > + > +static int parse_cube(AVFilterContext *ctx, FILE *f) > +{ > + LUT3DOpenCLContext *lut3d = ctx->priv; > + char line[MAX_LINE_SIZE]; > + > + while (fgets(line, sizeof(line), f)) { > + if (!strncmp(line, "LUT_3D_SIZE", 11)) { > + int ret, i, j, k; > + const int size = strtol(line + 12, NULL, 0); > + const int size2 = size * size; > + > + ret = allocate_3dlut(ctx, size); > + if (ret < 0) > + return ret; > + > + for (k = 0; k < size; k++) { > + for (j = 0; j < size; j++) { > + for (i = 0; i < size; i++) { > + struct rgbvec *vec = &lut3d->lut[i * size2 + j * size + k]; > + > + do { > +try_again: > + NEXT_LINE(0); > + if (!strncmp(line, "DOMAIN_", 7)) { > + av_log(ctx, AV_LOG_ERROR, "Min/max not supported in this format\n"); > + return AVERROR_INVALIDDATA; > + } else if (!strncmp(line, "TITLE", 5)) { > + goto try_again; > + } > + } while (skip_line(line)); > + if (av_sscanf(line, "%f %f %f", &vec->r, &vec->g, &vec->b) != 3) > + return AVERROR_INVALIDDATA; > + } > + } > + } > + break; > + } > + } > + > + return 0; > +} > + > +static int lut3d_opencl_init_device(AVFilterContext *avctx) > +{ > + int err; > + LUT3DOpenCLContext *ctx = avctx->priv; > + cl_int cle; > + > + > + size_t n = ctx->lutsize; > + size_t total = n * n * n; > + > + > + cl_mem lut3d_buf = clCreateBuffer(ctx->ocf.hwctx->context, > + CL_MEM_READ_ONLY | > + CL_MEM_COPY_HOST_PTR | > + CL_MEM_HOST_NO_ACCESS, > + sizeof(rgbvec) * total, > + ctx->lut, &cle); > + > + if (!lut3d_buf) { > + av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: " > + "%d.\n", cle); > + return AVERROR(EIO); > + } > + ctx->lut3d_buf = lut3d_buf; > + > + av_log(avctx, AV_LOG_DEBUG, "LUT3D data loaded onto host\n"); > + > + > + > + > + err = ff_opencl_filter_load_program(avctx, &ff_source_lut3d_cl, 1); > + if (err < 0) > + return err; > + > + ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context, > + ctx->ocf.hwctx->device_id, > + 0, &cle); > + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL " > + "command queue %d.\n", cle); > + > + const char *kernel_name; > + switch (ctx->interpolation) { > + case INTERPOLATE_NEAREST: kernel_name = "lut3d_interp_nearest"; break; > + case INTERPOLATE_TRILINEAR: kernel_name = "lut3d_interp_trilinear"; break; > + case INTERPOLATE_TETRAHEDRAL: kernel_name = "lut3d_interp_tetrahedral"; > break; > + default: > + av_assert0(0); > + } > + ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle); > + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create " > + "kernel %d.\n", cle); > + > + ctx->initialised = 1; > + return 0; > + fail: > + if (ctx->command_queue) > + clReleaseCommandQueue(ctx->command_queue); > + if (ctx->kernel) > + clReleaseKernel(ctx->kernel); > + return err; > +} > + > +static int lut3d_opencl_init(AVFilterContext *avctx) > +{ > + > + av_log(avctx, AV_LOG_DEBUG, "Starting intialization of LUT3D OpenCL\n"); > + LUT3DOpenCLContext *ctx = avctx->priv; > + int err = 0; > + > + ff_opencl_filter_init(avctx); > + > + av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL filter initialized\n"); > + > + > + FILE *f; > + const char *ext; > + > + if (!ctx->file) { > + return set_identity_matrix(avctx, 32); > + } > + else { > + ext = strrchr(ctx->file, '.'); > + if (!ext) { > + av_log(avctx, AV_LOG_ERROR, "Unable to guess the format from the > extension\n"); > + err = AVERROR_INVALIDDATA; > + return err; > + } > + ext++; > + if (!av_strcasecmp(ext, "cube")) { > + f = avpriv_fopen_utf8(ctx->file, "r"); > + if (!f) { > + err = AVERROR(errno); > + av_log(avctx, AV_LOG_ERROR, "%s: %s\n", ctx->file, av_err2str(err)); > + return err; > + } > + err = parse_cube(avctx, f); > + fclose(f); > + } else { > + av_log(avctx, AV_LOG_ERROR, "Unrecognized '.%s' file type\n", ext); > + err = AVERROR(EINVAL); > + return err; > + } > + if (!err && !ctx->lutsize) { > + av_log(avctx, AV_LOG_ERROR, "3D LUT is empty\n"); > + err = AVERROR_INVALIDDATA; > + return err; > + } > + > + } > + av_log(avctx, AV_LOG_DEBUG, "LUT3D OpenCL data loaded\n"); > + return err; > +} > + > +static int lut3d_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input) > +{ > + AVFilterContext *avctx = inlink->dst; > + AVFilterLink *outlink = avctx->outputs[0]; > + LUT3DOpenCLContext *ctx = avctx->priv; > + AVFrame *output = NULL; > + cl_int cle; > + size_t global_work[2]; > + cl_mem src, dst; > + int err, p; > + > + av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n", > + av_get_pix_fmt_name(input->format), > + input->width, input->height, input->pts); > + > + if (!input->hw_frames_ctx) > + return AVERROR(EINVAL); > + > + if (!ctx->initialised) { > + err = lut3d_opencl_init_device(avctx); > + if (err < 0) > + goto fail; > + } > + > + output = ff_get_video_buffer(outlink, outlink->w, outlink->h); > + if (!output) { > + err = AVERROR(ENOMEM); > + goto fail; > + } > + > + > + for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) { > + src = (cl_mem) input->data[p]; > + dst = (cl_mem)output->data[p]; > + > + if (!dst) > + break; > + > + CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &src); > + CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &dst); > + CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_mem, &ctx->lut3d_buf); > + CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int, &ctx->lutsize); > + > + err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, > 0); > + if (err < 0) > + goto fail; > + > + av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d " > + "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n", > + p, global_work[0], global_work[1]); > + > + cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL, > + global_work, NULL, > + 0, NULL, NULL); > + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue " > + "kernel: %d.\n", cle); > + } > + > + cle = clFinish(ctx->command_queue); > + CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", > cle); > + > + err = av_frame_copy_props(output, input); > + if (err < 0) > + goto fail; > + > + av_frame_free(&input); > + > + av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n", > + av_get_pix_fmt_name(output->format), > + output->width, output->height, output->pts); > + > + return ff_filter_frame(outlink, output); > + > +fail: > + clFinish(ctx->command_queue); > + av_frame_free(&input); > + av_frame_free(&output); > + return err; > +} > + > +static av_cold void lut3d_opencl_uninit(AVFilterContext *avctx) > +{ > + LUT3DOpenCLContext *ctx = avctx->priv; > + cl_int cle; > + > + clReleaseMemObject(ctx->lut3d_buf); > + > + if (ctx->kernel) { > + cle = clReleaseKernel(ctx->kernel); > + if (cle != CL_SUCCESS) > + av_log(avctx, AV_LOG_ERROR, "Failed to release " > + "kernel: %d.\n", cle); > + } > + > + if (ctx->command_queue) { > + cle = clReleaseCommandQueue(ctx->command_queue); > + if (cle != CL_SUCCESS) > + av_log(avctx, AV_LOG_ERROR, "Failed to release " > + "command queue: %d.\n", cle); > + } > + > + av_freep(&ctx->lut); > + > + ff_opencl_filter_uninit(avctx); > +} > + > +static const AVFilterPad lut3d_opencl_inputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .filter_frame = &lut3d_opencl_filter_frame, > + .config_props = &ff_opencl_filter_config_input, > + }, > +}; > + > +static const AVFilterPad lut3d_opencl_outputs[] = { > + { > + .name = "default", > + .type = AVMEDIA_TYPE_VIDEO, > + .config_props = &ff_opencl_filter_config_output, > + }, > +}; > + > +#define OFFSET(x) offsetof(LUT3DOpenCLContext, x) > +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM) > +#define TFLAGS > AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_RUNTIME_PARAM > + > + > + > +#if CONFIG_LUT3D_OPENCL_FILTER > + > + > +static const AVOption lut3d_opencl_options[] = { > + { "file", "set 3D LUT file name", OFFSET(file), AV_OPT_TYPE_STRING, > {.str=NULL}, .flags = FLAGS }, > + { "interp", "select interpolation mode", OFFSET(interpolation), > AV_OPT_TYPE_INT, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, NB_INTERP_MODE-1, TFLAGS, > .unit = "interp_mode" }, > + { "nearest", "use values from the nearest defined points", 0, > AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_NEAREST}, 0, 0, TFLAGS, .unit = > "interp_mode" }, > + { "trilinear", "interpolate values using the 8 points defining a cube", 0, > AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TRILINEAR}, 0, 0, TFLAGS, .unit = > "interp_mode" }, > + { "tetrahedral", "interpolate values using a tetrahedron", 0, > AV_OPT_TYPE_CONST, {.i64=INTERPOLATE_TETRAHEDRAL}, 0, 0, TFLAGS, .unit = > "interp_mode" }, \ > + { NULL } > +}; > + > +AVFILTER_DEFINE_CLASS(lut3d_opencl); > + > +const FFFilter ff_vf_lut3d_opencl = { > + .p.name = "lut3d_opencl", > + .p.description = NULL_IF_CONFIG_SMALL("Adjust colors using a 3D LUT."), > + .p.priv_class = &lut3d_opencl_class, > + .p.flags = AVFILTER_FLAG_HWDEVICE, > + .priv_size = sizeof(LUT3DOpenCLContext), > + .init = &lut3d_opencl_init, > + .uninit = &lut3d_opencl_uninit, > + FILTER_INPUTS(lut3d_opencl_inputs), > + FILTER_OUTPUTS(lut3d_opencl_outputs), > + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL), > + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, > +}; > + > +#endif /* CONFIG_LUT3D_OPENCL_FILTER */ > -- > 2.39.5 (Apple Git-154) > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".