> Subject: Re: [FFmpeg-devel] [PATCH] added slicing yuv, rgb You probably want to say what part it affects, as well as mentioning threading. So probably something like "avfilter/lut: support rgb and yuv slice threading"
You might also want to mention in the commit description what performance boost you observe. Also, from what we discussed already, you can get a huge performance boost by tweaking a bit the current C code before the addition of the threading (at least for the rgb path). Please do that before threading. On Tue, Dec 16, 2014 at 10:44:27PM -0800, Yayoi wrote: > --- > libavfilter/allfilters.c | 2 +- > libavfilter/vf_lut.c | 119 > ++++++++++++++++++++++++++++++++++------------- > 2 files changed, 88 insertions(+), 33 deletions(-) > > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c > index adb86be..76341bd 100644 > --- a/libavfilter/allfilters.c > +++ b/libavfilter/allfilters.c > @@ -218,7 +218,7 @@ void avfilter_register_all(void) > REGISTER_FILTER(YADIF, yadif, vf); > REGISTER_FILTER(ZMQ, zmq, vf); > REGISTER_FILTER(ZOOMPAN, zoompan, vf); > - > + This change is unrelated and should not be part of this commit (you are adding trailing whitespaces). You might want to have a look at http://ffmpeg.org/developer.html#Editor-configuration > REGISTER_FILTER(CELLAUTO, cellauto, vsrc); > REGISTER_FILTER(COLOR, color, vsrc); > REGISTER_FILTER(FREI0R, frei0r_src, vsrc); > diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c > index 0b7a2ca..3ab7f40 100644 > --- a/libavfilter/vf_lut.c > +++ b/libavfilter/vf_lut.c > @@ -69,6 +69,13 @@ typedef struct LutContext { > int negate_alpha; /* only used by negate */ > } LutContext; > > +typedef struct ThreadData { > + AVFrame *in, *out; > + int plane; > + int h,w; Style nit: missing space > + AVFilterLink *inlink; It doesn't look like you are using this one > +} ThreadData; > + > #define Y 0 > #define U 1 > #define V 2 > @@ -276,14 +283,76 @@ static int config_props(AVFilterLink *inlink) > return 0; > } > > +static int process_slice_rgb(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) > +{ > + int x, y; > + LutContext *s = ctx->priv; > + const ThreadData *td = arg; > + const AVFrame *in = td->in; > + const AVFrame *out = td->out; > + const int step = s->step; > + const uint8_t (*tab)[256] = (const uint8_t (*)[256])s->lut; > + > + > + const int slice_start = (in->height * jobnr ) / nb_jobs; > + const int slice_end = (in->height * (jobnr+1)) / nb_jobs; > + uint8_t *outrow = out->data[0] + slice_start * out->linesize[0]; > + const uint8_t *inrow = in->data[0] + slice_start * in->linesize[0]; style: weird vertical alignment (same in yuv variant) > + int w = td->w; > + for (y = slice_start; y < slice_end; y++) { Here and a few other places below you have trailing whitespaces again (I won't mention them again) > + > + for (x = 0; x < w; x++) { > + switch (step) { > + case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > + case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > + case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > + default: outrow[0] = tab[0][inrow[0]]; > + } > + outrow += step; > + inrow += step; > + } > + } > + return 0; > + > +} > + > + > + nit style: drop the 2 extra lines > +static int process_slice_yuv(AVFilterContext *ctx, void *arg, int jobnr, int > nb_jobs) > +{ > + int x, y; > + LutContext *s = ctx->priv; > + const ThreadData *td = arg; > + int plane = td->plane; > + int h = td->h; > + int w = td->w; > + nit: you might want to mark them as const > + const AVFrame *in = td->in; > + const AVFrame *out = td->out; > + int slice_start = (h * jobnr ) / nb_jobs; > + int slice_end = (h * (jobnr+1)) / nb_jobs; > + uint8_t *outrow = out->data[plane] + slice_start * > out->linesize[plane]; > + const uint8_t *inrow = in->data[plane] + slice_start * > in->linesize[plane]; > + > + for (y = slice_start; y < slice_end; y++) { > + const uint8_t *tab = s->lut[plane]; > + for (x = 0; x < w; x++) > + outrow[x] = tab[inrow[x]]; > + inrow += in ->linesize[plane]; > + outrow += out->linesize[plane]; > + } > + return 0; > + > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVFilterContext *ctx = inlink->dst; > LutContext *s = ctx->priv; > AVFilterLink *outlink = ctx->outputs[0]; > AVFrame *out; > - uint8_t *inrow, *outrow, *inrow0, *outrow0; > - int i, j, plane, direct = 0; > + ThreadData td; You might want to zero init this with ThreadData td = {0}; Because typically, in the RGB case, the plane field will stay uninitialized and the execute function might want to copy the ThreadData or something like that (probably not the case but just in case...) > + int plane, direct = 0; > > if (av_frame_is_writable(in)) { > direct = 1; > @@ -297,47 +366,33 @@ static int filter_frame(AVFilterLink *inlink, AVFrame > *in) > av_frame_copy_props(out, in); > } > > + td.in = in; > + td.out = out; > + td.inlink = inlink; > + > + > if (s->is_rgb) { > /* packed */ > - inrow0 = in ->data[0]; > - outrow0 = out->data[0]; > - > - for (i = 0; i < in->height; i ++) { > - int w = inlink->w; > - const uint8_t (*tab)[256] = (const uint8_t (*)[256])s->lut; > - inrow = inrow0; > - outrow = outrow0; > - for (j = 0; j < w; j++) { > - switch (s->step) { > - case 4: outrow[3] = tab[3][inrow[3]]; // Fall-through > - case 3: outrow[2] = tab[2][inrow[2]]; // Fall-through > - case 2: outrow[1] = tab[1][inrow[1]]; // Fall-through > - default: outrow[0] = tab[0][inrow[0]]; > - } > - outrow += s->step; > - inrow += s->step; > - } > - inrow0 += in ->linesize[0]; > - outrow0 += out->linesize[0]; > - } > + > + int w = inlink->w; Is there a reason for this intermediate step? Also, you can't declare a new variable here since it's not the beginning of a scope (it will choke on some compilers) > + td.w = w; > + ctx->internal->execute(ctx, process_slice_rgb, &td, NULL, > FFMIN(outlink->h, ctx->graph->nb_threads)); > + > } else { > /* planar */ > + > for (plane = 0; plane < 4 && in->data[plane] && in->linesize[plane]; > plane++) { > int vsub = plane == 1 || plane == 2 ? s->vsub : 0; > int hsub = plane == 1 || plane == 2 ? s->hsub : 0; > int h = FF_CEIL_RSHIFT(inlink->h, vsub); > int w = FF_CEIL_RSHIFT(inlink->w, hsub); > > - inrow = in ->data[plane]; > - outrow = out->data[plane]; > + td.plane = plane; > + td.h = h; > + td.w = w; > + > + ctx->internal->execute(ctx, process_slice_yuv, &td, NULL, > FFMIN(h, ctx->graph->nb_threads)); > > - for (i = 0; i < h; i++) { > - const uint8_t *tab = s->lut[plane]; > - for (j = 0; j < w; j++) > - outrow[j] = tab[inrow[j]]; > - inrow += in ->linesize[plane]; > - outrow += out->linesize[plane]; > - } > } > } Also missing a .flags update. Looking forward next iteration, thank you. -- Clément B.
pgpncSPP0WQUh.pgp
Description: PGP signature
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel