Re: [FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
On Mon, 12 Jul 2021 13:11:28 +0800 Lingjiang Fang wrote: Sorry, I made a mistake when replying to the previous email; please ignore this :( > follow comments from Steven Liu > --- > doc/filters.texi | 8 > libavfilter/vf_ocr.c | 45 > +++- 2 files changed, 52 > insertions(+), 1 deletion(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index d991c06628..f41ba0ce46 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -15457,6 +15457,14 @@ Set character whitelist. > > @item blacklist > Set character blacklist. > + > +@item x, y > +Set top-left corner of the subregion, in pixels, default is (0,0). > + > +@item w, h > +Set width and height of the subregion, in pixels, > +default is the bottom-right part from given top-left corner. > + > @end table > > The filter exports recognized text as the frame metadata > @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c > b/libavfilter/vf_ocr.c index 6de474025a..55f04b6592 100644 > --- a/libavfilter/vf_ocr.c > +++ b/libavfilter/vf_ocr.c > @@ -33,6 +33,8 @@ typedef struct OCRContext { > char *language; > char *whitelist; > char *blacklist; > +int x, y, x_in, y_in; > +int w, h, w_in, h_in; > > TessBaseAPI *tess; > } OCRContext; > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > { "language", "set language",OFFSET(language), > AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set > character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, > {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ > "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", > OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},0, 0, FLAGS }, > +{ "x", "top x of sub region", OFFSET(x), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "y", "top y of sub region", OFFSET(y), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "w", "width of sub region", OFFSET(w), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "h", "height of sub region",OFFSET(h), > 
AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, { NULL } > }; > > @@ -93,6 +99,41 @@ static int query_formats(AVFilterContext *ctx) > return ff_set_common_formats(ctx, fmts_list); > } > > +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int > pic_h) +{ > +// 0 <= x < pic_w > +if (*x >= pic_w) > +*x = 0; > +// 0 <= y < pic_h > +if (*y >= pic_h) > +*y = 0; > + > +if (*w == 0 || *w + *x > pic_w) > +*w = pic_w - *x; > +if (*h == 0 || *h + *y > pic_h) > +*h = pic_h - *y; > +} > + > +static int config_input(AVFilterLink *inlink) > +{ > +AVFilterContext *ctx = inlink->dst; > +OCRContext *s = ctx->priv; > + > +s->x_in = s->x; > +s->y_in = s->y; > +s->w_in = s->w; > +s->h_in = s->h; > +check_fix(&s->x_in, &s->y_in, &s->w_in, &s->h_in, inlink->w, > inlink->h); > +if ( s->x_in != s->x || s->y_in != s->y || > +(s->w != 0 && s->w_in != s->w) || (s->h != 0 && s->h_in != > s->h)) { > +av_log(s, AV_LOG_WARNING, "config error, subregion changed > to " > + "x=%d, y=%d, w=%d, h=%d\n", > + s->x_in, s->y_in, s->w_in, > s->h_in); > +} > + > +return 0; > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVDictionary **metadata = &in->metadata; > @@ -102,8 +143,9 @@ static int filter_frame(AVFilterLink *inlink, > AVFrame *in) char *result; > int *confs; > > +// TODO(vacing): support expression > result = TessBaseAPIRect(s->tess, in->data[0], 1, > - in->linesize[0], 0, 0, in->width, > in->height); > + in->linesize[0], s->x_in, s->y_in, > s->w_in, s->h_in); confs = TessBaseAPIAllWordConfidences(s->tess); > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > for (int i = 0; confs[i] != -1; i++) { > @@ -134,6 +176,7 @@ static const AVFilterPad ocr_inputs[] = { > .name = "default", > .type = AVMEDIA_TYPE_VIDEO, > .filter_frame = filter_frame, > +.config_props = config_input, > }, > { NULL } > }; Regards, Lingjiang Fang ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email 
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
On Mon, 12 Jul 2021 13:09:26 +0800 Lingjiang Fang wrote: Sorry, I made a mistake when replying to the previous email; please ignore this :( > follow comments from Steven Liu > --- > doc/filters.texi | 8 > libavfilter/vf_ocr.c | 45 > +++- 2 files changed, 52 > insertions(+), 1 deletion(-) > > diff --git a/doc/filters.texi b/doc/filters.texi > index d991c06628..f41ba0ce46 100644 > --- a/doc/filters.texi > +++ b/doc/filters.texi > @@ -15457,6 +15457,14 @@ Set character whitelist. > > @item blacklist > Set character blacklist. > + > +@item x, y > +Set top-left corner of the subregion, in pixels, default is (0,0). > + > +@item w, h > +Set width and height of the subregion, in pixels, > +default is the bottom-right part from given top-left corner. > + > @end table > > The filter exports recognized text as the frame metadata > @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c > b/libavfilter/vf_ocr.c index 6de474025a..55f04b6592 100644 > --- a/libavfilter/vf_ocr.c > +++ b/libavfilter/vf_ocr.c > @@ -33,6 +33,8 @@ typedef struct OCRContext { > char *language; > char *whitelist; > char *blacklist; > +int x, y, x_in, y_in; > +int w, h, w_in, h_in; > > TessBaseAPI *tess; > } OCRContext; > @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { > { "language", "set language",OFFSET(language), > AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set > character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, > {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ > "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", > OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},0, 0, FLAGS }, > +{ "x", "top x of sub region", OFFSET(x), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "y", "top y of sub region", OFFSET(y), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "w", "width of sub region", OFFSET(w), > AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, > +{ "h", "height of sub region",OFFSET(h), > 
AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, { NULL } > }; > > @@ -93,6 +99,41 @@ static int query_formats(AVFilterContext *ctx) > return ff_set_common_formats(ctx, fmts_list); > } > > +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int > pic_h) +{ > +// 0 <= x < pic_w > +if (*x >= pic_w) > +*x = 0; > +// 0 <= y < pic_h > +if (*y >= pic_h) > +*y = 0; > + > +if (*w == 0 || *w + *x > pic_w) > +*w = pic_w - *x; > +if (*h == 0 || *h + *y > pic_h) > +*h = pic_h - *y; > +} > + > +static int config_input(AVFilterLink *inlink) > +{ > +AVFilterContext *ctx = inlink->dst; > +OCRContext *s = ctx->priv; > + > +s->x_in = s->x; > +s->y_in = s->y; > +s->w_in = s->w; > +s->h_in = s->h; > +check_fix(&s->x_in, &s->y_in, &s->w_in, &s->h_in, inlink->w, > inlink->h); > +if ( s->x_in != s->x || s->y_in != s->y || > +(s->w != 0 && s->w_in != s->w) || (s->h != 0 && s->h_in != > s->h)) { > +av_log(s, AV_LOG_WARNING, "config error, subregion changed > to " > + "x=%d, y=%d, w=%d, h=%d\n", > + s->x_in, s->y_in, s->w_in, > s->h_in); > +} > + > +return 0; > +} > + > static int filter_frame(AVFilterLink *inlink, AVFrame *in) > { > AVDictionary **metadata = &in->metadata; > @@ -102,8 +143,9 @@ static int filter_frame(AVFilterLink *inlink, > AVFrame *in) char *result; > int *confs; > > +// TODO(vacing): support expression > result = TessBaseAPIRect(s->tess, in->data[0], 1, > - in->linesize[0], 0, 0, in->width, > in->height); > + in->linesize[0], s->x_in, s->y_in, > s->w_in, s->h_in); confs = TessBaseAPIAllWordConfidences(s->tess); > av_dict_set(metadata, "lavfi.ocr.text", result, 0); > for (int i = 0; confs[i] != -1; i++) { > @@ -134,6 +176,7 @@ static const AVFilterPad ocr_inputs[] = { > .name = "default", > .type = AVMEDIA_TYPE_VIDEO, > .filter_frame = filter_frame, > +.config_props = config_input, > }, > { NULL } > }; Regards, Lingjiang Fang ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email 
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
follow comments from Steven Liu --- doc/filters.texi | 8 libavfilter/vf_ocr.c | 45 +++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/filters.texi b/doc/filters.texi index d991c06628..f41ba0ce46 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15457,6 +15457,14 @@ Set character whitelist. @item blacklist Set character blacklist. + +@item x, y +Set top-left corner of the subregion, in pixels, default is (0,0). + +@item w, h +Set width and height of the subregion, in pixels, +default is the bottom-right part from given top-left corner. + @end table The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c index 6de474025a..55f04b6592 100644 --- a/libavfilter/vf_ocr.c +++ b/libavfilter/vf_ocr.c @@ -33,6 +33,8 @@ typedef struct OCRContext { char *language; char *whitelist; char *blacklist; +int x, y, x_in, y_in; +int w, h, w_in, h_in; TessBaseAPI *tess; } OCRContext; @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { { "language", "set language",OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},0, 0, FLAGS }, +{ "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "h", "height of sub region",OFFSET(h), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, { NULL } }; @@ -93,6 +99,41 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, fmts_list); } +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int 
pic_h) +{ +// 0 <= x < pic_w +if (*x >= pic_w) +*x = 0; +// 0 <= y < pic_h +if (*y >= pic_h) +*y = 0; + +if (*w == 0 || *w + *x > pic_w) +*w = pic_w - *x; +if (*h == 0 || *h + *y > pic_h) +*h = pic_h - *y; +} + +static int config_input(AVFilterLink *inlink) +{ +AVFilterContext *ctx = inlink->dst; +OCRContext *s = ctx->priv; + +s->x_in = s->x; +s->y_in = s->y; +s->w_in = s->w; +s->h_in = s->h; +check_fix(&s->x_in, &s->y_in, &s->w_in, &s->h_in, inlink->w, inlink->h); +if ( s->x_in != s->x || s->y_in != s->y || +(s->w != 0 && s->w_in != s->w) || (s->h != 0 && s->h_in != s->h)) { +av_log(s, AV_LOG_WARNING, "config error, subregion changed to " + "x=%d, y=%d, w=%d, h=%d\n", + s->x_in, s->y_in, s->w_in, s->h_in); +} + +return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVDictionary **metadata = &in->metadata; @@ -102,8 +143,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) char *result; int *confs; +// TODO(vacing): support expression result = TessBaseAPIRect(s->tess, in->data[0], 1, - in->linesize[0], 0, 0, in->width, in->height); + in->linesize[0], s->x_in, s->y_in, s->w_in, s->h_in); confs = TessBaseAPIAllWordConfidences(s->tess); av_dict_set(metadata, "lavfi.ocr.text", result, 0); for (int i = 0; confs[i] != -1; i++) { @@ -134,6 +176,7 @@ static const AVFilterPad ocr_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, +.config_props = config_input, }, { NULL } }; -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
follow comments from Steven Liu --- doc/filters.texi | 8 libavfilter/vf_ocr.c | 45 +++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/filters.texi b/doc/filters.texi index d991c06628..f41ba0ce46 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15457,6 +15457,14 @@ Set character whitelist. @item blacklist Set character blacklist. + +@item x, y +Set top-left corner of the subregion, in pixels, default is (0,0). + +@item w, h +Set width and height of the subregion, in pixels, +default is the bottom-right part from given top-left corner. + @end table The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c index 6de474025a..55f04b6592 100644 --- a/libavfilter/vf_ocr.c +++ b/libavfilter/vf_ocr.c @@ -33,6 +33,8 @@ typedef struct OCRContext { char *language; char *whitelist; char *blacklist; +int x, y, x_in, y_in; +int w, h, w_in, h_in; TessBaseAPI *tess; } OCRContext; @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { { "language", "set language",OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},0, 0, FLAGS }, +{ "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "h", "height of sub region",OFFSET(h), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, { NULL } }; @@ -93,6 +99,41 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, fmts_list); } +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int 
pic_h) +{ +// 0 <= x < pic_w +if (*x >= pic_w) +*x = 0; +// 0 <= y < pic_h +if (*y >= pic_h) +*y = 0; + +if (*w == 0 || *w + *x > pic_w) +*w = pic_w - *x; +if (*h == 0 || *h + *y > pic_h) +*h = pic_h - *y; +} + +static int config_input(AVFilterLink *inlink) +{ +AVFilterContext *ctx = inlink->dst; +OCRContext *s = ctx->priv; + +s->x_in = s->x; +s->y_in = s->y; +s->w_in = s->w; +s->h_in = s->h; +check_fix(&s->x_in, &s->y_in, &s->w_in, &s->h_in, inlink->w, inlink->h); +if ( s->x_in != s->x || s->y_in != s->y || +(s->w != 0 && s->w_in != s->w) || (s->h != 0 && s->h_in != s->h)) { +av_log(s, AV_LOG_WARNING, "config error, subregion changed to " + "x=%d, y=%d, w=%d, h=%d\n", + s->x_in, s->y_in, s->w_in, s->h_in); +} + +return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVDictionary **metadata = &in->metadata; @@ -102,8 +143,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) char *result; int *confs; +// TODO(vacing): support expression result = TessBaseAPIRect(s->tess, in->data[0], 1, - in->linesize[0], 0, 0, in->width, in->height); + in->linesize[0], s->x_in, s->y_in, s->w_in, s->h_in); confs = TessBaseAPIAllWordConfidences(s->tess); av_dict_set(metadata, "lavfi.ocr.text", result, 0); for (int i = 0; confs[i] != -1; i++) { @@ -134,6 +176,7 @@ static const AVFilterPad ocr_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, +.config_props = config_input, }, { NULL } }; -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH V4] lavf/vf_ocr: add subregion support
follow comments from Steven Liu --- doc/filters.texi | 8 libavfilter/vf_ocr.c | 45 +++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/doc/filters.texi b/doc/filters.texi index d991c06628..f41ba0ce46 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15457,6 +15457,14 @@ Set character whitelist. @item blacklist Set character blacklist. + +@item x, y +Set top-left corner of the subregion, in pixels, default is (0,0). + +@item w, h +Set width and height of the subregion, in pixels, +default is the bottom-right part from given top-left corner. + @end table The filter exports recognized text as the frame metadata @code{lavfi.ocr.text}. diff --git a/libavfilter/vf_ocr.c b/libavfilter/vf_ocr.c index 6de474025a..55f04b6592 100644 --- a/libavfilter/vf_ocr.c +++ b/libavfilter/vf_ocr.c @@ -33,6 +33,8 @@ typedef struct OCRContext { char *language; char *whitelist; char *blacklist; +int x, y, x_in, y_in; +int w, h, w_in, h_in; TessBaseAPI *tess; } OCRContext; @@ -45,6 +47,10 @@ static const AVOption ocr_options[] = { { "language", "set language",OFFSET(language), AV_OPT_TYPE_STRING, {.str="eng"}, 0, 0, FLAGS }, { "whitelist", "set character whitelist", OFFSET(whitelist), AV_OPT_TYPE_STRING, {.str="0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.:;,-+_!?\"'[]{}()<>|/\\=*&%$#@!~ "}, 0, 0, FLAGS }, { "blacklist", "set character blacklist", OFFSET(blacklist), AV_OPT_TYPE_STRING, {.str=""},0, 0, FLAGS }, +{ "x", "top x of sub region", OFFSET(x), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "y", "top y of sub region", OFFSET(y), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "w", "width of sub region", OFFSET(w), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, +{ "h", "height of sub region",OFFSET(h), AV_OPT_TYPE_INT,{.i64=0}, 0, INT_MAX, FLAGS }, { NULL } }; @@ -93,6 +99,41 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, fmts_list); } +static void check_fix(int *x, int *y, int *w, int *h, int pic_w, int 
pic_h) +{ +// 0 <= x < pic_w +if (*x >= pic_w) +*x = 0; +// 0 <= y < pic_h +if (*y >= pic_h) +*y = 0; + +if (*w == 0 || *w + *x > pic_w) +*w = pic_w - *x; +if (*h == 0 || *h + *y > pic_h) +*h = pic_h - *y; +} + +static int config_input(AVFilterLink *inlink) +{ +AVFilterContext *ctx = inlink->dst; +OCRContext *s = ctx->priv; + +s->x_in = s->x; +s->y_in = s->y; +s->w_in = s->w; +s->h_in = s->h; +check_fix(&s->x_in, &s->y_in, &s->w_in, &s->h_in, inlink->w, inlink->h); +if ( s->x_in != s->x || s->y_in != s->y || +(s->w != 0 && s->w_in != s->w) || (s->h != 0 && s->h_in != s->h)) { +av_log(s, AV_LOG_WARNING, "config error, subregion changed to " + "x=%d, y=%d, w=%d, h=%d\n", + s->x_in, s->y_in, s->w_in, s->h_in); +} + +return 0; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVDictionary **metadata = &in->metadata; @@ -102,8 +143,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) char *result; int *confs; +// TODO(vacing): support expression result = TessBaseAPIRect(s->tess, in->data[0], 1, - in->linesize[0], 0, 0, in->width, in->height); + in->linesize[0], s->x_in, s->y_in, s->w_in, s->h_in); confs = TessBaseAPIAllWordConfidences(s->tess); av_dict_set(metadata, "lavfi.ocr.text", result, 0); for (int i = 0; confs[i] != -1; i++) { @@ -134,6 +176,7 @@ static const AVFilterPad ocr_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, +.config_props = config_input, }, { NULL } }; -- 2.29.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: clip global_quality for ICQ modes.
On 2021-07-12 06:44, Xiang, Haihao wrote: On Sun, 2021-07-11 at 10:14 +0530, Gyan Doshi wrote: Allowed range is 1 to 51. Ref: https://software.intel.com/content/www/us/en/develop/articles/advanced-bitrate-control-methods-in-intel-media-sdk.html --- doc/encoders.texi | 3 ++- libavcodec/qsvenc.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/encoders.texi b/doc/encoders.texi index 4c38996372..8fccd73691 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3119,7 +3119,8 @@ also set (the @option{-qscale} ffmpeg option). @option{look_ahead} option is also set. @item -@var{ICQ} -- intelligent constant quality otherwise. +@var{ICQ} -- intelligent constant quality otherwise. For the ICQ modes, global +quality range is 1 to 51, with 1 being the best quality. @end itemize @item diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c index ca2b42cafd..b9a922d6b9 100644 --- a/libavcodec/qsvenc.c +++ b/libavcodec/qsvenc.c @@ -650,7 +650,7 @@ static int init_video_param(AVCodecContext *avctx, QSVEncContext *q) case MFX_RATECONTROL_LA_ICQ: q->extco2.LookAheadDepth = q->look_ahead_depth; case MFX_RATECONTROL_ICQ: -q->param.mfx.ICQQuality = avctx->global_quality; +q->param.mfx.ICQQuality = av_clip(avctx->global_quality, 1, 51); break; #endif #endif LGTM Thanks. Pushed as 1aa9dcd091ed9cebf06c4c6a9c96ff80a54722f4 Regards, Gyan ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/qsvenc: clip global_quality for ICQ modes.
On Sun, 2021-07-11 at 10:14 +0530, Gyan Doshi wrote: > Allowed range is 1 to 51. > > Ref: > https://software.intel.com/content/www/us/en/develop/articles/advanced-bitrate-control-methods-in-intel-media-sdk.html > --- > doc/encoders.texi | 3 ++- > libavcodec/qsvenc.c | 2 +- > 2 files changed, 3 insertions(+), 2 deletions(-) > > diff --git a/doc/encoders.texi b/doc/encoders.texi > index 4c38996372..8fccd73691 100644 > --- a/doc/encoders.texi > +++ b/doc/encoders.texi > @@ -3119,7 +3119,8 @@ also set (the @option{-qscale} ffmpeg option). > @option{look_ahead} option is also set. > > @item > -@var{ICQ} -- intelligent constant quality otherwise. > +@var{ICQ} -- intelligent constant quality otherwise. For the ICQ modes, > global > +quality range is 1 to 51, with 1 being the best quality. > @end itemize > > @item > diff --git a/libavcodec/qsvenc.c b/libavcodec/qsvenc.c > index ca2b42cafd..b9a922d6b9 100644 > --- a/libavcodec/qsvenc.c > +++ b/libavcodec/qsvenc.c > @@ -650,7 +650,7 @@ static int init_video_param(AVCodecContext *avctx, > QSVEncContext *q) > case MFX_RATECONTROL_LA_ICQ: > q->extco2.LookAheadDepth = q->look_ahead_depth; > case MFX_RATECONTROL_ICQ: > -q->param.mfx.ICQQuality = avctx->global_quality; > +q->param.mfx.ICQQuality = av_clip(avctx->global_quality, 1, 51); > break; > #endif > #endif LGTM Thanks Haihao ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH] mxfdec.c: fixed frame wrapping detection for MXFGCP1FrameWrappedPicture essence container
From: Pierre-Anthony Lemieux Signed-off-by: Pierre-Anthony Lemieux --- Notes: For JPEG 2000 essence, the MXF input format module currently uses the value of byte 14 of the essence container UL to determine whether the J2K essence is clip- (byte 14 is 0x02) or frame-wrapped (byte 14 is 0x01). This approach does not work when the essence container UL is equal to MXFGCP1FrameWrappedPicture, in which case the essence is always frame-wrapped. libavformat/mxf.h| 3 ++- libavformat/mxfdec.c | 4 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/libavformat/mxf.h b/libavformat/mxf.h index b1b1fedac7..ca510f5a2f 100644 --- a/libavformat/mxf.h +++ b/libavformat/mxf.h @@ -75,7 +75,8 @@ typedef enum { NormalWrap = 0, D10D11Wrap, RawAWrap, -RawVWrap +RawVWrap, +AlwaysFrameWrap } MXFWrappingIndicatorType; typedef struct MXFLocalTagPair { diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c index 3bf480a3a6..7024d2ea7d 100644 --- a/libavformat/mxfdec.c +++ b/libavformat/mxfdec.c @@ -1413,6 +1413,7 @@ static void *mxf_resolve_strong_ref(MXFContext *mxf, UID *strong_ref, enum MXFMe static const MXFCodecUL mxf_picture_essence_container_uls[] = { // video essence container uls +{ { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x06,0x00 }, 15, AV_CODEC_ID_JPEG2000, NULL, 16, AlwaysFrameWrap }, /* MXF-GC P1 Frame-Wrapped JPEG 2000 */ { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x07,0x0d,0x01,0x03,0x01,0x02,0x0c,0x01,0x00 }, 14, AV_CODEC_ID_JPEG2000, NULL, 14 }, { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x10,0x60,0x01 }, 14, AV_CODEC_ID_H264, NULL, 15 }, /* H.264 */ { { 0x06,0x0e,0x2b,0x34,0x04,0x01,0x01,0x02,0x0d,0x01,0x03,0x01,0x02,0x11,0x01,0x00 }, 14, AV_CODEC_ID_DNXHD, NULL, 14 }, /* VC-3 */ @@ -1497,6 +1498,9 @@ static MXFWrappingScheme mxf_get_wrapping_kind(UID *essence_container_ul) if (val == 0x02) val = 0x01; break; +case AlwaysFrameWrap: +val = 0x01; +break; } if (val == 0x01) return FrameWrapped; -- 2.17.1 ___ 
ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avfilter: add afwtdn filter
On 2021-07-10 15:20, Paul B Mahol wrote: Signed-off-by: Paul B Mahol --- doc/filters.texi | 60 ++ libavfilter/Makefile |1 + libavfilter/af_afwtdn.c | 1349 ++ libavfilter/allfilters.c |1 + 4 files changed, 1411 insertions(+) create mode 100644 libavfilter/af_afwtdn.c diff --git a/doc/filters.texi b/doc/filters.texi index d991c06628..8c91d49ced 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -1493,6 +1493,66 @@ Default value is 1.0. This filter supports the all above options as @ref{commands}. +@section afwtdn +Reduce broadband noise from input samples using Wavelets. + +A description of the accepted options follows. + +@table @option +@item sigma +Set the noise sigma, allowed range is from 0 to 1. +Default value is 0. +This option controls strength of denoising applied to input samples. +Most useful way to set this option is via decibels, eg. -45dB. + +@item levels +Set the number of wavelet levels of decomposition. +Allowed range is from 1 to 12. +Default value is 10. +Setting this too low make denoising performance very poor. + +@item wavet +Set wavelet type for decomposition of input frame. +They are sorted by number of coefficients, from lowest to highest. +More coefficients means worse filtering speed, but overall better quality. +Available wavelets are: + +@table @samp +@item sym2 +@item sym4 +@item rbior68 +@item deb10 +@item sym10 +@item coif5 +@item bl3 +@end table + +@item percent +Set percent of full denoising. Allowed range is from 0 to 100 percent. +Default value is 85 percent or partial denoising. + +@item profile +If enabled, first input frame will be used as noise profile. +If first frame samples contain non-noise performance will be very poor. + +@item adaptive +If enabled, input frames are analyzed for presence of noise. +If noise is detected with high possibility then input frame profile will be +used for processing following frames, until new noise frame is detected. + +@item samples +Set size of single frame in number of samples. 
Allowed range is from 512 to +65536. Default frame size is 8192 samples. + +@item softness +Set softness applied inside thresholding function. Allowed range is from 0 to +10. Default softness is 1. +@end table + +@subsection Commands + +This filter supports the all above options as @ref{commands}. + @section agate A gate is mainly used to reduce lower parts of a signal. This kind of signal diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 62ee3d7b67..49c0c8342b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -53,6 +53,7 @@ OBJS-$(CONFIG_AFFTFILT_FILTER) += af_afftfilt.o OBJS-$(CONFIG_AFIR_FILTER) += af_afir.o OBJS-$(CONFIG_AFORMAT_FILTER)+= af_aformat.o OBJS-$(CONFIG_AFREQSHIFT_FILTER) += af_afreqshift.o +OBJS-$(CONFIG_AFWTDN_FILTER) += af_afwtdn.o OBJS-$(CONFIG_AGATE_FILTER) += af_agate.o OBJS-$(CONFIG_AIIR_FILTER) += af_aiir.o OBJS-$(CONFIG_AINTEGRAL_FILTER) += af_aderivative.o diff --git a/libavfilter/af_afwtdn.c b/libavfilter/af_afwtdn.c new file mode 100644 index 00..16195776b4 --- /dev/null +++ b/libavfilter/af_afwtdn.c @@ -0,0 +1,1349 @@ +/* + * Copyright (c) 2020 Paul B Mahol + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "libavutil/avassert.h" +#include "libavutil/avstring.h" +#include "libavutil/opt.h" +#include "avfilter.h" +#include "audio.h" +#include "filters.h" +#include "formats.h" + +enum WaveletTypes { +SYM2, +SYM4, +RBIOR68, +DEB10, +SYM10, +COIF5, +BL3, +NB_WAVELET_TYPES, +}; + +/* + * All wavelets coefficients are taken from: http://wavelets.pybytes.com/ + */ + +static const double bl3_lp[42] = { +0.000146098, -0.000232304, -0.000285414, 0.000462093, 0.000559952, +-0.000927187, -0.001103748, 0.00188212, 0.002186714, -0.003882426, +-0.00435384, 0.008201477, 0.008685294, -0.017982291, -0.017176331, +0.042068328, 0.032080869, -0.110036987, -0.050201753, 0.433923147, +0.766130398, 0.433923147, -0.050201753, -0.110036987, 0.032080869, +0.042068328, -0.017176331, -0.017982291,
Re: [FFmpeg-devel] [PATCH] cafenc: fill in avg. packet size later if unknown
On 2021-07-10 03:42, Lynne wrote: This doesn't move the pointer back to the file end if par->block_align is set. I think that's fine though, since the function writes the trailer, which should mean that nothing more needs to be written. Patch LGTM. But please, someone yell at Apple to support Opus in MP4, WebM and OGG, as terrible as that is. Doesn't Apple already support WebM and Opus? ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/2] lavfi/dnn_backend_ov: Rename RequestItem to OVRequestItem
Rename RequestItem to OVRequestItem in the OpenVINO backend to avoid confusion. Signed-off-by: Shubhanshu Saxena --- libavfilter/dnn/dnn_backend_openvino.c | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index b340859c12..f8d548feaf 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -54,18 +54,18 @@ typedef struct OVModel{ ie_core_t *core; ie_network_t *network; ie_executable_network_t *exe_network; -SafeQueue *request_queue; // holds RequestItem +SafeQueue *request_queue; // holds OVRequestItem Queue *task_queue; // holds TaskItem Queue *inference_queue; // holds InferenceItem } OVModel; // one request for one call to openvino -typedef struct RequestItem { +typedef struct OVRequestItem { ie_infer_request_t *infer_request; InferenceItem **inferences; uint32_t inference_count; ie_complete_call_back_t callback; -} RequestItem; +} OVRequestItem; #define APPEND_STRING(generated_string, iterate_string) \ generated_string = generated_string ? 
av_asprintf("%s %s", generated_string, iterate_string) : \ @@ -111,7 +111,7 @@ static int get_datatype_size(DNNDataType dt) } } -static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request) +static DNNReturnType fill_model_input_ov(OVModel *ov_model, OVRequestItem *request) { dimensions_t dims; precision_e precision; @@ -198,7 +198,7 @@ static void infer_completion_callback(void *args) dimensions_t dims; precision_e precision; IEStatusCode status; -RequestItem *request = args; +OVRequestItem *request = args; InferenceItem *inference = request->inferences[0]; TaskItem *task = inference->task; OVModel *ov_model = task->model; @@ -381,7 +381,7 @@ static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, co } for (int i = 0; i < ctx->options.nireq; i++) { -RequestItem *item = av_mallocz(sizeof(*item)); +OVRequestItem *item = av_mallocz(sizeof(*item)); if (!item) { goto err; } @@ -422,7 +422,7 @@ err: return DNN_ERROR; } -static DNNReturnType execute_model_ov(RequestItem *request, Queue *inferenceq) +static DNNReturnType execute_model_ov(OVRequestItem *request, Queue *inferenceq) { IEStatusCode status; DNNReturnType ret; @@ -639,7 +639,7 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu OVModel *ov_model = model; OVContext *ctx = _model->ctx; TaskItem task; -RequestItem *request; +OVRequestItem *request; AVFrame *in_frame = NULL; AVFrame *out_frame = NULL; IEStatusCode status; @@ -779,7 +779,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNExecBaseParams * OVModel *ov_model = model->model; OVContext *ctx = _model->ctx; TaskItem task; -RequestItem *request; +OVRequestItem *request; if (ff_check_exec_params(ctx, DNN_OV, model->func_type, exec_params) != 0) { return DNN_ERROR; @@ -827,7 +827,7 @@ DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, DNNExecBasePa { OVModel *ov_model = model->model; OVContext *ctx = _model->ctx; -RequestItem *request; +OVRequestItem 
*request; TaskItem *task; DNNReturnType ret; @@ -904,7 +904,7 @@ DNNReturnType ff_dnn_flush_ov(const DNNModel *model) { OVModel *ov_model = model->model; OVContext *ctx = &ov_model->ctx; -RequestItem *request; +OVRequestItem *request; IEStatusCode status; DNNReturnType ret; @@ -943,7 +943,7 @@ void ff_dnn_free_model_ov(DNNModel **model) if (*model){ OVModel *ov_model = (*model)->model; while (ff_safe_queue_size(ov_model->request_queue) != 0) { -RequestItem *item = ff_safe_queue_pop_front(ov_model->request_queue); +OVRequestItem *item = ff_safe_queue_pop_front(ov_model->request_queue); if (item && item->infer_request) { ie_infer_request_free(&item->infer_request); } -- 2.25.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/2] lavfi/dnn_backend_openvino.c: Fix Memory Leak in execute_model_ov
In cases where the execution inside the function execute_model_ov fails, the OVRequestItem must be pushed back to the request_queue before returning the error. In case pushing back fails, release the allocated memory. Signed-off-by: Shubhanshu Saxena --- libavfilter/dnn/dnn_backend_openvino.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index f34b8150f5..b340859c12 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -432,6 +432,8 @@ static DNNReturnType execute_model_ov(RequestItem *request, Queue *inferenceq) OVModel *ov_model; if (ff_queue_size(inferenceq) == 0) { +ie_infer_request_free(&request->infer_request); +av_freep(&request); return DNN_SUCCESS; } @@ -443,7 +445,7 @@ static DNNReturnType execute_model_ov(RequestItem *request, Queue *inferenceq) if (task->async) { ret = fill_model_input_ov(ov_model, request); if (ret != DNN_SUCCESS) { -return ret; +goto err; } status = ie_infer_set_completion_callback(request->infer_request, &request->callback); if (status != OK) { @@ -459,7 +461,7 @@ static DNNReturnType execute_model_ov(RequestItem *request, Queue *inferenceq) } else { ret = fill_model_input_ov(ov_model, request); if (ret != DNN_SUCCESS) { -return ret; +goto err; } status = ie_infer_request_infer(request->infer_request); if (status != OK) { -- 2.25.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH V2 3/6] lavfi/dnn_backend_tf: Request-based Execution
On Sun, Jul 11, 2021 at 6:25 PM Guo, Yejun wrote: > > > > -Original Message- > > From: ffmpeg-devel On Behalf Of > > Shubhanshu Saxena > > Sent: 2021年7月5日 18:31 > > To: ffmpeg-devel@ffmpeg.org > > Cc: Shubhanshu Saxena > > Subject: [FFmpeg-devel] [PATCH V2 3/6] lavfi/dnn_backend_tf: Request- > > based Execution > > > > This commit uses TFRequestItem and the existing sync execution mechanism > > to use request-based execution. It will help in adding async > functionality to > > the TensorFlow backend later. > > > > Signed-off-by: Shubhanshu Saxena > > --- > > libavfilter/dnn/dnn_backend_common.h | 3 + > > libavfilter/dnn/dnn_backend_openvino.c | 2 +- > > libavfilter/dnn/dnn_backend_tf.c | 156 ++--- > > 3 files changed, 91 insertions(+), 70 deletions(-) > > > > diff --git a/libavfilter/dnn/dnn_backend_common.h > > b/libavfilter/dnn/dnn_backend_common.h > > index df59615f40..5281fdfed1 100644 > > --- a/libavfilter/dnn/dnn_backend_common.h > > +++ b/libavfilter/dnn/dnn_backend_common.h > > @@ -26,6 +26,9 @@ > > > > #include "../dnn_interface.h" > > > > +#define DNN_BACKEND_COMMON_OPTIONS \ > > +{ "nireq", "number of request", > OFFSET(options.nireq), > > AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, > > + > > // one task for one function call from dnn interface typedef struct > TaskItem > > { > > void *model; // model for the backend diff --git > > a/libavfilter/dnn/dnn_backend_openvino.c > > b/libavfilter/dnn/dnn_backend_openvino.c > > index 3295fc79d3..f34b8150f5 100644 > > --- a/libavfilter/dnn/dnn_backend_openvino.c > > +++ b/libavfilter/dnn/dnn_backend_openvino.c > > @@ -75,7 +75,7 @@ typedef struct RequestItem { #define FLAGS > > AV_OPT_FLAG_FILTERING_PARAM static const AVOption > > dnn_openvino_options[] = { > > { "device", "device to run model", OFFSET(options.device_type), > > AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, > > -{ "nireq", "number of request", OFFSET(options.nireq), > > AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, > > 
+DNN_BACKEND_COMMON_OPTIONS > > { "batch_size", "batch size per request", > OFFSET(options.batch_size), > > AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, > > { "input_resizable", "can input be resizable or not", > > OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, > 0, 1, > > FLAGS }, > > { NULL } > > diff --git a/libavfilter/dnn/dnn_backend_tf.c > > b/libavfilter/dnn/dnn_backend_tf.c > > index 578748eb35..e8007406c8 100644 > > --- a/libavfilter/dnn/dnn_backend_tf.c > > +++ b/libavfilter/dnn/dnn_backend_tf.c > > @@ -35,11 +35,13 @@ > > #include "dnn_backend_native_layer_maximum.h" > > #include "dnn_io_proc.h" > > #include "dnn_backend_common.h" > > +#include "safe_queue.h" > > #include "queue.h" > > #include > > > > typedef struct TFOptions{ > > char *sess_config; > > +uint32_t nireq; > > } TFOptions; > > > > typedef struct TFContext { > > @@ -53,6 +55,7 @@ typedef struct TFModel{ > > TF_Graph *graph; > > TF_Session *session; > > TF_Status *status; > > +SafeQueue *request_queue; > > Queue *inference_queue; > > } TFModel; > > > > @@ -77,12 +80,13 @@ typedef struct TFRequestItem { #define FLAGS > > AV_OPT_FLAG_FILTERING_PARAM static const AVOption > > dnn_tensorflow_options[] = { > > { "sess_config", "config for SessionOptions", > OFFSET(options.sess_config), > > AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > > +DNN_BACKEND_COMMON_OPTIONS > > { NULL } > > }; > > > > AVFILTER_DEFINE_CLASS(dnn_tensorflow); > > > > -static DNNReturnType execute_model_tf(Queue *inference_queue); > > +static DNNReturnType execute_model_tf(TFRequestItem *request, Queue > > +*inference_queue); > > > > static void free_buffer(void *data, size_t length) { @@ -237,6 +241,7 > @@ > > static DNNReturnType get_output_tf(void *model, const char *input_name, > > int inpu > > AVFrame *in_frame = av_frame_alloc(); > > AVFrame *out_frame = NULL; > > TaskItem task; > > +TFRequestItem *request; > > > > if (!in_frame) { > > av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for 
input > > frame\n"); @@ -267,7 +272,13 @@ static DNNReturnType > > get_output_tf(void *model, const char *input_name, int inpu > > return DNN_ERROR; > > } > > > > -ret = execute_model_tf(tf_model->inference_queue); > > +request = ff_safe_queue_pop_front(tf_model->request_queue); > > +if (!request) { > > +av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n"); > > +return DNN_ERROR; > > +} > > + > > +ret = execute_model_tf(request, tf_model->inference_queue); > > *output_width = out_frame->width; > > *output_height = out_frame->height; > > > > @@ -771,6 +782,7 @@ DNNModel *ff_dnn_load_model_tf(const char > > *model_filename, DNNFunctionType func_ { > > DNNModel *model = NULL; > > TFModel *tf_model
[FFmpeg-devel] [PATCH 3/3] avformat/mov: do not ignore errors in mov_metadata_hmmt()
Fixes: Timeout Fixes: 35637/clusterfuzz-testcase-minimized-ffmpeg_dem_MOV_fuzzer-6311060272447488 Found-by: continuous fuzzing process https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg Signed-off-by: Michael Niedermayer --- libavformat/mov.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libavformat/mov.c b/libavformat/mov.c index 2e061a55d1..84a240e2ca 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c @@ -286,6 +286,8 @@ static int mov_metadata_hmmt(MOVContext *c, AVIOContext *pb, unsigned len) int moment_time = avio_rb32(pb); avpriv_new_chapter(c->fc, i, av_make_q(1, 1000), moment_time, AV_NOPTS_VALUE, NULL); } +if (avio_feof(pb)) +return AVERROR_INVALIDDATA; return 0; } -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 2/3] avformat/mxfdec: Check size for shrinking
av_shrink_packet() takes int size, so size must fit in int Fixes: out of array access Fixes: 35607/clusterfuzz-testcase-minimized-ffmpeg_dem_MXF_fuzzer-4875541323841536 Found-by: continuous fuzzing process https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg Signed-off-by: Michael Niedermayer --- libavformat/mxfdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c index 16f5052452..f813408b3d 100644 --- a/libavformat/mxfdec.c +++ b/libavformat/mxfdec.c @@ -622,7 +622,7 @@ static int mxf_decrypt_triplet(AVFormatContext *s, AVPacket *pkt, KLVPacket *klv return AVERROR_INVALIDDATA; // enc. code size = klv_decode_ber_length(pb); -if (size < 32 || size - 32 < orig_size) +if (size < 32 || size - 32 < orig_size || (int)orig_size != orig_size) return AVERROR_INVALIDDATA; avio_read(pb, ivec, 16); avio_read(pb, tmpbuf, 16); -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 1/3] avcodec/exr: Check uncompressed_size against max_pixels
Fixes: Timeout Fixes: 35286/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_EXR_fuzzer-6557139802914816 Fixes: 31253/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_EXR_fuzzer-4901782326214656 Found-by: continuous fuzzing process https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg Signed-off-by: Michael Niedermayer --- libavcodec/exr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libavcodec/exr.c b/libavcodec/exr.c index e7387ffd66..f7d59c8241 100644 --- a/libavcodec/exr.c +++ b/libavcodec/exr.c @@ -1299,6 +1299,9 @@ static int decode_block(AVCodecContext *avctx, void *tdata, axmax = FFMAX(0, (avctx->width - (s->xmax + 1))) * step; } +if (avctx->max_pixels && uncompressed_size > avctx->max_pixels * 16LL) +return AVERROR_INVALIDDATA; + if (data_size < uncompressed_size || s->is_tile) { /* td->tmp is use for tile reorganization */ av_fast_padded_malloc(&td->tmp, &td->tmp_size, uncompressed_size); if (!td->tmp) -- 2.17.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Request for review - x265 User Data Unregistered SEI patch
On 11.07.2021 14:01, Derek Buitenhuis wrote: Hi Brad, On 7/8/2021 4:31 AM, Brad Hards wrote: About a month ago, I submitted a patch to add User Data Unregistered SEI writing to the x265 implementation. See http://ffmpeg.org/pipermail/ffmpeg-devel/2021-June/280978.html[1] and https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210605102028.15571-2-br...@frogmouth.net/[2] If this is OK, can it please be merged? If not, can I get feedback so I can address the issues? Can you amend the commit message to contain the reasoning from [1]? A quick review: +void *sei_data; +int sei_data_size; I don't see sei_data freed anywhere at the end of decoding? if (pic) { +x265_sei *sei = &(x265pic.userSEI); Drop the paren for consistency with the rest of the codebase. +tmp = av_fast_realloc(ctx->sei_data, + &ctx->sei_data_size, + (sei->numPayloads + 1) * sizeof(x265_sei_payload)); Convention in FFmpeg is to do sizeof(*var). +if (!tmp) { +av_freep(&x265pic.userData); +av_freep(&x265pic.quantOffsets); +return AVERROR(ENOMEM); +} else { This else statement is not needed. +sei_payload = &(sei->payloads[sei->numPayloads]); Drop the paren. +sei_payload->payloadType = USER_DATA_UNREGISTERED; I'm surprised x265 has un-namespaced enums... gross. Could probably use our SEI_TYPE_USER_DATA_UNREGISTERED instead, seems to refer to the same value. - Derek ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". smime.p7s Description: S/MIME Cryptographic Signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] avcodec/libdav1d: parse sequence headers in extradata if available
On 7/11/2021 8:40 AM, Derek Buitenhuis wrote: On 7/9/2021 3:53 PM, James Almer wrote: +res = dav1d_parse_sequence_header(&seq, c->extradata + offset, + c->extradata_size - offset); +if (res < 0) { +av_log(c, explode ? AV_LOG_ERROR : AV_LOG_INFO, + "Error decoding extradata\n"); +return explode ? AVERROR_INVALIDDATA : 0; +} + I don't think it is a good idea to fail like this, even in explode mode. Both the AV1-in-ISOBMFF and AV1-in-Matroska specs do specify that the sequence header OBU must be first if there is one in the configOBUs, however, they do not require one to actually be present. For example, configOBUs may contain a single metadata OBU, and nothing else - and it would be entirely valid - so failing here would actually be wrong, even in explode mode. This can happen, if, for example, you have a file with HDR metadata in the av1c box, but in-band sequence headers. I can amend this patch locally to check for DAV1D_ERR(ENOENT), which was implemented to signal "No seqhdr is present", and return 0 in that case, but it's a very recent addition, so older libdav1d builds will never emit that error code. Guess just returning 0 on all scenarios here until we increase the minimum required library version is better, so I'll do that. - Derek ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH V2 3/6] lavfi/dnn_backend_tf: Request-based Execution
> -Original Message- > From: ffmpeg-devel On Behalf Of > Shubhanshu Saxena > Sent: 2021年7月5日 18:31 > To: ffmpeg-devel@ffmpeg.org > Cc: Shubhanshu Saxena > Subject: [FFmpeg-devel] [PATCH V2 3/6] lavfi/dnn_backend_tf: Request- > based Execution > > This commit uses TFRequestItem and the existing sync execution mechanism > to use request-based execution. It will help in adding async functionality to > the TensorFlow backend later. > > Signed-off-by: Shubhanshu Saxena > --- > libavfilter/dnn/dnn_backend_common.h | 3 + > libavfilter/dnn/dnn_backend_openvino.c | 2 +- > libavfilter/dnn/dnn_backend_tf.c | 156 ++--- > 3 files changed, 91 insertions(+), 70 deletions(-) > > diff --git a/libavfilter/dnn/dnn_backend_common.h > b/libavfilter/dnn/dnn_backend_common.h > index df59615f40..5281fdfed1 100644 > --- a/libavfilter/dnn/dnn_backend_common.h > +++ b/libavfilter/dnn/dnn_backend_common.h > @@ -26,6 +26,9 @@ > > #include "../dnn_interface.h" > > +#define DNN_BACKEND_COMMON_OPTIONS \ > +{ "nireq", "number of request", > OFFSET(options.nireq), > AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, > + > // one task for one function call from dnn interface typedef struct TaskItem > { > void *model; // model for the backend diff --git > a/libavfilter/dnn/dnn_backend_openvino.c > b/libavfilter/dnn/dnn_backend_openvino.c > index 3295fc79d3..f34b8150f5 100644 > --- a/libavfilter/dnn/dnn_backend_openvino.c > +++ b/libavfilter/dnn/dnn_backend_openvino.c > @@ -75,7 +75,7 @@ typedef struct RequestItem { #define FLAGS > AV_OPT_FLAG_FILTERING_PARAM static const AVOption > dnn_openvino_options[] = { > { "device", "device to run model", OFFSET(options.device_type), > AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, > -{ "nireq", "number of request", OFFSET(options.nireq), > AV_OPT_TYPE_INT,{ .i64 = 0 }, 0, INT_MAX, FLAGS }, > +DNN_BACKEND_COMMON_OPTIONS > { "batch_size", "batch size per request", OFFSET(options.batch_size), > AV_OPT_TYPE_INT,{ .i64 = 1 }, 1, 1000, FLAGS}, > { 
"input_resizable", "can input be resizable or not", > OFFSET(options.input_resizable), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, > FLAGS }, > { NULL } > diff --git a/libavfilter/dnn/dnn_backend_tf.c > b/libavfilter/dnn/dnn_backend_tf.c > index 578748eb35..e8007406c8 100644 > --- a/libavfilter/dnn/dnn_backend_tf.c > +++ b/libavfilter/dnn/dnn_backend_tf.c > @@ -35,11 +35,13 @@ > #include "dnn_backend_native_layer_maximum.h" > #include "dnn_io_proc.h" > #include "dnn_backend_common.h" > +#include "safe_queue.h" > #include "queue.h" > #include > > typedef struct TFOptions{ > char *sess_config; > +uint32_t nireq; > } TFOptions; > > typedef struct TFContext { > @@ -53,6 +55,7 @@ typedef struct TFModel{ > TF_Graph *graph; > TF_Session *session; > TF_Status *status; > +SafeQueue *request_queue; > Queue *inference_queue; > } TFModel; > > @@ -77,12 +80,13 @@ typedef struct TFRequestItem { #define FLAGS > AV_OPT_FLAG_FILTERING_PARAM static const AVOption > dnn_tensorflow_options[] = { > { "sess_config", "config for SessionOptions", > OFFSET(options.sess_config), > AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS }, > +DNN_BACKEND_COMMON_OPTIONS > { NULL } > }; > > AVFILTER_DEFINE_CLASS(dnn_tensorflow); > > -static DNNReturnType execute_model_tf(Queue *inference_queue); > +static DNNReturnType execute_model_tf(TFRequestItem *request, Queue > +*inference_queue); > > static void free_buffer(void *data, size_t length) { @@ -237,6 +241,7 @@ > static DNNReturnType get_output_tf(void *model, const char *input_name, > int inpu > AVFrame *in_frame = av_frame_alloc(); > AVFrame *out_frame = NULL; > TaskItem task; > +TFRequestItem *request; > > if (!in_frame) { > av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input > frame\n"); @@ -267,7 +272,13 @@ static DNNReturnType > get_output_tf(void *model, const char *input_name, int inpu > return DNN_ERROR; > } > > -ret = execute_model_tf(tf_model->inference_queue); > +request = ff_safe_queue_pop_front(tf_model->request_queue); > +if 
(!request) { > +av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n"); > +return DNN_ERROR; > +} > + > +ret = execute_model_tf(request, tf_model->inference_queue); > *output_width = out_frame->width; > *output_height = out_frame->height; > > @@ -771,6 +782,7 @@ DNNModel *ff_dnn_load_model_tf(const char > *model_filename, DNNFunctionType func_ { > DNNModel *model = NULL; > TFModel *tf_model = NULL; > +TFContext *ctx = NULL; > > model = av_mallocz(sizeof(DNNModel)); > if (!model){ > @@ -782,13 +794,14 @@ DNNModel *ff_dnn_load_model_tf(const char > *model_filename, DNNFunctionType func_ > av_freep(&model); > return NULL; > } > -
Re: [FFmpeg-devel] [PATCH] avcodec/libdav1d: parse sequence headers in extradata if available
On 7/9/2021 3:53 PM, James Almer wrote: > +res = dav1d_parse_sequence_header(&seq, c->extradata + offset, > + c->extradata_size - offset); > +if (res < 0) { > +av_log(c, explode ? AV_LOG_ERROR : AV_LOG_INFO, > + "Error decoding extradata\n"); > +return explode ? AVERROR_INVALIDDATA : 0; > +} > + I don't think it is a good idea to fail like this, even in explode mode. Both the AV1-in-ISOBMFF and AV1-in-Matroska specs do specify that the sequence header OBU must be first if there is one in the configOBUs, however, they do not require one to actually be present. For example, configOBUs may contain a single metadata OBU, and nothing else - and it would be entirely valid - so failing here would actually be wrong, even in explode mode. This can happen, if, for example, you have a file with HDR metadata in the av1c box, but in-band sequence headers. - Derek ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] Request for review - x265 User Data Unregistered SEI patch
Hi Brad, On 7/8/2021 4:31 AM, Brad Hards wrote: > About a month ago, I submitted a patch to add User Data Unregistered SEI > writing to the x265 implementation. > > See http://ffmpeg.org/pipermail/ffmpeg-devel/2021-June/280978.html[1] > and > https://patchwork.ffmpeg.org/project/ffmpeg/patch/20210605102028.15571-2-br...@frogmouth.net/[2] > > > If this is OK, can it please be merged? If not, can I get feedback so I can > address the issues? Can you amend the commit message to contain the reasoning from [1]? A quick review: > +void *sei_data; > +int sei_data_size; I don't see sei_data freed anywhere at the end of decoding? > if (pic) { > +x265_sei *sei = &(x265pic.userSEI); Drop the paren for consistency with the rest of the codebase. > +tmp = av_fast_realloc(ctx->sei_data, > + &ctx->sei_data_size, > + (sei->numPayloads + 1) * > sizeof(x265_sei_payload)); Convention in FFmpeg is to do sizeof(*var). > +if (!tmp) { > +av_freep(&x265pic.userData); > +av_freep(&x265pic.quantOffsets); > +return AVERROR(ENOMEM); > +} else { This else statement is not needed. > +sei_payload = &(sei->payloads[sei->numPayloads]); Drop the paren. > +sei_payload->payloadType = USER_DATA_UNREGISTERED; I'm surprised x265 has un-namespaced enums... gross. - Derek ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH] ffmpeg: add option recast_media
Pushed as 79ebdbb9b9da0a86b277e3f85981196c781af398 On 2021-07-08 09:27, Gyan Doshi wrote: Plan to push in a couple of days. On 2021-07-02 15:33, Gyan Doshi wrote: Allows forcing decoders of different media type. Needed to decode media data muxed as data streams. --- doc/ffmpeg.texi | 5 + fftools/ffmpeg_opt.c | 7 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/ffmpeg.texi b/doc/ffmpeg.texi index 7827291755..c1065086e5 100644 --- a/doc/ffmpeg.texi +++ b/doc/ffmpeg.texi @@ -449,6 +449,11 @@ output file already exists. Set number of times input stream shall be looped. Loop 0 means no loop, loop -1 means infinite loop. +@item -recast_media (@emph{global}) +Enable to allow forcing a decoder of a different media type than +the one detected or designated by the demuxer. Useful for decoding +media data muxed as data streams. + @item -c[:@var{stream_specifier}] @var{codec} (@emph{input/output,per-stream}) @itemx -codec[:@var{stream_specifier}] @var{codec} (@emph{input/output,per-stream}) Select an encoder (when used before an output file) or a decoder (when used diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c index a63bed54cf..76a220c21c 100644 --- a/fftools/ffmpeg_opt.c +++ b/fftools/ffmpeg_opt.c @@ -186,6 +186,7 @@ static int input_sync; static int input_stream_potentially_available = 0; static int ignore_unknown_streams = 0; static int copy_unknown_streams = 0; +static int recast_media = 0; static int find_stream_info = 1; static void uninit_options(OptionsContext *o) @@ -759,7 +760,7 @@ static const AVCodec *find_codec_or_die(const char *name, enum AVMediaType type, av_log(NULL, AV_LOG_FATAL, "Unknown %s '%s'\n", codec_string, name); exit_program(1); } - if (codec->type != type) { + if (codec->type != type && !recast_media) { av_log(NULL, AV_LOG_FATAL, "Invalid %s type '%s'\n", codec_string, name); exit_program(1); } @@ -774,6 +775,8 @@ static const AVCodec *choose_decoder(OptionsContext *o, AVFormatContext *s, AVSt if (codec_name) { const 
AVCodec *codec = find_codec_or_die(codec_name, st->codecpar->codec_type, 0); st->codecpar->codec_id = codec->id; + if (recast_media && st->codecpar->codec_type != codec->type) + st->codecpar->codec_type = codec->type; return codec; } else return avcodec_find_decoder(st->codecpar->codec_id); @@ -3429,6 +3432,8 @@ const OptionDef options[] = { "Ignore unknown stream types" }, { "copy_unknown", OPT_BOOL | OPT_EXPERT, { &copy_unknown_streams }, "Copy unknown stream types" }, + { "recast_media", OPT_BOOL | OPT_EXPERT, { &recast_media }, + "recast stream type in order to force a decoder of different media type" }, { "c", HAS_ARG | OPT_STRING | OPT_SPEC | OPT_INPUT | OPT_OUTPUT, { .off = OFFSET(codec_names) }, "codec name", "codec" }, ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".