Am 11.03.19 um 23:29 schrieb Lou Logan: > Commit message title prefix for filter patches are usually in the form > of: > > avfilter/fillborders: > or > lavfi/fillborders: > > Trailing whitespace. This should always be avoided. > > Use av_malloc.
I have now separated the changes into 4 patches and merged your hints, so you can clearly see which calculations I have skipped or moved out of the inner for loops. From your experience, can you estimate whether a performance win could be expected compared to the original code, even without a benchmark? -Ulf
>From c51360f3b4be0dca597190da5c2128b45e9ee31b Mon Sep 17 00:00:00 2001 From: Ulf Zibis <ulf.zi...@cosoco.de> Date: 14.03.2019, 19:34:03 avfilter/fillborders: added comments; named more descriptive; corrected indentations; diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c index 1344587..820aa2d 100644 --- a/libavfilter/vf_fillborders.c +++ b/libavfilter/vf_fillborders.c @@ -87,26 +87,27 @@ int p, y; for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; + uint8_t *data = frame->data[p]; int linesize = frame->linesize[p]; + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(ptr + y * linesize, - *(ptr + y * linesize + s->borders[p].left), + memset(data + y * linesize, + *(data + y * linesize + s->borders[p].left), s->borders[p].left); - memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right, - *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1), + memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, + *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1), s->borders[p].right); } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + s->borders[p].top * linesize, s->planewidth[p]); + memcpy(data + y * linesize, + data + s->borders[p].top * linesize, s->planewidth[p]); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memcpy(ptr + y * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, + memcpy(data + y * linesize, + data + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, s->planewidth[p]); } } @@ -117,29 +118,29 @@ int p, y, x; for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / 2; + uint16_t *data = (uint16_t *)frame->data[p]; + int linesize = frame->linesize[p] / 
sizeof(uint16_t); + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = *(ptr + y * linesize + s->borders[p].left); + data[y * linesize + x] = *(data + y * linesize + s->borders[p].left); } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - *(ptr + y * linesize + s->planewidth[p] - s->borders[p].right - 1); + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1); } } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + s->borders[p].top * linesize, s->planewidth[p] * 2); + memcpy(data + y * linesize, + data + s->borders[p].top * linesize, s->planewidth[p] * sizeof(uint16_t)); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memcpy(ptr + y * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, - s->planewidth[p] * 2); + memcpy(data + y * linesize, + data + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, + s->planewidth[p] * sizeof(uint16_t)); } } } @@ -149,29 +150,29 @@ int p, y, x; for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; + uint8_t *data = frame->data[p]; int linesize = frame->linesize[p]; + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x]; + data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - 
x]; + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; } } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + (s->borders[p].top * 2 - 1 - y) * linesize, + memcpy(data + y * linesize, + data + (s->borders[p].top * 2 - 1 - y) * linesize, s->planewidth[p]); } - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, + memcpy(data + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, + data + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, s->planewidth[p]); } } @@ -182,30 +183,31 @@ int p, y, x; for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / 2; + uint16_t *data = (uint16_t *)frame->data[p]; + int linesize = frame->linesize[p] / sizeof(uint16_t); + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = ptr[y * linesize + s->borders[p].left * 2 - 1 - x]; + data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; } for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - ptr[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; } } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { - memcpy(ptr + y * linesize, - ptr + (s->borders[p].top * 2 - 1 - y) * linesize, - s->planewidth[p] * 2); + memcpy(data + y * linesize, + data + (s->borders[p].top * 2 - 1 - y) * linesize, + s->planewidth[p] * 
sizeof(uint16_t)); } - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(ptr + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - ptr + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, - s->planewidth[p] * 2); + memcpy(data + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, + data + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, + s->planewidth[p] * sizeof(uint16_t)); } } } @@ -215,22 +217,23 @@ int p, y; for (p = 0; p < s->nb_planes; p++) { - uint8_t *ptr = frame->data[p]; + uint8_t *data = frame->data[p]; uint8_t fill = s->fill[p]; int linesize = frame->linesize[p]; + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(ptr + y * linesize, fill, s->borders[p].left); - memset(ptr + y * linesize + s->planewidth[p] - s->borders[p].right, fill, + memset(data + y * linesize, fill, s->borders[p].left); + memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill, s->borders[p].right); } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { - memset(ptr + y * linesize, fill, s->planewidth[p]); + memset(data + y * linesize, fill, s->planewidth[p]); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memset(ptr + y * linesize, fill, s->planewidth[p]); + memset(data + y * linesize, fill, s->planewidth[p]); } } } @@ -240,29 +243,29 @@ int p, y, x; for (p = 0; p < s->nb_planes; p++) { - uint16_t *ptr = (uint16_t *)frame->data[p]; + uint16_t *data = (uint16_t *)frame->data[p]; uint16_t fill = s->fill[p] << (s->depth - 8); - int linesize = frame->linesize[p] / 2; + int linesize = frame->linesize[p] / sizeof(uint16_t); + /* fill left and right borders from top to bottom border */ for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { for (x = 0; x < s->borders[p].left; x++) { - ptr[y * linesize + x] = fill; + data[y * linesize + x] 
= fill; } - for (x = 0; x < s->borders[p].right; x++) { - ptr[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; } } + /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { for (x = 0; x < s->planewidth[p]; x++) { - ptr[y * linesize + x] = fill; + data[y * linesize + x] = fill; } } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { for (x = 0; x < s->planewidth[p]; x++) { - ptr[y * linesize + x] = fill; + data[y * linesize + x] = fill; } } } @@ -307,23 +310,23 @@ s->borders[2].bottom = s->bottom >> desc->log2_chroma_h; if (inlink->w < s->left + s->right || - inlink->w <= s->left || - inlink->w <= s->right || - inlink->h < s->top + s->bottom || - inlink->h <= s->top || - inlink->h <= s->bottom || - inlink->w < s->left * 2 || - inlink->w < s->right * 2 || - inlink->h < s->top * 2 || - inlink->h < s->bottom * 2) { + inlink->w <= s->left || + inlink->w <= s->right || + inlink->h < s->top + s->bottom || + inlink->h <= s->top || + inlink->h <= s->bottom || + inlink->w < s->left * 2 || + inlink->w < s->right * 2 || + inlink->h < s->top * 2 || + inlink->h < s->bottom * 2) { av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n"); return AVERROR(EINVAL); } switch (s->mode) { - case FM_SMEAR: s->fillborders = s->depth <= 8 ? smear_borders8 : smear_borders16; break; - case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break; - case FM_FIXED: s->fillborders = s->depth <= 8 ? fixed_borders8 : fixed_borders16; break; + case FM_SMEAR: s->fillborders = s->depth <= 8 ? smear_borders8 : smear_borders16; break; + case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break; + case FM_FIXED: s->fillborders = s->depth <= 8 ? 
fixed_borders8 : fixed_borders16; break; } s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]); @@ -336,7 +339,7 @@ int i; ff_fill_rgba_map(rgba_map, inlink->format); - for (i = 0; i < 4; i++) + for (i = 0; i < sizeof(rgba_map); i++) s->fill[rgba_map[i]] = s->rgba_color[i]; } else { memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color));
>From 263c3631c529dc0b3ff6653c850888af19f1e39a Mon Sep 17 00:00:00 2001 From: Ulf Zibis <ulf.zi...@cosoco.de> Date: 14.03.2019, 19:59:08 avfilter/fillborders: avoid needless calculations for performance diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c index 820aa2d..9492d53 100644 --- a/libavfilter/vf_fillborders.c +++ b/libavfilter/vf_fillborders.c @@ -91,14 +91,16 @@ int linesize = frame->linesize[p]; /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(data + y * linesize, - *(data + y * linesize + s->borders[p].left), - s->borders[p].left); - memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, - *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1), - s->borders[p].right); - } + if (s->borders[p].left != 0 || + s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + memset(data + y * linesize, + *(data + y * linesize + s->borders[p].left), + s->borders[p].left); + memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, + *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1), + s->borders[p].right); + } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -122,15 +124,17 @@ int linesize = frame->linesize[p] / sizeof(uint16_t); /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = *(data + y * linesize + s->borders[p].left); + if (s->borders[p].left != 0 || + s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + for (x = 0; x < s->borders[p].left; x++) { + data[y * linesize + x] = *(data + y * 
linesize + s->borders[p].left); + } + for (x = 0; x < s->borders[p].right; x++) { + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1); + } } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1); - } - } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -154,15 +158,17 @@ int linesize = frame->linesize[p]; /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; + if (s->borders[p].left != 0 || + s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + for (x = 0; x < s->borders[p].left; x++) { + data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; + } + for (x = 0; x < s->borders[p].right; x++) { + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + } } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; - } - } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -187,16 +193,18 @@ int linesize = frame->linesize[p] / sizeof(uint16_t); /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; - } + if (s->borders[p].left != 0 || + 
s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + for (x = 0; x < s->borders[p].left; x++) { + data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; + } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + for (x = 0; x < s->borders[p].right; x++) { + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = + data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + } } - } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -222,11 +230,13 @@ int linesize = frame->linesize[p]; /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(data + y * linesize, fill, s->borders[p].left); - memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill, - s->borders[p].right); - } + if (s->borders[p].left != 0 || + s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + memset(data + y * linesize, fill, s->borders[p].left); + memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill, + s->borders[p].right); + } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -248,14 +258,16 @@ int linesize = frame->linesize[p] / sizeof(uint16_t); /* fill left and right borders from top to bottom border */ - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = fill; + if (s->borders[p].left != 0 || + s->borders[p].right != s->planewidth[p]) // in case skip for performance + for (y = s->borders[p].top; y < s->planeheight[p] - 
s->borders[p].bottom; y++) { + for (x = 0; x < s->borders[p].left; x++) { + data[y * linesize + x] = fill; + } + for (x = 0; x < s->borders[p].right; x++) { + data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; + } } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; - } - } /* fill top and bottom borders */ for (y = 0; y < s->borders[p].top; y++) { @@ -286,6 +298,20 @@ FillBordersContext *s = ctx->priv; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + if (inlink->w < s->left + s->right || + inlink->w <= s->left || + inlink->w <= s->right || + inlink->h < s->top + s->bottom || + inlink->h <= s->top || + inlink->h <= s->bottom || + inlink->w < s->left * 2 || + inlink->w < s->right * 2 || + inlink->h < s->top * 2 || + inlink->h < s->bottom * 2) { + av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n"); + return AVERROR(EINVAL); + } + s->nb_planes = desc->nb_components; s->depth = desc->comp[0].depth; @@ -309,40 +335,23 @@ s->borders[2].top = s->top >> desc->log2_chroma_h; s->borders[2].bottom = s->bottom >> desc->log2_chroma_h; - if (inlink->w < s->left + s->right || - inlink->w <= s->left || - inlink->w <= s->right || - inlink->h < s->top + s->bottom || - inlink->h <= s->top || - inlink->h <= s->bottom || - inlink->w < s->left * 2 || - inlink->w < s->right * 2 || - inlink->h < s->top * 2 || - inlink->h < s->bottom * 2) { - av_log(ctx, AV_LOG_ERROR, "Borders are bigger than input frame size.\n"); - return AVERROR(EINVAL); - } - switch (s->mode) { case FM_SMEAR: s->fillborders = s->depth <= 8 ? smear_borders8 : smear_borders16; break; case FM_MIRROR: s->fillborders = s->depth <= 8 ? mirror_borders8 : mirror_borders16; break; - case FM_FIXED: s->fillborders = s->depth <= 8 ? 
fixed_borders8 : fixed_borders16; break; - } - - s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]); - s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); - s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); - s->yuv_color[A] = s->rgba_color[A]; - - if (desc->flags & AV_PIX_FMT_FLAG_RGB) { - uint8_t rgba_map[4]; - int i; - - ff_fill_rgba_map(rgba_map, inlink->format); - for (i = 0; i < sizeof(rgba_map); i++) - s->fill[rgba_map[i]] = s->rgba_color[i]; - } else { - memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color)); + case FM_FIXED: s->fillborders = s->depth <= 8 ? fixed_borders8 : fixed_borders16; + if (desc->flags & AV_PIX_FMT_FLAG_RGB) { + uint8_t rgba_map[4]; + int i; + ff_fill_rgba_map(rgba_map, inlink->format); + for (i = 0; i < sizeof(rgba_map); i++) + s->fill[rgba_map[i]] = s->rgba_color[i]; + } else { + s->yuv_color[Y] = RGB_TO_Y_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B]); + s->yuv_color[U] = RGB_TO_U_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); + s->yuv_color[V] = RGB_TO_V_CCIR(s->rgba_color[R], s->rgba_color[G], s->rgba_color[B], 0); + s->yuv_color[A] = s->rgba_color[A]; + memcpy(s->fill, s->yuv_color, sizeof(s->yuv_color)); + } break; } return 0;
>From 52e6ab2144751f909e81a36e97085d09da7f3c18 Mon Sep 17 00:00:00 2001 From: Ulf Zibis <ulf.zi...@cosoco.de> Date: 14.03.2019, 22:44:04 avfilter/fillborders: enhanced readability; side effect: better performance by less indirections in for loops diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c index 9492d53..393ad7d 100644 --- a/libavfilter/vf_fillborders.c +++ b/libavfilter/vf_fillborders.c @@ -89,28 +89,30 @@ for (p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; int linesize = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = s->borders[p].left; + int right = s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { + if (left != 0 || right != width) // in case skip for performance + for (y = top; y < height - bottom; y++) { memset(data + y * linesize, - *(data + y * linesize + s->borders[p].left), - s->borders[p].left); - memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, - *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1), - s->borders[p].right); + *(data + y * linesize + left), left); + memset(data + y * linesize + width - right, + *(data + y * linesize + width - right - 1), right); } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { + for (y = 0; y < top; y++) { memcpy(data + y * linesize, - data + s->borders[p].top * linesize, s->planewidth[p]); + data + top * linesize, width); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { + for (y = height - bottom; y < height; y++) { memcpy(data + y * linesize, - data + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, - s->planewidth[p]); + data + 
(height - bottom - 1) * linesize, width); } } } @@ -122,29 +124,34 @@ for (p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; int linesize = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = s->borders[p].left; + int right = s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = *(data + y * linesize + s->borders[p].left); + if (left != 0 || right != width) // in case skip for performance + for (y = top; y < height - bottom; y++) { + for (x = 0; x < left; x++) { + data[y * linesize + x] = *(data + y * linesize + left); } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - *(data + y * linesize + s->planewidth[p] - s->borders[p].right - 1); + for (x = 0; x < right; x++) { + data[y * linesize + width - right + x] = + *(data + y * linesize + width - right - 1); } } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { + for (y = 0; y < top; y++) { memcpy(data + y * linesize, - data + s->borders[p].top * linesize, s->planewidth[p] * sizeof(uint16_t)); + data + top * linesize, width * sizeof(uint16_t)); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { + for (y = height - bottom; y < height; y++) { memcpy(data + y * linesize, - data + (s->planeheight[p] - s->borders[p].bottom - 1) * linesize, - s->planewidth[p] * sizeof(uint16_t)); + data + (height - bottom - 1) * linesize, + width * sizeof(uint16_t)); } } } @@ -156,30 +163,33 @@ for (p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; int 
linesize = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = s->borders[p].left; + int right = s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; + if (left != 0 || right != width) // in case skip for performance + for (y = top; y < height - bottom; y++) { + for (x = 0; x < left; x++) { + data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x]; } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + for (x = 0; x < right; x++) { + data[y * linesize + width - right + x] = + data[y * linesize + width - right - 1 - x]; } } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { + for (y = 0; y < top; y++) { memcpy(data + y * linesize, - data + (s->borders[p].top * 2 - 1 - y) * linesize, - s->planewidth[p]); + data + (top * 2 - 1 - y) * linesize, width); } - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(data + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - data + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, - s->planewidth[p]); + for (y = 0; y < bottom; y++) { + memcpy(data + (height - bottom + y) * linesize, + data + (height - bottom - 1 - y) * linesize, width); } } } @@ -191,31 +201,36 @@ for (p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; int linesize = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = 
s->borders[p].left; + int right = s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { - data[y * linesize + x] = data[y * linesize + s->borders[p].left * 2 - 1 - x]; + if (left != 0 || right != width) // in case skip for performance + for (y = top; y < height - bottom; y++) { + for (x = 0; x < left; x++) { + data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x]; } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = - data[y * linesize + s->planewidth[p] - s->borders[p].right - 1 - x]; + for (x = 0; x < right; x++) { + data[y * linesize + width - right + x] = + data[y * linesize + width - right - 1 - x]; } } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { + for (y = 0; y < top; y++) { memcpy(data + y * linesize, - data + (s->borders[p].top * 2 - 1 - y) * linesize, - s->planewidth[p] * sizeof(uint16_t)); + data + (top * 2 - 1 - y) * linesize, + width * sizeof(uint16_t)); } - for (y = 0; y < s->borders[p].bottom; y++) { - memcpy(data + (s->planeheight[p] - s->borders[p].bottom + y) * linesize, - data + (s->planeheight[p] - s->borders[p].bottom - 1 - y) * linesize, - s->planewidth[p] * sizeof(uint16_t)); + for (y = 0; y < bottom; y++) { + memcpy(data + (height - bottom + y) * linesize, + data + (height - bottom - 1 - y) * linesize, + width * sizeof(uint16_t)); } } } @@ -226,24 +241,28 @@ for (p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; - uint8_t fill = s->fill[p]; int linesize = frame->linesize[p]; + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = s->borders[p].left; + int right = 
s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; + uint8_t fill = s->fill[p]; /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - memset(data + y * linesize, fill, s->borders[p].left); - memset(data + y * linesize + s->planewidth[p] - s->borders[p].right, fill, - s->borders[p].right); + if (left != 0 || right != width) // in case skip for performance + for (y = top; y < height - bottom; y++) { + memset(data + y * linesize, fill, left); + memset(data + y * linesize + width - right, fill, right); } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { - memset(data + y * linesize, fill, s->planewidth[p]); + for (y = 0; y < top; y++) { + memset(data + y * linesize, fill, width); } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - memset(data + y * linesize, fill, s->planewidth[p]); + for (y = height - bottom; y < height; y++) { + memset(data + y * linesize, fill, width); } } } @@ -254,29 +273,34 @@ for (p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; - uint16_t fill = s->fill[p] << (s->depth - 8); int linesize = frame->linesize[p] / sizeof(uint16_t); + int width = s->planewidth[p]; + int height = s->planeheight[p]; + int left = s->borders[p].left; + int right = s->borders[p].right; + int top = s->borders[p].top; + int bottom = s->borders[p].bottom; + uint16_t fill = s->fill[p] << (s->depth - 8); /* fill left and right borders from top to bottom border */ - if (s->borders[p].left != 0 || - s->borders[p].right != s->planewidth[p]) // in case skip for performance - for (y = s->borders[p].top; y < s->planeheight[p] - s->borders[p].bottom; y++) { - for (x = 0; x < s->borders[p].left; x++) { + if (left != 0 || right != width) // in case skip for 
performance + for (y = top; y < height - bottom; y++) { + for (x = 0; x < left; x++) { data[y * linesize + x] = fill; } - for (x = 0; x < s->borders[p].right; x++) { - data[y * linesize + s->planewidth[p] - s->borders[p].right + x] = fill; + for (x = 0; x < right; x++) { + data[y * linesize + width - right + x] = fill; } } /* fill top and bottom borders */ - for (y = 0; y < s->borders[p].top; y++) { - for (x = 0; x < s->planewidth[p]; x++) { + for (y = 0; y < top; y++) { + for (x = 0; x < width; x++) { data[y * linesize + x] = fill; } } - for (y = s->planeheight[p] - s->borders[p].bottom; y < s->planeheight[p]; y++) { - for (x = 0; x < s->planewidth[p]; x++) { + for (y = height - bottom; y < height; y++) { + for (x = 0; x < width; x++) { data[y * linesize + x] = fill; } }
>From 663987b6391301b963714eb3a660642d46656ed9 Mon Sep 17 00:00:00 2001 From: Ulf Zibis <ulf.zi...@cosoco.de> Date: 14.03.2019, 23:27:43 avfilter/fillborders: enhance performance by - less calculations in inner for loops - more use of memcpy() side effect: again enhanced readability; diff --git a/libavfilter/vf_fillborders.c b/libavfilter/vf_fillborders.c index 393ad7d..3d58f9e 100644 --- a/libavfilter/vf_fillborders.c +++ b/libavfilter/vf_fillborders.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <sys/param.h> #include "libavutil/colorspace.h" #include "libavutil/common.h" #include "libavutil/opt.h" @@ -84,226 +85,176 @@ static void smear_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; - int linesize = frame->linesize[p]; + int lz = frame->linesize[p]; int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - memset(data + y * linesize, - *(data + y * linesize + left), left); - memset(data + y * linesize + width - right, - *(data + y * linesize + width - right - 1), right); + for (int y = top; y < bottom; y += lz) { + memset(data + y, *(data + y + left), left); + memset(data + y + right, *(data + y + right - 1), width - right); } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - memcpy(data + y * linesize, - data + top * linesize, width); - } - for (y = height - bottom; y < height; y++) 
{ - memcpy(data + y * linesize, - data + (height - bottom - 1) * linesize, width); - } + for (uint8_t *y = data + top; y > data; ) + memcpy(y -= lz, data + top, width); + for (uint8_t *y = data + bottom; y < data + height; y += lz) + memcpy(y, data + bottom - lz, width); } } static void smear_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / sizeof(uint16_t); + int lz = frame->linesize[p] / sizeof(uint16_t); int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - for (x = 0; x < left; x++) { - data[y * linesize + x] = *(data + y * linesize + left); - } - for (x = 0; x < right; x++) { - data[y * linesize + width - right + x] = - *(data + y * linesize + width - right - 1); - } + for (int y = top; y < bottom; y += lz) { + for (int x = left; x >= 0; x--) + data[y + x] = data[y + left]; + for (int x = right; x < width; x++) + data[y + x] = data[y + right - 1]; } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - memcpy(data + y * linesize, - data + top * linesize, width * sizeof(uint16_t)); - } - for (y = height - bottom; y < height; y++) { - memcpy(data + y * linesize, - data + (height - bottom - 1) * linesize, - width * sizeof(uint16_t)); - } + for (uint16_t *y = data + top; y > data; ) + memcpy(y -= lz, data + top, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom; y < data + height; y += 
lz) + memcpy(y, data + bottom - lz, width * sizeof(uint16_t)); } } static void mirror_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y, x; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; - int linesize = frame->linesize[p]; + int lz = frame->linesize[p]; int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - for (x = 0; x < left; x++) { - data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x]; - } - for (x = 0; x < right; x++) { - data[y * linesize + width - right + x] = - data[y * linesize + width - right - 1 - x]; - } + for (int y = top; y < bottom; y += lz) + for (int x = left, x2 = x; x >= 0; x--) { + data[y + x] = data[y + x2++]; + for (int x = right, x2 = x; x < width; x++) + data[y + x] = data[y + --x2]; } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - memcpy(data + y * linesize, - data + (top * 2 - 1 - y) * linesize, width); - } - for (y = 0; y < bottom; y++) { - memcpy(data + (height - bottom + y) * linesize, - data + (height - bottom - 1 - y) * linesize, width); - } + for (uint8_t *y = data + top, *y2 = y; y > data; y2 += lz) + memcpy(y -= lz, y2, width); + for (uint8_t *y = data + bottom, *y2 = y; y < data + height; y += lz) + memcpy(y, y2 -= lz, width); } } static void mirror_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; - int 
linesize = frame->linesize[p] / sizeof(uint16_t); + int lz = frame->linesize[p] / sizeof(uint16_t); int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - for (x = 0; x < left; x++) { - data[y * linesize + x] = data[y * linesize + left * 2 - 1 - x]; - } - - for (x = 0; x < right; x++) { - data[y * linesize + width - right + x] = - data[y * linesize + width - right - 1 - x]; - } + for (int y = top; y < bottom; y += lz) { + for (int x = left, x2 = x; x >= 0; x--) + data[y + x] = data[y + x2++]; + for (int x = right, x2 = x; x < width; x++) + data[y + x] = data[y + --x2]; } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - memcpy(data + y * linesize, - data + (top * 2 - 1 - y) * linesize, - width * sizeof(uint16_t)); - } - for (y = 0; y < bottom; y++) { - memcpy(data + (height - bottom + y) * linesize, - data + (height - bottom - 1 - y) * linesize, - width * sizeof(uint16_t)); - } + for (uint16_t *y = data + top, *y2 = y; y > data; y2 += lz) + memcpy(y -= lz, y2, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom, *y2 = y; y < data + height; y += lz) + memcpy(y, y2 -= lz, width * sizeof(uint16_t)); } } static void fixed_borders8(FillBordersContext *s, AVFrame *frame) { - int p, y; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint8_t *data = frame->data[p]; - int linesize = frame->linesize[p]; + int lz = frame->linesize[p]; int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int 
left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; uint8_t fill = s->fill[p]; /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - memset(data + y * linesize, fill, left); - memset(data + y * linesize + width - right, fill, right); + for (int y = top; y < bottom; y += lz) { + memset(data + y, fill, left); + memset(data + y + right, fill, width - right); } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - memset(data + y * linesize, fill, width); - } - for (y = height - bottom; y < height; y++) { - memset(data + y * linesize, fill, width); - } + for (uint8_t *y = data + top; y > data; ) + memset(y -= lz, fill, width); + for (uint8_t *y = data + bottom; y < data + height; y += lz) + memset(y, fill, width); } } static void fixed_borders16(FillBordersContext *s, AVFrame *frame) { - int p, y, x; - - for (p = 0; p < s->nb_planes; p++) { + for (int p = 0; p < s->nb_planes; p++) { uint16_t *data = (uint16_t *)frame->data[p]; - int linesize = frame->linesize[p] / sizeof(uint16_t); + int lz = frame->linesize[p] / sizeof(uint16_t); int width = s->planewidth[p]; - int height = s->planeheight[p]; + int height = s->planeheight[p] * lz; int left = s->borders[p].left; - int right = s->borders[p].right; - int top = s->borders[p].top; - int bottom = s->borders[p].bottom; - uint16_t fill = s->fill[p] << (s->depth - 8); + int right = width - s->borders[p].right; + int top = s->borders[p].top * lz; + int bottom = height - s->borders[p].bottom * lz; + int fill_sz = MAX(MAX(left, right), top!=0 || height-bottom!=0 ? 
width : 0); + uint16_t *fill = av_malloc(fill_sz * sizeof(uint16_t)); + for (int i = 0; i < fill_sz; i++) + fill[i] = s->fill[p] << (s->depth - 8); /* fill left and right borders from top to bottom border */ if (left != 0 || right != width) // in case skip for performance - for (y = top; y < height - bottom; y++) { - for (x = 0; x < left; x++) { - data[y * linesize + x] = fill; - } - for (x = 0; x < right; x++) { - data[y * linesize + width - right + x] = fill; - } + for (int y = top; y < bottom; y += lz) { + memcpy(data + y, fill, left * sizeof(uint16_t)); + memcpy(data + y + right, fill, (width - right) * sizeof(uint16_t)); } /* fill top and bottom borders */ - for (y = 0; y < top; y++) { - for (x = 0; x < width; x++) { - data[y * linesize + x] = fill; - } - } - for (y = height - bottom; y < height; y++) { - for (x = 0; x < width; x++) { - data[y * linesize + x] = fill; - } - } + for (uint16_t *y = data + top; y > data; ) + memcpy(y -= lz, fill, width * sizeof(uint16_t)); + for (uint16_t *y = data + bottom; y < data + height; y += lz) + memcpy(y, fill, width * sizeof(uint16_t)); + + av_free(fill); } }
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel