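From: Clément Bœsch <[email protected]> Add the FF_CEIL_RSHIFT macro to replace the open-coded -((-a) >> b) idiom for a right shift that rounds up; it is faster when the shift amount is a compile-time constant.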
Signed-off-by: Vittorio Giovara <[email protected]> --- This macro is needed by the cfhd decoder and it slightly improves performance where it is used. Vittorio libavcodec/ffv1dec.c | 4 ++-- libavcodec/ffv1enc.c | 4 ++-- libavcodec/mimic.c | 4 ++-- libavfilter/vf_framepack.c | 4 ++-- libavutil/common.h | 4 ++++ libavutil/frame.c | 2 +- libavutil/imgutils.c | 2 +- libswscale/rgb2rgb_template.c | 8 ++++---- libswscale/swscale.c | 6 +++--- libswscale/swscale_unscaled.c | 6 +++--- libswscale/utils.c | 10 +++++----- libswscale/x86/rgb2rgb_template.c | 8 ++++---- 12 files changed, 33 insertions(+), 29 deletions(-) diff --git a/libavcodec/ffv1dec.c b/libavcodec/ffv1dec.c index d32da60..3f93044 100644 --- a/libavcodec/ffv1dec.c +++ b/libavcodec/ffv1dec.c @@ -375,8 +375,8 @@ static int decode_slice(AVCodecContext *c, void *arg) av_assert1(width && height); if (f->colorspace == 0) { - const int chroma_width = -((-width) >> f->chroma_h_shift); - const int chroma_height = -((-height) >> f->chroma_v_shift); + const int chroma_width = FF_CEIL_RSHIFT(width, f->chroma_h_shift); + const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift); const int cx = x >> f->chroma_h_shift; const int cy = y >> f->chroma_v_shift; decode_plane(fs, p->data[0] + ps * x + y * p->linesize[0], width, diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c index 0158605..1430564 100644 --- a/libavcodec/ffv1enc.c +++ b/libavcodec/ffv1enc.c @@ -897,8 +897,8 @@ static int encode_slice(AVCodecContext *c, void *arg) } if (f->colorspace == 0) { - const int chroma_width = -((-width) >> f->chroma_h_shift); - const int chroma_height = -((-height) >> f->chroma_v_shift); + const int chroma_width = FF_CEIL_RSHIFT(width, f->chroma_h_shift); + const int chroma_height = FF_CEIL_RSHIFT(height, f->chroma_v_shift); const int cx = x >> f->chroma_h_shift; const int cy = y >> f->chroma_v_shift; diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c index b8b3285..1bc2d85 100644 --- a/libavcodec/mimic.c +++ 
b/libavcodec/mimic.c @@ -392,8 +392,8 @@ static int mimic_decode_frame(AVCodecContext *avctx, void *data, avctx->height = height; avctx->pix_fmt = AV_PIX_FMT_YUV420P; for (i = 0; i < 3; i++) { - ctx->num_vblocks[i] = -((-height) >> (3 + !!i)); - ctx->num_hblocks[i] = width >> (3 + !!i); + ctx->num_vblocks[i] = FF_CEIL_RSHIFT(height, 3 + !!i); + ctx->num_hblocks[i] = width >> (3 + !!i); } } else if (width != ctx->avctx->width || height != ctx->avctx->height) { avpriv_request_sample(avctx, "Resolution changing"); diff --git a/libavfilter/vf_framepack.c b/libavfilter/vf_framepack.c index f3bb6b3..631fb08 100644 --- a/libavfilter/vf_framepack.c +++ b/libavfilter/vf_framepack.c @@ -158,8 +158,8 @@ static void horizontal_frame_pack(AVFilterLink *outlink, for (plane = 0; plane < s->pix_desc->nb_components; plane++) { if (plane == 1 || plane == 2) { - length = -(-(out->width / 2) >> s->pix_desc->log2_chroma_w); - lines = -(-(out->height) >> s->pix_desc->log2_chroma_h); + length = FF_CEIL_RSHIFT(out->width / 2, s->pix_desc->log2_chroma_w); + lines = FF_CEIL_RSHIFT(out->height, s->pix_desc->log2_chroma_h); } for (i = 0; i < lines; i++) { int j; diff --git a/libavutil/common.h b/libavutil/common.h index 7a43ccf..ce13844 100644 --- a/libavutil/common.h +++ b/libavutil/common.h @@ -50,6 +50,10 @@ #define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b)) /* assume b>0 */ #define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b)) +/* assume a>0 and b>0 */ +#define FF_CEIL_RSHIFT(a,b) (!av_builtin_constant_p(b) ? -((-(a)) >> (b)) \ + : ((a) + (1 << (b)) - 1) >> (b)) + #define FFABS(a) ((a) >= 0 ? (a) : (-(a))) #define FFSIGN(a) ((a) > 0 ? 
1 : -1) diff --git a/libavutil/frame.c b/libavutil/frame.c index e4f6ab3..009c3d5 100644 --- a/libavutil/frame.c +++ b/libavutil/frame.c @@ -111,7 +111,7 @@ static int get_video_buffer(AVFrame *frame, int align) for (i = 0; i < 4 && frame->linesize[i]; i++) { int h = frame->height; if (i == 1 || i == 2) - h = -((-h) >> desc->log2_chroma_h); + h = FF_CEIL_RSHIFT(h, desc->log2_chroma_h); frame->buf[i] = av_buffer_alloc(frame->linesize[i] * h); if (!frame->buf[i]) diff --git a/libavutil/imgutils.c b/libavutil/imgutils.c index 4fb7a9b..0985438 100644 --- a/libavutil/imgutils.c +++ b/libavutil/imgutils.c @@ -291,7 +291,7 @@ void av_image_copy(uint8_t *dst_data[4], int dst_linesizes[4], int h = height; int bwidth = av_image_get_linesize(pix_fmt, width, i); if (i == 1 || i == 2) { - h= -((-height)>>desc->log2_chroma_h); + h = FF_CEIL_RSHIFT(height, desc->log2_chroma_h); } av_image_copy_plane(dst_data[i], dst_linesizes[i], src_data[i], src_linesizes[i], diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c index 693c7f2..7a4ade9 100644 --- a/libswscale/rgb2rgb_template.c +++ b/libswscale/rgb2rgb_template.c @@ -856,7 +856,7 @@ static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src, ydst, width); @@ -876,7 +876,7 @@ static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src, ydst, width); @@ -894,7 +894,7 @@ static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y 
= 0; y < height; y++) { extract_even_c(src + 1, ydst, width); @@ -914,7 +914,7 @@ static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth = -((-width) >> 1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y = 0; y < height; y++) { extract_even_c(src + 1, ydst, width); diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 2f9cb1b..7a85bf7 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -384,8 +384,8 @@ static int swscale(SwsContext *c, const uint8_t *src[], yuv2packed2_fn yuv2packed2 = c->yuv2packed2; yuv2packedX_fn yuv2packedX = c->yuv2packedX; yuv2anyX_fn yuv2anyX = c->yuv2anyX; - const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; - const int chrSrcSliceH = -((-srcSliceH) >> c->chrSrcVSubSample); + const int chrSrcSliceY = srcSliceY >> c->chrSrcVSubSample; + const int chrSrcSliceH = FF_CEIL_RSHIFT(srcSliceH, c->chrSrcVSubSample); int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat); int lastDstY; @@ -484,7 +484,7 @@ static int swscale(SwsContext *c, const uint8_t *src[], // Do we have enough lines in this slice to output the dstY line enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && - lastChrSrcY < -((-srcSliceY - srcSliceH) >> c->chrSrcVSubSample); + lastChrSrcY < FF_CEIL_RSHIFT(srcSliceY + srcSliceH, c->chrSrcVSubSample); if (!enough_lines) { lastLumSrcY = srcSliceY + srcSliceH - 1; diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c index 39ae5cf..8153d8c 100644 --- a/libswscale/swscale_unscaled.c +++ b/libswscale/swscale_unscaled.c @@ -798,9 +798,9 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t *src[], const AVPixFmtDescriptor *desc_dst = av_pix_fmt_desc_get(c->dstFormat); int plane, i, j; for (plane = 0; plane < 4; plane++) { - int length = (plane == 0 || plane == 3) ? c->srcW : -((-c->srcW ) >> c->chrDstHSubSample); - int y = (plane == 0 || plane == 3) ? 
srcSliceY: -((-srcSliceY) >> c->chrDstVSubSample); - int height = (plane == 0 || plane == 3) ? srcSliceH: -((-srcSliceH) >> c->chrDstVSubSample); + int length = (plane == 0 || plane == 3) ? c->srcW : FF_CEIL_RSHIFT(c->srcW, c->chrDstHSubSample); + int y = (plane == 0 || plane == 3) ? srcSliceY: FF_CEIL_RSHIFT(srcSliceY, c->chrDstVSubSample); + int height = (plane == 0 || plane == 3) ? srcSliceH: FF_CEIL_RSHIFT(srcSliceH, c->chrDstVSubSample); const uint8_t *srcPtr = src[plane]; uint8_t *dstPtr = dst[plane] + dstStride[plane] * y; int shiftonly = plane == 1 || plane == 2 || (!c->srcRange && plane == 0); diff --git a/libswscale/utils.c b/libswscale/utils.c index 5406e19..ace3c0f 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1019,11 +1019,11 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, (flags & SWS_FAST_BILINEAR))) c->chrSrcHSubSample = 1; - // Note the -((-x)>>y) is so that we always round toward +inf. - c->chrSrcW = -((-srcW) >> c->chrSrcHSubSample); - c->chrSrcH = -((-srcH) >> c->chrSrcVSubSample); - c->chrDstW = -((-dstW) >> c->chrDstHSubSample); - c->chrDstH = -((-dstH) >> c->chrDstVSubSample); + // Note the FF_CEIL_RSHIFT is so that we always round toward +inf. 
+ c->chrSrcW = FF_CEIL_RSHIFT(srcW, c->chrSrcHSubSample); + c->chrSrcH = FF_CEIL_RSHIFT(srcH, c->chrSrcVSubSample); + c->chrDstW = FF_CEIL_RSHIFT(dstW, c->chrDstHSubSample); + c->chrDstH = FF_CEIL_RSHIFT(dstH, c->chrDstVSubSample); /* unscaled special cases */ if (unscaled && !usesHFilter && !usesVFilter && diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 5d34c21..f0cedab 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -2397,7 +2397,7 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src, ydst, width); @@ -2423,7 +2423,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src, ydst, width); @@ -2447,7 +2447,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src+1, ydst, width); @@ -2473,7 +2473,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co int lumStride, int chromStride, int srcStride) { int y; - const int chromWidth= -((-width)>>1); + const int chromWidth = FF_CEIL_RSHIFT(width, 1); for (y=0; y<height; y++) { RENAME(extract_even)(src+1, ydst, width); -- 2.6.4 _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
