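The stride of the temporary buffers is sometimes much larger than actually needed. ptrdiff_t is the type actually used for strides, so the stride variable is declared as such. Aligning the stride to 8 is needed because each line must start at an aligned address for SIMD.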
-- Christophe
From 126adf820bc54c2d00f794629595ad6310fbfc37 Mon Sep 17 00:00:00 2001 From: Christophe Gisquet <christophe.gisq...@gmail.com> Date: Sat, 26 Jul 2014 17:17:18 +0200 Subject: [PATCH 12/13] hevc_mc: reduce stride for bidir temp buffers It is unconditionally set to 64, which is quite higher than the actual block size. --- libavcodec/hevc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c index a775f26..0f87b33 100644 --- a/libavcodec/hevc.c +++ b/libavcodec/hevc.c @@ -1357,6 +1357,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, { HEVCLocalContext *lc = s->HEVClc; DECLARE_ALIGNED(16, int16_t, tmp[MAX_PB_SIZE * MAX_PB_SIZE]); + ptrdiff_t tmpstride = FFALIGN(block_w, 8); ptrdiff_t src0stride = ref0->linesize[0]; ptrdiff_t src1stride = ref1->linesize[0]; int pic_width = s->sps->width; @@ -1410,13 +1411,13 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, src1stride = edge_emu_stride; } - s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride, + s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, tmpstride, src0, src0stride, block_h, mx0, my0, block_w); if (!weight_flag) - s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE, + s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, tmpstride, block_h, mx1, my1, block_w); else - s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE, + s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, tmpstride, block_h, s->sh.luma_log2_weight_denom, s->sh.luma_weight_l0[current_mv->ref_idx[0]], s->sh.luma_weight_l1[current_mv->ref_idx[1]], @@ -1512,7 +1513,7 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) { 
DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]); - int tmpstride = MAX_PB_SIZE; + ptrdiff_t tmpstride = FFALIGN(block_w, 8); HEVCLocalContext *lc = s->HEVClc; uint8_t *src1 = ref0->data[cidx+1]; uint8_t *src2 = ref1->data[cidx+1]; -- 1.9.2.msysgit.0
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel