The stride is sometimes way bigger than actually needed. ptrdiff_t is
the actual type used. Aligning to 8 is needed as the lines must have
aligned addresses for SIMD.

-- 
Christophe
From 126adf820bc54c2d00f794629595ad6310fbfc37 Mon Sep 17 00:00:00 2001
From: Christophe Gisquet <christophe.gisq...@gmail.com>
Date: Sat, 26 Jul 2014 17:17:18 +0200
Subject: [PATCH 12/13] hevc_mc: reduce stride for bidir temp buffers

It is unconditionally set to 64, which is quite higher than the
actual block size.
---
 libavcodec/hevc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c
index a775f26..0f87b33 100644
--- a/libavcodec/hevc.c
+++ b/libavcodec/hevc.c
@@ -1357,6 +1357,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
 {
     HEVCLocalContext *lc = s->HEVClc;
     DECLARE_ALIGNED(16, int16_t,  tmp[MAX_PB_SIZE * MAX_PB_SIZE]);
+    ptrdiff_t tmpstride  = FFALIGN(block_w, 8);
     ptrdiff_t src0stride  = ref0->linesize[0];
     ptrdiff_t src1stride  = ref1->linesize[0];
     int pic_width        = s->sps->width;
@@ -1410,13 +1411,13 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride,
         src1stride = edge_emu_stride;
     }
 
-    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, MAX_PB_SIZE, src0, src0stride,
+    s->hevcdsp.put_hevc_qpel[idx][!!my0][!!mx0](tmp, tmpstride, src0, src0stride,
                                                 block_h, mx0, my0, block_w);
     if (!weight_flag)
-        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
+        s->hevcdsp.put_hevc_qpel_bi[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, tmpstride,
                                                        block_h, mx1, my1, block_w);
     else
-        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, MAX_PB_SIZE,
+        s->hevcdsp.put_hevc_qpel_bi_w[idx][!!my1][!!mx1](dst, dststride, src1, src1stride, tmp, tmpstride,
                                                          block_h, s->sh.luma_log2_weight_denom,
                                                          s->sh.luma_weight_l0[current_mv->ref_idx[0]],
                                                          s->sh.luma_weight_l1[current_mv->ref_idx[1]],
@@ -1512,7 +1513,7 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF
                          int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx)
 {
     DECLARE_ALIGNED(16, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
-    int tmpstride = MAX_PB_SIZE;
+    ptrdiff_t tmpstride  = FFALIGN(block_w, 8);
     HEVCLocalContext *lc = s->HEVClc;
     uint8_t *src1        = ref0->data[cidx+1];
     uint8_t *src2        = ref1->data[cidx+1];
-- 
1.9.2.msysgit.0

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to