A decoded 8-bit 4:2:0 frame needs up to 448 bytes of memory per
macroblock (MB) and is laid out in memory as follows:

+-------------------+
| Y-plane   256 MBs |
+-------------------+
| UV-plane  128 MBs |
+-------------------+
| MV buffer  64 MBs |
+-------------------+

The motion vector buffer offset is currently correct for 4:2:0 because
the extra space for motion vectors is overallocated with an extra 64 MBs.

Wrong offsets for both the destination and the motion vector buffer are
used for the bottom field of field-encoded content; a wrong offset is
also used for 4:0:0 (monochrome) content.

Fix this by always setting the motion vector address to the expected
384 MBs offset for 4:2:0 and 256 MBs offset for 4:0:0 content.

Also use the correct destination and motion vector buffer offsets
for the bottom field of field-encoded content.

While at it also extend the check for 4:0:0 (monochrome) to include an
additional check for High Profile (100).

Fixes: dea0a82f3d22 ("media: hantro: Add support for H264 decoding on G1")
Signed-off-by: Jonas Karlman <jo...@kwiboo.se>
---
 .../staging/media/hantro/hantro_g1_h264_dec.c | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/media/hantro/hantro_g1_h264_dec.c b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
index 7ab534936843..159bd67e0a36 100644
--- a/drivers/staging/media/hantro/hantro_g1_h264_dec.c
+++ b/drivers/staging/media/hantro/hantro_g1_h264_dec.c
@@ -19,6 +19,9 @@
 #include "hantro_hw.h"
 #include "hantro_v4l2.h"
 
+#define MV_OFFSET_420  384
+#define MV_OFFSET_400  256
+
 static void set_params(struct hantro_ctx *ctx)
 {
        const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
@@ -49,8 +52,8 @@ static void set_params(struct hantro_ctx *ctx)
        vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL0);
 
        /* Decoder control register 1. */
-       reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(sps->pic_width_in_mbs_minus1 + 1) |
-             G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(sps->pic_height_in_map_units_minus1 + 1) |
+       reg = G1_REG_DEC_CTRL1_PIC_MB_WIDTH(H264_MB_WIDTH(ctx->dst_fmt.width)) |
+             G1_REG_DEC_CTRL1_PIC_MB_HEIGHT_P(H264_MB_HEIGHT(ctx->dst_fmt.height)) |
              G1_REG_DEC_CTRL1_REF_FRAMES(sps->max_num_ref_frames);
        vdpu_write_relaxed(vpu, reg, G1_REG_DEC_CTRL1);
 
@@ -79,7 +82,7 @@ static void set_params(struct hantro_ctx *ctx)
                reg |= G1_REG_DEC_CTRL4_CABAC_E;
        if (sps->flags & V4L2_H264_SPS_FLAG_DIRECT_8X8_INFERENCE)
                reg |= G1_REG_DEC_CTRL4_DIR_8X8_INFER_E;
-       if (sps->chroma_format_idc == 0)
+       if (sps->profile_idc >= 100 && sps->chroma_format_idc == 0)
                reg |= G1_REG_DEC_CTRL4_BLACKWHITE_E;
        if (pps->flags & V4L2_H264_PPS_FLAG_WEIGHTED_PRED)
                reg |= G1_REG_DEC_CTRL4_WEIGHT_PRED_E;
@@ -233,6 +236,7 @@ static void set_buffers(struct hantro_ctx *ctx)
        struct vb2_v4l2_buffer *src_buf, *dst_buf;
        struct hantro_dev *vpu = ctx->dev;
        dma_addr_t src_dma, dst_dma;
+       unsigned int offset = MV_OFFSET_420;
 
        src_buf = hantro_get_src_buf(ctx);
        dst_buf = hantro_get_dst_buf(ctx);
@@ -243,19 +247,20 @@ static void set_buffers(struct hantro_ctx *ctx)
 
        /* Destination (decoded frame) buffer. */
        dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+       if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
+               dst_dma += ALIGN(ctx->dst_fmt.width, H264_MB_DIM);
        vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DST);
 
-       /* Higher profiles require DMV buffer appended to reference frames. */
-       if (ctrls->sps->profile_idc > 66) {
-               size_t pic_size = ctx->h264_dec.pic_size;
-               size_t mv_offset = round_up(pic_size, 8);
-
-               if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
-                       mv_offset += 32 * H264_MB_WIDTH(ctx->dst_fmt.width);
-
-               vdpu_write_relaxed(vpu, dst_dma + mv_offset,
-                                  G1_REG_ADDR_DIR_MV);
-       }
+       /* Motion vector buffer is located after the decoded frame. */
+       dst_dma = vb2_dma_contig_plane_dma_addr(&dst_buf->vb2_buf, 0);
+       if (ctrls->sps->profile_idc >= 100 && ctrls->sps->chroma_format_idc == 0)
+               offset = MV_OFFSET_400;
+       dst_dma += offset * H264_MB_WIDTH(ctx->dst_fmt.width) *
+                  H264_MB_HEIGHT(ctx->dst_fmt.height);
+       if (ctrls->slices[0].flags & V4L2_H264_SLICE_FLAG_BOTTOM_FIELD)
+               dst_dma += 32 * H264_MB_WIDTH(ctx->dst_fmt.width) *
+                          H264_MB_HEIGHT(ctx->dst_fmt.height);
+       vdpu_write_relaxed(vpu, dst_dma, G1_REG_ADDR_DIR_MV);
 
        /* Auxiliary buffer prepared in hantro_g1_h264_dec_prepare_table(). */
        vdpu_write_relaxed(vpu, ctx->h264_dec.priv.dma, G1_REG_ADDR_QTABLE);
-- 
2.17.1

Reply via email to