Optimize support for grayscale surfaces in two respects: (i) space, by allocating only the luma component; (ii) speed, by avoiding initialization of the (now nonexistent) chroma planes.
Keep backward compatibility with older codec layers that only supported YUV 4:2:0 and not grayscale formats properly. Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com> --- src/gen6_mfd.c | 21 +++++--------- src/gen75_mfd.c | 19 ++++--------- src/gen7_mfd.c | 19 ++++--------- src/gen8_mfd.c | 19 ++++--------- src/i965_decoder_utils.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-- src/i965_decoder_utils.h | 8 ++++++ src/i965_drv_video.c | 43 +++++++++++++++++++++++++---- src/i965_drv_video.h | 12 ++++++++ 8 files changed, 151 insertions(+), 61 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 22d8a51..6ec2278 100755 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx, { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; struct object_surface *obj_surface = decode_state->render_object; - + unsigned int surface_format; + + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 19) | ((obj_surface->orig_width - 1) << 6)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */ (0 << 22) | /* surface object control state, FIXME??? 
*/ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx, obj_surface->flags |= SURFACE_REFERENCED; else obj_surface->flags &= ~SURFACE_REFERENCED; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } - + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo); diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c index cb85996..d2dbb69 100644 --- a/src/gen75_mfd.c +++ b/src/gen75_mfd.c @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -1086,18 +1090,7 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx, obj_surface->flags |= SURFACE_REFERENCED; else obj_surface->flags &= ~SURFACE_REFERENCED; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } - + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 9891cee..7f92142 100755 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -760,18 +764,7 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx, obj_surface->flags |= SURFACE_REFERENCED; else obj_surface->flags &= ~SURFACE_REFERENCED; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } - + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c index c1f80e4..32731f5 100644 --- a/src/gen8_mfd.c +++ b/src/gen8_mfd.c @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx, struct object_surface *obj_surface = decode_state->render_object; unsigned int y_cb_offset; unsigned int y_cr_offset; + unsigned int surface_format; assert(obj_surface); y_cb_offset = obj_surface->y_cb_offset; y_cr_offset = obj_surface->y_cr_offset; + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ? 
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8; + BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2)); OUT_BCS_BATCH(batch, 0); @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx, ((obj_surface->orig_height - 1) << 18) | ((obj_surface->orig_width - 1) << 4)); OUT_BCS_BATCH(batch, - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */ + (surface_format << 28) | /* 420 planar YUV surface */ ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */ (0 << 22) | /* surface object control state, ignored */ ((obj_surface->width - 1) << 3) | /* pitch */ @@ -847,18 +851,7 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx, obj_surface->flags |= SURFACE_REFERENCED; else obj_surface->flags &= ~SURFACE_REFERENCED; - i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420); - - /* initial uv component for YUV400 case */ - if (pic_param->seq_fields.bits.chroma_format_idc == 0) { - unsigned int uv_offset = obj_surface->width * obj_surface->height; - unsigned int uv_size = obj_surface->width * obj_surface->height / 2; - - drm_intel_gem_bo_map_gtt(obj_surface->bo); - memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); - drm_intel_gem_bo_unmap_gtt(obj_surface->bo); - } - + avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param); gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface); dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo); diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index 525efca..a36fbdb 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -174,6 +174,73 @@ mpeg2_set_reference_surfaces( } } +/* Ensure the supplied VA surface has valid storage for decoding the + current picture */ +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +) +{ + VAStatus va_status; + 
uint32_t hw_fourcc, fourcc, subsample, chroma_format; + + /* Validate chroma format */ + switch (pic_param->seq_fields.bits.chroma_format_idc) { + case 0: // Grayscale + fourcc = VA_FOURCC_Y800; + subsample = SUBSAMPLE_YUV400; + chroma_format = VA_RT_FORMAT_YUV400; + break; + case 1: // YUV 4:2:0 + fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + chroma_format = VA_RT_FORMAT_YUV420; + break; + default: + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + } + + /* Determine the HW surface format, bound to VA config needs */ + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format) + hw_fourcc = fourcc; + else { + hw_fourcc = 0; + switch (fourcc) { + case VA_FOURCC_Y800: // Implement with an NV12 surface + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) { + hw_fourcc = VA_FOURCC_NV12; + subsample = SUBSAMPLE_YUV420; + } + break; + } + } + if (!hw_fourcc) + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT; + + /* (Re-)allocate the underlying surface buffer store, if necessary */ + if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) { + i965_destroy_surface_storage(obj_surface); + va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, + hw_fourcc, subsample); + if (va_status != VA_STATUS_SUCCESS) + return va_status; + } + + /* Fake chroma components if grayscale is implemented on top of NV12 */ + if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) { + const uint32_t uv_offset = obj_surface->width * obj_surface->height; + const uint32_t uv_size = obj_surface->width * obj_surface->height / 2; + + drm_intel_gem_bo_map_gtt(obj_surface->bo); + memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); + drm_intel_gem_bo_unmap_gtt(obj_surface->bo); + } + return VA_STATUS_SUCCESS; +} + /* Generate flat scaling matrices for H.264 decoding */ void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) @@ -561,8 +628,8 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx, * sure the store buffer is allocated for 
this reference * frame */ - va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1, - VA_FOURCC_NV12, SUBSAMPLE_YUV420); + va_status = avc_ensure_surface_bo(ctx, decode_state, obj_surface, + pic_param); if (va_status != VA_STATUS_SUCCESS) return va_status; } diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index b7b72b3..14a45fb 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -43,6 +43,14 @@ mpeg2_set_reference_surfaces( VAPictureParameterBufferMPEG2 *pic_param ); +VAStatus +avc_ensure_surface_bo( + VADriverContextP ctx, + struct decode_state *decode_state, + struct object_surface *obj_surface, + const VAPictureParameterBufferH264 *pic_param +); + void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index 5e1adfc..7dd38c9 100755 --- a/src/i965_drv_video.c +++ b/src/i965_drv_video.c @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format) return NULL; } +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */ +#define EXTRA_H264_DEC_CHROMA_FORMATS \ + (VA_RT_FORMAT_YUV400) + /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */ #define EXTRA_JPEG_DEC_CHROMA_FORMATS \ (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444) @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = { .max_width = 2048, .max_height = 2048, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, + .has_mpeg2_decoding = 1, .has_h264_decoding = 1, .has_h264_encoding = 1, @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, 
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = { .max_width = 4096, .max_height = 4096, + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS, .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS, .has_mpeg2_decoding = 1, @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile, uint32_t chroma_formats = VA_RT_FORMAT_YUV420; switch (profile) { + case VAProfileH264ConstrainedBaseline: + case VAProfileH264Main: + case VAProfileH264High: + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) + chroma_formats |= i965->codec_info->h264_dec_chroma_formats; + break; + case VAProfileJPEGBaseline: if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD) chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats; @@ -817,10 +833,11 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx, return vaStatus; } -static void -i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +void +i965_destroy_surface_storage(struct object_surface *obj_surface) { - struct object_surface *obj_surface = (struct object_surface *)obj; + if (!obj_surface) + return; dri_bo_unreference(obj_surface->bo); obj_surface->bo = NULL; @@ -829,7 +846,14 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj) obj_surface->free_private_data(&obj_surface->private_data); obj_surface->private_data = NULL; } +} +static void +i965_destroy_surface(struct object_heap *heap, struct object_base *obj) +{ + struct object_surface *obj_surface = (struct object_surface *)obj; + + i965_destroy_surface_storage(obj_surface); object_heap_free(heap, obj); } @@ -1075,6 +1099,7 @@ bpp_1stplane_by_fourcc(unsigned int fourcc) case VA_FOURCC_YUY2: return 2; + case VA_FOURCC_Y800: case VA_FOURCC_YV12: case VA_FOURCC_IMC3: case VA_FOURCC_IYUV: @@ -1677,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx, struct i965_render_state 
*render_state = &i965->render_state; struct object_config *obj_config = CONFIG(config_id); struct object_context *obj_context = NULL; + VAConfigAttrib *attrib; VAStatus vaStatus = VA_STATUS_SUCCESS; int contextID; int i; @@ -1770,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx, } } + attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat); + if (!attrib) + return VA_STATUS_ERROR_INVALID_CONFIG; + obj_context->codec_state.base.chroma_formats = attrib->value; + /* Error recovery */ if (VA_STATUS_SUCCESS != vaStatus) { i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context); @@ -3083,10 +3114,10 @@ i965_check_alloc_surface_bo(VADriverContextP ctx, obj_surface->cb_cr_pitch = obj_surface->width; obj_surface->cb_cr_width = 0; obj_surface->cb_cr_height = 0; - obj_surface->y_cb_offset = obj_surface->height; - obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32); + obj_surface->y_cb_offset = 0; + obj_surface->y_cr_offset = 0; region_width = obj_surface->width; - region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2; + region_height = obj_surface->height; break; diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index c66712f..d902ddb 100644 --- a/src/i965_drv_video.h +++ b/src/i965_drv_video.h @@ -101,8 +101,13 @@ struct object_config #define NUM_SLICES 10 +struct codec_state_base { + uint32_t chroma_formats; +}; + struct decode_state { + struct codec_state_base base; struct buffer_store *pic_param; struct buffer_store **slice_params; struct buffer_store *iq_matrix; @@ -122,6 +127,7 @@ struct decode_state struct encode_state { + struct codec_state_base base; struct buffer_store *seq_param; struct buffer_store *pic_param; struct buffer_store *pic_control; @@ -152,6 +158,7 @@ struct encode_state struct proc_state { + struct codec_state_base base; struct buffer_store *pipeline_param; VASurfaceID current_render_target; @@ -163,6 +170,7 @@ struct proc_state union 
codec_state { + struct codec_state_base base; struct decode_state decode; struct encode_state encode; struct proc_state proc; @@ -285,6 +293,7 @@ struct hw_codec_info int max_width; int max_height; + unsigned int h264_dec_chroma_formats; unsigned int jpeg_dec_chroma_formats; unsigned int has_mpeg2_decoding:1; @@ -421,4 +430,7 @@ extern VAStatus i965_DestroySurfaces(VADriverContextP ctx, #define I965_SURFACE_MEM_GEM_FLINK 1 #define I965_SURFACE_MEM_DRM_PRIME 2 +void +i965_destroy_surface_storage(struct object_surface *obj_surface); + #endif /* _I965_DRV_VIDEO_H_ */ -- 1.9.1 _______________________________________________ Libva mailing list Libva@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/libva