This simplifies and fixes the scan for emulation_prevention_bytes, thus avoiding a read beyond the end of the slice data buffer. Besides, this also uses dri_bo_get_subdata() to read the slice data buffer back.
HW specific changes: - SNB: make the HW skip the emulation prevention bytes itself. - IVB: fix MFD_AVC_BSD_OBJECT to report the actual slice data buffer size. Note: this assumes VASliceParameterBufferH264.slice_data_bit_offset represents the offset relative to the raw bitstream with emulation prevention bytes. Should this count be minus emulation prevention bytes, then avc_get_first_mb_bit_offset() is swapped with the _epb() variant with '+' EPB count instead of '-'. It might be possible to make SNB & IVB work without EPB scan, by using the other bit modes? Signed-off-by: Gwenole Beauchesne <[email protected]> --- src/gen6_mfd.c | 43 ++++++++-------------------------- src/gen7_mfd.c | 37 +++++----------------------- src/i965_avc_bsd.c | 58 ++++++++++----------------------------------- src/i965_decoder_utils.c | 53 ++++++++++++++++++++++++++++++++++++++++++ src/i965_decoder_utils.h | 14 +++++++++++ 5 files changed, 97 insertions(+), 108 deletions(-) diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c index 384b70e..85d9a8c 100644 --- a/src/gen6_mfd.c +++ b/src/gen6_mfd.c @@ -847,27 +847,6 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx, } } -static int -gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset) -{ - int out_slice_data_bit_offset; - int slice_header_size = in_slice_data_bit_offset / 8; - int i, j; - - for (i = 0, j = 0; i < slice_header_size; i++, j++) { - if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) { - i++, j += 2; - } - } - - out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8; - - if (mode_flag == ENTROPY_CABAC) - out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8); - - return out_slice_data_bit_offset; -} - static void gen6_mfd_avc_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -876,21 +855,19 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx, struct gen6_mfd_context *gen6_mfd_context) { struct intel_batchbuffer *batch = gen6_mfd_context->base.batch; - 
int slice_data_bit_offset; - uint8_t *slice_data = NULL; + unsigned int slice_data_bit_offset; - dri_bo_map(slice_data_bo, 0); - slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset); - slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data, - pic_param->pic_fields.bits.entropy_coding_mode_flag, - slice_param->slice_data_bit_offset); - dri_bo_unmap(slice_data_bo); + slice_data_bit_offset = avc_get_first_mb_bit_offset( + slice_data_bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag + ); BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); OUT_BCS_BATCH(batch, - ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0)); - OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3)); + (slice_param->slice_data_size - slice_param->slice_data_offset)); + OUT_BCS_BATCH(batch, slice_param->slice_data_offset); OUT_BCS_BATCH(batch, (0 << 31) | (0 << 14) | @@ -898,8 +875,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx, (0 << 10) | (0 << 8)); OUT_BCS_BATCH(batch, - (0 << 16) | - (0 << 6) | + ((slice_data_bit_offset >> 3) << 16) | + (1 << 6) | ((0x7 - (slice_data_bit_offset & 0x7)) << 0)); OUT_BCS_BATCH(batch, 0); ADVANCE_BCS_BATCH(batch); diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c index 7b89e1c..f9bf09b 100644 --- a/src/gen7_mfd.c +++ b/src/gen7_mfd.c @@ -817,27 +817,6 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx, } } -static int -gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset) -{ - int out_slice_data_bit_offset; - int slice_header_size = in_slice_data_bit_offset / 8; - int i, j; - - for (i = 0, j = 0; i < slice_header_size; i++, j++) { - if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) { - i++, j += 2; - } - } - - out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8; - - if (mode_flag == ENTROPY_CABAC) - out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8); - - return 
out_slice_data_bit_offset; -} - static void gen7_mfd_avc_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param, @@ -847,21 +826,19 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx, struct gen7_mfd_context *gen7_mfd_context) { struct intel_batchbuffer *batch = gen7_mfd_context->base.batch; - int slice_data_bit_offset; - uint8_t *slice_data = NULL; + unsigned int slice_data_bit_offset; - dri_bo_map(slice_data_bo, 0); - slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset); - slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data, - pic_param->pic_fields.bits.entropy_coding_mode_flag, - slice_param->slice_data_bit_offset); - dri_bo_unmap(slice_data_bo); + slice_data_bit_offset = avc_get_first_mb_bit_offset( + slice_data_bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag + ); /* the input bitsteam format on GEN7 differs from GEN6 */ BEGIN_BCS_BATCH(batch, 6); OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2)); OUT_BCS_BATCH(batch, - (slice_param->slice_data_size)); + (slice_param->slice_data_size - slice_param->slice_data_offset)); OUT_BCS_BATCH(batch, slice_param->slice_data_offset); OUT_BCS_BATCH(batch, (0 << 31) | diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c index 612d0ee..b2b6c92 100644 --- a/src/i965_avc_bsd.c +++ b/src/i965_avc_bsd.c @@ -534,35 +534,6 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx, ADVANCE_BCS_BATCH(batch); } -/* - * Return the bit offset to the first bit of the slice data - * - * VASliceParameterBufferH264.slice_data_bit_offset will point into the part - * of slice header if there are some escaped bytes in the slice header. The offset - * to slice data is needed for BSD unit so that BSD unit can fetch right slice data - * for processing. 
This fixes conformance case BASQP1_Sony_C.jsv - */ -static int -i965_avc_bsd_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset) -{ - int out_slice_data_bit_offset; - int slice_header_size = in_slice_data_bit_offset / 8; - int i, j; - - for (i = 0, j = 0; i < slice_header_size; i++, j++) { - if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) { - i++, j += 2; - } - } - - out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8; - - if (mode_flag == ENTROPY_CABAC) - out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8); - - return out_slice_data_bit_offset; -} - static void g4x_avc_bsd_object(VADriverContextP ctx, struct decode_state *decode_state, @@ -581,11 +552,10 @@ g4x_avc_bsd_object(VADriverContextP ctx, int num_ref_idx_l0, num_ref_idx_l1; int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); - int slice_data_bit_offset; + unsigned int slice_data_bit_offset; int weighted_pred_idc = 0; int first_mb_in_slice = 0; int slice_type; - uint8_t *slice_data = NULL; encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? 
*/ @@ -595,12 +565,12 @@ g4x_avc_bsd_object(VADriverContextP ctx, } else cmd_len = 8; - dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0); - slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset); - slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data, - pic_param->pic_fields.bits.entropy_coding_mode_flag, - slice_param->slice_data_bit_offset); - dri_bo_unmap(decode_state->slice_datas[slice_index]->bo); + + slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb( + decode_state->slice_datas[slice_index]->bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag + ); if (slice_param->slice_type == SLICE_TYPE_I || slice_param->slice_type == SLICE_TYPE_SI) @@ -710,11 +680,10 @@ ironlake_avc_bsd_object(VADriverContextP ctx, int num_ref_idx_l0, num_ref_idx_l1; int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag && pic_param->seq_fields.bits.mb_adaptive_frame_field_flag); - int slice_data_bit_offset; + unsigned int slice_data_bit_offset; int weighted_pred_idc = 0; int first_mb_in_slice; int slice_type; - uint8_t *slice_data = NULL; encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? 
*/ @@ -723,12 +692,11 @@ ironlake_avc_bsd_object(VADriverContextP ctx, } else counter_value = 0; - dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0); - slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset); - slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data, - pic_param->pic_fields.bits.entropy_coding_mode_flag, - slice_param->slice_data_bit_offset); - dri_bo_unmap(decode_state->slice_datas[slice_index]->bo); + slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb( + decode_state->slice_datas[slice_index]->bo, + slice_param, + pic_param->pic_fields.bits.entropy_coding_mode_flag + ); if (slice_param->slice_type == SLICE_TYPE_I || slice_param->slice_type == SLICE_TYPE_SI) diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c index d4400c5..6fff67f 100644 --- a/src/i965_decoder_utils.c +++ b/src/i965_decoder_utils.c @@ -24,6 +24,7 @@ #include <assert.h> #include <stddef.h> #include <string.h> +#include <alloca.h> #include "intel_batchbuffer.h" #include "i965_decoder_utils.h" #include "i965_defines.h" @@ -39,6 +40,58 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix) memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8)); } +/* Get first macroblock bit offset for BSD (AVC) */ +unsigned int +avc_get_first_mb_bit_offset( + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *slice_param, + unsigned int mode_flag +) +{ + unsigned int in_slice_data_bit_offset = slice_param->slice_data_bit_offset; + unsigned int out_slice_data_bit_offset; + unsigned int i, n, buf_size, data_size; + uint8_t *buf; + int ret; + + buf_size = slice_param->slice_data_bit_offset / 8; + data_size = slice_param->slice_data_size - slice_param->slice_data_offset; + if (buf_size > data_size) + buf_size = data_size; + + buf = alloca(buf_size); + ret = dri_bo_get_subdata( + slice_data_bo, slice_param->slice_data_offset, + buf_size, buf + ); + assert(ret == 0); + + for (i = 
2, n = 0; i < buf_size; i++) { + if (!buf[i - 2] && !buf[i - 1] && buf[i] == 3) + i += 2, n++; + } + out_slice_data_bit_offset = in_slice_data_bit_offset - n * 8; + + if (mode_flag == ENTROPY_CABAC) + out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8); + return out_slice_data_bit_offset; +} + +/* Get first macroblock bit offset for BSD, with emulation prevention bytes (AVC) */ +unsigned int +avc_get_first_mb_bit_offset_with_epb( + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *slice_param, + unsigned int mode_flag +) +{ + unsigned int slice_data_bit_offset = slice_param->slice_data_bit_offset; + + if (mode_flag == ENTROPY_CABAC) + slice_data_bit_offset = ALIGN(slice_data_bit_offset, 0x8); + return slice_data_bit_offset; +} + static inline uint8_t get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id) { diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h index bf9be84..37402b4 100644 --- a/src/i965_decoder_utils.h +++ b/src/i965_decoder_utils.h @@ -30,6 +30,20 @@ void avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix); +unsigned int +avc_get_first_mb_bit_offset( + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *slice_param, + unsigned int mode_flag +); + +unsigned int +avc_get_first_mb_bit_offset_with_epb( + dri_bo *slice_data_bo, + VASliceParameterBufferH264 *slice_param, + unsigned int mode_flag +); + void gen5_fill_avc_ref_idx_state( uint8_t state[32], -- 1.7.0.4 _______________________________________________ Libva mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libva
