Sure, I will put src/i965_encoder.c into an independent patch.

-----Original Message-----
From: Zhao, Yakui
Sent: Wednesday, January 7, 2015 9:16 AM
To: Qu, Pengfei
Cc: [email protected]
Subject: Re: [Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline

On Tue, 2015-01-06 at 01:57 -0700, Qu,Pengfei wrote: It seems that this patch also mixes the VME pipeline setting up and initialization of intel_enc_hw_context_init together. Can it be split into two patches? > Signed-off-by: Qu,Pengfei <[email protected]> > --- > src/gen6_mfc_common.c | 222 +++++++++++++++++++++++ > src/gen6_vme.h | 19 ++ > src/gen9_vme.c | 484 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > src/i965_encoder.c | 74 +++++++- > 4 files changed, 798 insertions(+), 1 deletion(-) > > diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index > fe41dac..6a5f720 100644 > --- a/src/gen6_mfc_common.c > +++ b/src/gen6_mfc_common.c > @@ -1652,3 +1652,225 @@ void > intel_avc_slice_insert_packed_data(VADriverContextP ctx, > return; > } > > +/* HEVC */ > +static int > +hevc_temporal_find_surface(VAPictureHEVC *curr_pic, > + VAPictureHEVC *ref_list, > + int num_pictures, > + int dir) > +{ > + int i, found = -1, min = 0x7FFFFFFF; > + > + for (i = 0; i < num_pictures; i++) { > + int tmp; > + > + if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) || > + (ref_list[i].picture_id == VA_INVALID_SURFACE)) > + break; > + > + tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt; > + > + if (dir) > + tmp = -tmp; > + > + if (tmp > 0 && tmp < min) { > + min = tmp; > + found = i; > + } > + } > + > + return found; > +} > +void > +intel_hevc_vme_reference_state(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context *encoder_context, > + int list_index, > + int surface_index, > + void (* vme_source_surface_state)( > + VADriverContextP ctx, > + int index, > + struct object_surface *obj_surface, > + struct intel_encoder_context > +*encoder_context)) { > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + struct object_surface *obj_surface = NULL; > + struct i965_driver_data *i965 = i965_driver_data(ctx); > + VASurfaceID ref_surface_id; > + VAEncPictureParameterBufferHEVC *pic_param = > 
(VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer; > + VAEncSliceParameterBufferHEVC *slice_param = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + int max_num_references; > + VAPictureHEVC *curr_pic; > + VAPictureHEVC *ref_list; > + int ref_idx; > + > + if (list_index == 0) { > + max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 > + 1; > + ref_list = slice_param->ref_pic_list0; > + } else { > + max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 > + 1; > + ref_list = slice_param->ref_pic_list1; > + } > + > + if (max_num_references == 1) { > + if (list_index == 0) { > + ref_surface_id = slice_param->ref_pic_list0[0].picture_id; > + vme_context->used_references[0] = &slice_param->ref_pic_list0[0]; > + } else { > + ref_surface_id = slice_param->ref_pic_list1[0].picture_id; > + vme_context->used_references[1] = &slice_param->ref_pic_list1[0]; > + } > + > + if (ref_surface_id != VA_INVALID_SURFACE) > + obj_surface = SURFACE(ref_surface_id); > + > + if (!obj_surface || > + !obj_surface->bo) { > + obj_surface = encode_state->reference_objects[list_index]; > + vme_context->used_references[list_index] = > &pic_param->reference_frames[list_index]; > + } > + > + ref_idx = 0; > + } else { > + curr_pic = &pic_param->decoded_curr_pic; > + > + /* select the reference frame in temporal space */ > + ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, > max_num_references, list_index == 1); > + ref_surface_id = ref_list[ref_idx].picture_id; > + > + if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later > */ > + obj_surface = SURFACE(ref_surface_id); > + > + vme_context->used_reference_objects[list_index] = obj_surface; > + vme_context->used_references[list_index] = &ref_list[ref_idx]; > + } > + > + if (obj_surface && > + obj_surface->bo) { > + assert(ref_idx >= 0); > + vme_context->used_reference_objects[list_index] = obj_surface; > + vme_source_surface_state(ctx, 
surface_index, obj_surface, > encoder_context); > + vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 | > + ref_idx << 16 | > + ref_idx << 8 | > + ref_idx); > + } else { > + vme_context->used_reference_objects[list_index] = NULL; > + vme_context->used_references[list_index] = NULL; > + vme_context->ref_index_in_mb[list_index] = 0; > + } > +} > + > +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context > +*encoder_context) { > + //struct gen6_mfc_context *mfc_context = encoder_context->mfc_context; > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + VAEncPictureParameterBufferHEVC *pic_param = > (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer; > + VAEncSliceParameterBufferHEVC *slice_param = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + int qp, m_cost, j, mv_count; > + uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message); > + float lambda, m_costf; > + > + /* here no SI SP slice for HEVC, do not need slice fixup */ > + int slice_type = slice_param->slice_type; > + > + > + /* to do for CBR*/ > + //if (encoder_context->rate_control_mode == VA_RC_CQP) > + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; > + //else > + //qp = > + mfc_context->bit_rate_control_context[slice_type].QpPrimeY; > + > + if (vme_state_message == NULL) > + return; > + > + assert(qp <= QP_MAX); > + lambda = intel_lambda_qp(qp); > + if (slice_type == SLICE_TYPE_I) { > + vme_state_message[MODE_INTRA_16X16] = 0; > + m_cost = lambda * 4; > + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, > 0x8f); > + m_cost = lambda * 16; > + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, > 0x8f); > + m_cost = lambda * 3; > + vme_state_message[MODE_INTRA_NONPRED] = > intel_format_lutvalue(m_cost, 0x6f); > + } else { > + m_cost = 0; > + vme_state_message[MODE_INTER_MV0] = 
intel_format_lutvalue(m_cost, > 0x6f); > + for (j = 1; j < 3; j++) { > + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; > + m_cost = (int)m_costf; > + vme_state_message[MODE_INTER_MV0 + j] = > intel_format_lutvalue(m_cost, 0x6f); > + } > + mv_count = 3; > + for (j = 4; j <= 64; j *= 2) { > + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda; > + m_cost = (int)m_costf; > + vme_state_message[MODE_INTER_MV0 + mv_count] = > intel_format_lutvalue(m_cost, 0x6f); > + mv_count++; > + } > + > + if (qp <= 25) { > + vme_state_message[MODE_INTRA_16X16] = 0x4a; > + vme_state_message[MODE_INTRA_8X8] = 0x4a; > + vme_state_message[MODE_INTRA_4X4] = 0x4a; > + vme_state_message[MODE_INTRA_NONPRED] = 0x4a; > + vme_state_message[MODE_INTER_16X16] = 0x4a; > + vme_state_message[MODE_INTER_16X8] = 0x4a; > + vme_state_message[MODE_INTER_8X8] = 0x4a; > + vme_state_message[MODE_INTER_8X4] = 0x4a; > + vme_state_message[MODE_INTER_4X4] = 0x4a; > + vme_state_message[MODE_INTER_BWD] = 0x2a; > + return; > + } > + m_costf = lambda * 10; > + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, > 0x8f); > + m_cost = lambda * 14; > + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, > 0x8f); > + m_cost = lambda * 24; > + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, > 0x8f); > + m_costf = lambda * 3.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTRA_NONPRED] = > intel_format_lutvalue(m_cost, 0x6f); > + if (slice_type == SLICE_TYPE_P) { > + m_costf = lambda * 2.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_16X16] = > intel_format_lutvalue(m_cost, 0x8f); > + m_costf = lambda * 4; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_16X8] = > intel_format_lutvalue(m_cost, 0x8f); > + m_costf = lambda * 1.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_8X8] = > intel_format_lutvalue(m_cost, 0x6f); > + m_costf = lambda * 3; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_8X4] = > intel_format_lutvalue(m_cost, 
0x6f); > + m_costf = lambda * 5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_4X4] = > intel_format_lutvalue(m_cost, 0x6f); > + /* BWD is not used in P-frame */ > + vme_state_message[MODE_INTER_BWD] = 0; > + } else { > + m_costf = lambda * 2.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_16X16] = > intel_format_lutvalue(m_cost, 0x8f); > + m_costf = lambda * 5.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_16X8] = > intel_format_lutvalue(m_cost, 0x8f); > + m_costf = lambda * 3.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_8X8] = > intel_format_lutvalue(m_cost, 0x6f); > + m_costf = lambda * 5.0; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_8X4] = > intel_format_lutvalue(m_cost, 0x6f); > + m_costf = lambda * 6.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_4X4] = > intel_format_lutvalue(m_cost, 0x6f); > + m_costf = lambda * 1.5; > + m_cost = m_costf; > + vme_state_message[MODE_INTER_BWD] = > intel_format_lutvalue(m_cost, 0x6f); > + } > + } > +} > diff --git a/src/gen6_vme.h b/src/gen6_vme.h index bc62c14..c9d6b48 > 100644 > --- a/src/gen6_vme.h > +++ b/src/gen6_vme.h > @@ -83,6 +83,7 @@ struct gen6_vme_context > unsigned long > surface_state_offset); > void *vme_state_message; > unsigned int h264_level; > + unsigned int hevc_level; > unsigned int video_coding_type; > unsigned int vme_kernel_sum; > unsigned int mpeg2_level; > @@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx, > struct object_surface *obj_surface, > struct intel_encoder_context > *encoder_context)); > > +/* HEVC */ > +void > +intel_hevc_vme_reference_state(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context *encoder_context, > + int list_index, > + int surface_index, > + void (* vme_source_surface_state)( > + VADriverContextP ctx, > + int index, > + struct object_surface *obj_surface, > + struct intel_encoder_context > +*encoder_context)); > + > +void 
intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context > +*encoder_context); > + > + > extern Bool gen8_vme_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context); > > extern Bool gen9_vme_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context); diff --git a/src/gen9_vme.c > b/src/gen9_vme.c index b4310f2..0e94581 100644 > --- a/src/gen9_vme.c > +++ b/src/gen9_vme.c > @@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = { > }, > }; > > +/* HEVC */ > + > +static const uint32_t gen9_vme_hevc_intra_frame[][4] = { #include > +"shaders/vme/intra_frame_gen9.g9b" > +}; > + > +static const uint32_t gen9_vme_hevc_inter_frame[][4] = { #include > +"shaders/vme/inter_frame_gen9.g9b" > +}; > + > +static const uint32_t gen9_vme_hevc_inter_bframe[][4] = { #include > +"shaders/vme/inter_bframe_gen9.g9b" > +}; > + > +static struct i965_kernel gen9_vme_hevc_kernels[] = { > + { > + "VME Intra Frame", > + VME_INTRA_SHADER, /*index*/ > + gen9_vme_hevc_intra_frame, > + sizeof(gen9_vme_hevc_intra_frame), > + NULL > + }, > + { > + "VME inter Frame", > + VME_INTER_SHADER, > + gen9_vme_hevc_inter_frame, > + sizeof(gen9_vme_hevc_inter_frame), > + NULL > + }, > + { > + "VME inter BFrame", > + VME_BINTER_SHADER, > + gen9_vme_hevc_inter_bframe, > + sizeof(gen9_vme_hevc_inter_bframe), > + NULL > + } > +}; > /* only used for VME source surface state */ static void > gen9_vme_source_surface_state(VADriverContextP ctx, @@ -330,6 +367,13 > @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx, > } > } else if (encoder_context->codec == CODEC_MPEG2) { > mv_num = 2; > + }else if (encoder_context->codec == CODEC_HEVC) { > + if (vme_context->hevc_level >= 30*3) { > + mv_num = 16; > + > + if (vme_context->hevc_level >= 31*3) > + mv_num = 8; > + }/* use the avc level setting */ > } > > vme_state_message[31] = mv_num; > @@ -1130,6 +1174,440 @@ 
gen9_vme_mpeg2_pipeline(VADriverContextP ctx, > return VA_STATUS_SUCCESS; > } > > +/* HEVC */ > + > +static void > +gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx, > + struct encode_state *encode_state, > + int index, > + struct intel_encoder_context > +*encoder_context) > + > +{ > + struct i965_driver_data *i965 = i965_driver_data(ctx); > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + VAEncSliceParameterBufferHEVC *pSliceParameter = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; > + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + > 15)/16; > + int height_in_mbs = > +(pSequenceParameter->pic_height_in_luma_samples + 15)/16; > + > + > + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; > + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */ > + > + if (is_intra) > + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2; > + else > + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24; > + /* > + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME > Ref > + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref. > + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24. 
> + */ > + > + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, > + "VME output buffer", > + > vme_context->vme_output.num_blocks * vme_context->vme_output.size_block, > + 0x1000); > + assert(vme_context->vme_output.bo); > + vme_context->vme_buffer_suface_setup(ctx, > + &vme_context->gpe_context, > + &vme_context->vme_output, > + BINDING_TABLE_OFFSET(index), > + > +SURFACE_STATE_OFFSET(index)); } > + > +static void > +gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx, > + struct encode_state *encode_state, > + int index, > + struct intel_encoder_context > +*encoder_context) > + > +{ > + struct i965_driver_data *i965 = i965_driver_data(ctx); > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + > 15)/16; > + int height_in_mbs = > +(pSequenceParameter->pic_height_in_luma_samples + 15)/16; > + > + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + > 1; > + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */ > + vme_context->vme_batchbuffer.pitch = 16; > + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, > + "VME batchbuffer", > + > vme_context->vme_batchbuffer.num_blocks * > vme_context->vme_batchbuffer.size_block, > + 0x1000); } static > +VAStatus gen9_vme_hevc_surface_setup(VADriverContextP ctx, > + struct encode_state *encode_state, > + int is_intra, > + struct intel_encoder_context *encoder_context) > +{ > + struct object_surface *obj_surface; > + > + /*Setup surfaces state*/ > + /* current picture for encoding */ > + obj_surface = encode_state->input_yuv_object; > + gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context); > + gen9_vme_media_source_surface_state(ctx, 4, obj_surface, > encoder_context); > + gen9_vme_media_chroma_source_surface_state(ctx, 6, 
obj_surface, > + encoder_context); > + > + if (!is_intra) { > + VAEncSliceParameterBufferHEVC *slice_param = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + int slice_type; > + > + slice_type = slice_param->slice_type; > + assert(slice_type != SLICE_TYPE_I && slice_type != > + SLICE_TYPE_SI); > + > + /* to do HEVC */ > + intel_hevc_vme_reference_state(ctx, encode_state, > + encoder_context, 0, 1, gen9_vme_source_surface_state); > + > + if (slice_type == SLICE_TYPE_B) > + intel_hevc_vme_reference_state(ctx, encode_state, > encoder_context, 1, 2, gen9_vme_source_surface_state); > + } > + > + /* VME output */ > + gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context); > + gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, > + encoder_context); > + > + return VA_STATUS_SUCCESS; > +} > +static void > +gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx, > + struct encode_state *encode_state, > + int mb_width, int mb_height, > + int kernel, > + int transform_8x8_mode_flag, > + struct intel_encoder_context > +*encoder_context) { > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + int mb_row; > + int s; > + unsigned int *command_ptr; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + int log2_cu_size = > pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3; > + int log2_ctb_size = > pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size; > + int ctb_size = 1 << log2_ctb_size; > + int num_mb_in_ctb = (ctb_size + 15)/16; > + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; > + > +#define USE_SCOREBOARD (1 << 21) > + > + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); > + command_ptr = vme_context->vme_batchbuffer.bo->virtual; > + > + /*slice_segment_address must picture_width_in_ctb alainment */ > + for (s = 0; s < encode_state->num_slice_params_ext; s++) { > 
+ VAEncSliceParameterBufferHEVC *pSliceParameter = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer; > + int first_mb = pSliceParameter->slice_segment_address * > num_mb_in_ctb; > + int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb; > + unsigned int mb_intra_ub, score_dep; > + int x_outer, y_outer, x_inner, y_inner; > + int xtemp_outer = 0; > + > + x_outer = first_mb % mb_width; > + y_outer = first_mb / mb_width; > + mb_row = y_outer; > + > + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, > first_mb, num_mb, mb_width, mb_height); ) { > + x_inner = x_outer; > + y_inner = y_outer; > + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, > mb_width, mb_height);) { > + mb_intra_ub = 0; > + score_dep = 0; > + if (x_inner != 0) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; > + score_dep |= MB_SCOREBOARD_A; > + } > + if (y_inner != mb_row) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; > + score_dep |= MB_SCOREBOARD_B; > + if (x_inner != 0) > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; > + if (x_inner != (mb_width -1)) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; > + score_dep |= MB_SCOREBOARD_C; > + } > + } > + > + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); > + *command_ptr++ = kernel; > + *command_ptr++ = USE_SCOREBOARD; > + /* Indirect data */ > + *command_ptr++ = 0; > + /* the (X, Y) term of scoreboard */ > + *command_ptr++ = ((y_inner << 16) | x_inner); > + *command_ptr++ = score_dep; > + /*inline data */ > + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); > + *command_ptr++ = ((1 << 18) | (1 << 16) | > transform_8x8_mode_flag | (mb_intra_ub << 8)); > + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; > + *command_ptr++ = 0; > + > + x_inner -= 2; > + y_inner += 1; > + } > + x_outer += 1; > + } > + > + xtemp_outer = mb_width - 2; > + if (xtemp_outer < 0) > + xtemp_outer = 0; > + x_outer = xtemp_outer; > + y_outer = first_mb / mb_width; > + for (;!loop_in_bounds(x_outer, y_outer, first_mb, 
num_mb, mb_width, > mb_height); ) { > + y_inner = y_outer; > + x_inner = x_outer; > + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, > mb_width, mb_height);) { > + mb_intra_ub = 0; > + score_dep = 0; > + if (x_inner != 0) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; > + score_dep |= MB_SCOREBOARD_A; > + } > + if (y_inner != mb_row) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; > + score_dep |= MB_SCOREBOARD_B; > + if (x_inner != 0) > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; > + > + if (x_inner != (mb_width -1)) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; > + score_dep |= MB_SCOREBOARD_C; > + } > + } > + > + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); > + *command_ptr++ = kernel; > + *command_ptr++ = USE_SCOREBOARD; > + /* Indirect data */ > + *command_ptr++ = 0; > + /* the (X, Y) term of scoreboard */ > + *command_ptr++ = ((y_inner << 16) | x_inner); > + *command_ptr++ = score_dep; > + /*inline data */ > + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner); > + *command_ptr++ = ((1 << 18) | (1 << 16) | > + transform_8x8_mode_flag | (mb_intra_ub << 8)); > + > + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; > + *command_ptr++ = 0; > + x_inner -= 2; > + y_inner += 1; > + } > + x_outer++; > + if (x_outer >= mb_width) { > + y_outer += 1; > + x_outer = xtemp_outer; > + } > + } > + } > + > + *command_ptr++ = MI_BATCH_BUFFER_END; > + *command_ptr++ = 0; > + > + dri_bo_unmap(vme_context->vme_batchbuffer.bo); > +} > + > +static void > +gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx, > + struct encode_state *encode_state, > + int mb_width, int mb_height, > + int kernel, > + int transform_8x8_mode_flag, > + struct intel_encoder_context > +*encoder_context) { > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + int mb_x = 0, mb_y = 0; > + int i, s; > + unsigned int *command_ptr; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + int 
log2_cu_size = > pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3; > + int log2_ctb_size = > +pSequenceParameter->log2_diff_max_min_luma_coding_block_size + > +log2_cu_size; > + > + int ctb_size = 1 << log2_ctb_size; > + int num_mb_in_ctb = (ctb_size + 15)/16; > + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; > + > + dri_bo_map(vme_context->vme_batchbuffer.bo, 1); > + command_ptr = vme_context->vme_batchbuffer.bo->virtual; > + > + for (s = 0; s < encode_state->num_slice_params_ext; s++) { > + VAEncSliceParameterBufferHEVC *pSliceParameter = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer; > + int slice_mb_begin = pSliceParameter->slice_segment_address * > num_mb_in_ctb; > + int slice_mb_number = pSliceParameter->num_ctu_in_slice * > + num_mb_in_ctb; > + > + unsigned int mb_intra_ub; > + int slice_mb_x = slice_mb_begin % mb_width; > + for (i = 0; i < slice_mb_number; ) { > + int mb_count = i + slice_mb_begin; > + mb_x = mb_count % mb_width; > + mb_y = mb_count / mb_width; > + mb_intra_ub = 0; > + > + if (mb_x != 0) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE; > + } > + if (mb_y != 0) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B; > + if (mb_x != 0) > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; > + if (mb_x != (mb_width -1)) > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; > + } > + if (i < mb_width) { > + if (i == 0) > + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE); > + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK); > + if ((i == (mb_width - 1)) && slice_mb_x) { > + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; > + } > + } > + > + if ((i == mb_width) && slice_mb_x) { > + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D); > + } > + > + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2)); > + *command_ptr++ = kernel; > + *command_ptr++ = 0; > + *command_ptr++ = 0; > + *command_ptr++ = 0; > + *command_ptr++ = 0; > + > + /*inline data */ > + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x); > + *command_ptr++ = ( (1 << 16) | 
transform_8x8_mode_flag | > + (mb_intra_ub << 8)); > + > + *command_ptr++ = CMD_MEDIA_STATE_FLUSH; > + *command_ptr++ = 0; > + i += 1; > + } > + } > + > + *command_ptr++ = MI_BATCH_BUFFER_END; > + *command_ptr++ = 0; > + > + dri_bo_unmap(vme_context->vme_batchbuffer.bo); > +} > + > +static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context > +*encoder_context) { > + struct gen6_vme_context *vme_context = encoder_context->vme_context; > + struct intel_batchbuffer *batch = encoder_context->base.batch; > + VAEncSliceParameterBufferHEVC *pSliceParameter = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + > 15)/16; > + int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + > 15)/16; > + int kernel_shader; > + bool allow_hwscore = true; > + int s; > + > + int log2_cu_size = > pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3; > + int log2_ctb_size = > + pSequenceParameter->log2_diff_max_min_luma_coding_block_size + > + log2_cu_size; > + > + int ctb_size = 1 << log2_ctb_size; > + int num_mb_in_ctb = (ctb_size + 15)/16; > + int transform_8x8_mode_flag = 1; > + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; > + > + for (s = 0; s < encode_state->num_slice_params_ext; s++) { > + pSliceParameter = (VAEncSliceParameterBufferHEVC > *)encode_state->slice_params_ext[s]->buffer; > + int slice_mb_begin = pSliceParameter->slice_segment_address * > num_mb_in_ctb; > + if ((slice_mb_begin % width_in_mbs)) { > + allow_hwscore = false; > + break; > + } > + } > + > + if (pSliceParameter->slice_type == SLICE_TYPE_I) { > + kernel_shader = VME_INTRA_SHADER; > + } else if (pSliceParameter->slice_type == SLICE_TYPE_P) { > + kernel_shader = VME_INTER_SHADER; 
> + } else { > + kernel_shader = VME_BINTER_SHADER; > + if (!allow_hwscore) > + kernel_shader = VME_INTER_SHADER; > + } > + if (allow_hwscore) > + gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx, > + encode_state, > + width_in_mbs, height_in_mbs, > + kernel_shader, > + transform_8x8_mode_flag, > + encoder_context); > + else > + gen9_vme_hevc_fill_vme_batchbuffer(ctx, > + encode_state, > + width_in_mbs, height_in_mbs, > + kernel_shader, > + transform_8x8_mode_flag, > + encoder_context); > + > + intel_batchbuffer_start_atomic(batch, 0x1000); > + gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch); > + BEGIN_BATCH(batch, 3); > + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0)); > + OUT_RELOC(batch, > + vme_context->vme_batchbuffer.bo, > + I915_GEM_DOMAIN_COMMAND, 0, > + 0); > + OUT_BATCH(batch, 0); > + ADVANCE_BATCH(batch); > + > + gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch); > + > + intel_batchbuffer_end_atomic(batch); > +} > + > +static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context > +*encoder_context) { > + VAStatus vaStatus = VA_STATUS_SUCCESS; > + VAEncSliceParameterBufferHEVC *pSliceParameter = > (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer; > + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; > + VAEncSequenceParameterBufferHEVC *pSequenceParameter = > (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer; > + struct gen6_vme_context *vme_context = > +encoder_context->vme_context; > + > + /* here use the avc level for hevc vme */ > + if (!vme_context->hevc_level || > + (vme_context->hevc_level != pSequenceParameter->general_level_idc)) { > + vme_context->hevc_level = pSequenceParameter->general_level_idc; > + } > + > + intel_vme_hevc_update_mbmv_cost(ctx, encode_state, > + encoder_context); > + > + /*Setup all the memory object*/ > + gen9_vme_hevc_surface_setup(ctx, encode_state, 
is_intra, > encoder_context); > + gen9_vme_interface_setup(ctx, encode_state, encoder_context); > + //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context); > + gen9_vme_constant_setup(ctx, encode_state, encoder_context); > + > + /*Programing media pipeline*/ > + gen9_vme_hevc_pipeline_programing(ctx, encode_state, > + encoder_context); > + > + return vaStatus; > +} > + > + > +static VAStatus > +gen9_vme_hevc_pipeline(VADriverContextP ctx, > + VAProfile profile, > + struct encode_state *encode_state, > + struct intel_encoder_context *encoder_context) { > + gen9_vme_media_init(ctx, encoder_context); > + gen9_vme_hevc_prepare(ctx, encode_state, encoder_context); > + gen9_vme_run(ctx, encode_state, encoder_context); > + gen9_vme_stop(ctx, encode_state, encoder_context); > + > + return VA_STATUS_SUCCESS; > +} > + > + > static void > gen9_vme_context_destroy(void *context) { @@ -1172,6 +1650,12 @@ > Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context > *e > vme_kernel_list = gen9_vme_mpeg2_kernels; > encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline; > i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / > sizeof(struct i965_kernel); > + break; > + > + case CODEC_HEVC: > + vme_kernel_list = gen9_vme_hevc_kernels; > + encoder_context->vme_pipeline = gen9_vme_hevc_pipeline; > + i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / > + sizeof(struct i965_kernel); > > break; > > diff --git a/src/i965_encoder.c b/src/i965_encoder.c index > d924f5a..c9ff2ec 100644 > --- a/src/i965_encoder.c > +++ b/src/i965_encoder.c > @@ -39,10 +39,12 @@ > #include "i965_encoder.h" > #include "gen6_vme.h" > #include "gen6_mfc.h" > +#include "gen9_mfc.h" > > extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context); extern Bool > gen6_vme_context_init(VADriverContextP ctx, struct > intel_encoder_context *encoder_context); extern Bool > gen7_mfc_context_init(VADriverContextP ctx, struct > 
intel_encoder_context *encoder_context); > +extern Bool gen9_hcpe_context_init(VADriverContextP ctx, struct > +intel_encoder_context *encoder_context); > > static VAStatus > intel_encoder_check_yuv_surface(VADriverContextP ctx, @@ -422,6 > +424,63 @@ error: > } > > static VAStatus > +intel_encoder_check_hevc_parameter(VADriverContextP ctx, > + struct encode_state *encode_state, > + struct intel_encoder_context > +*encoder_context) { > + struct i965_driver_data *i965 = i965_driver_data(ctx); > + struct object_surface *obj_surface; > + struct object_buffer *obj_buffer; > + VAEncPictureParameterBufferHEVC *pic_param = > (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer; > + int i; > + > + assert(!(pic_param->decoded_curr_pic.flags & > + VA_PICTURE_HEVC_INVALID)); > + > + if (pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID) > + goto error; > + > + obj_surface = SURFACE(pic_param->decoded_curr_pic.picture_id); > + assert(obj_surface); /* It is possible the store buffer isn't > + allocated yet */ > + > + if (!obj_surface) > + goto error; > + > + encode_state->reconstructed_object = obj_surface; > + obj_buffer = BUFFER(pic_param->coded_buf); > + assert(obj_buffer && obj_buffer->buffer_store && > + obj_buffer->buffer_store->bo); > + > + if (!obj_buffer || !obj_buffer->buffer_store || > !obj_buffer->buffer_store->bo) > + goto error; > + > + encode_state->coded_buf_object = obj_buffer; > + > + for (i = 0; i < 15; i++) { > + if (pic_param->reference_frames[i].flags & VA_PICTURE_HEVC_INVALID || > + pic_param->reference_frames[i].picture_id == VA_INVALID_SURFACE) > + break; > + else { > + obj_surface = SURFACE(pic_param->reference_frames[i].picture_id); > + assert(obj_surface); > + > + if (!obj_surface) > + goto error; > + > + if (obj_surface->bo) > + encode_state->reference_objects[i] = obj_surface; > + else > + encode_state->reference_objects[i] = NULL; /* FIXME: Warning > or Error ??? 
*/ > + } > + } > + > + for ( ; i < 15; i++) > + encode_state->reference_objects[i] = NULL; > + > + return VA_STATUS_SUCCESS; > + > +error: > + return VA_STATUS_ERROR_INVALID_PARAMETER; > +} > +static VAStatus > intel_encoder_sanity_check_input(VADriverContextP ctx, > VAProfile profile, > struct encode_state *encode_state, > @@ -459,6 +518,13 @@ intel_encoder_sanity_check_input(VADriverContextP ctx, > break; > } > > + case VAProfileHEVCMain: { > + vaStatus = intel_encoder_check_hevc_parameter(ctx, encode_state, > encoder_context); > + if (vaStatus != VA_STATUS_SUCCESS) > + goto out; > + vaStatus = intel_encoder_check_yuv_surface(ctx, profile, > encode_state, encoder_context); > + break; > + } > default: > vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE; > break; > @@ -554,6 +620,10 @@ intel_enc_hw_context_init(VADriverContextP ctx, > encoder_context->codec = CODEC_JPEG; > break; > > + case VAProfileHEVCMain: > + encoder_context->codec = CODEC_HEVC; > + break; > + > default: > /* Never get here */ > assert(0); > @@ -617,7 +687,9 @@ gen8_enc_hw_context_init(VADriverContextP ctx, > struct object_config *obj_config) struct hw_context * > gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config > *obj_config) { > - if (obj_config->profile == VAProfileJPEGBaseline) > + if (obj_config->profile == VAProfileHEVCMain) { > + return intel_enc_hw_context_init(ctx, obj_config, > gen9_vme_context_init, gen9_hcpe_context_init); > + } else if (obj_config->profile == VAProfileJPEGBaseline) > return intel_enc_hw_context_init(ctx, obj_config, > gen8_vme_context_init, gen8_mfc_context_init); > else > return intel_enc_hw_context_init(ctx, obj_config, > gen9_vme_context_init, gen9_mfc_context_init); > -- > 1.9.1 > > _______________________________________________ > Libva mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/libva _______________________________________________ Libva mailing list [email protected] 
http://lists.freedesktop.org/mailman/listinfo/libva
