On Tue, Jan 10, 2017 at 4:21 PM, Mark Thompson <s...@jkqxz.net> wrote:
> On 10/01/17 22:02, Sean V Kelley wrote: > > From: "Xiang, Haihao" <haihao.xi...@intel.com> > > > > Currently only one temporal layer is supported > > > > Signed-off-by: Xiang, Haihao <haihao.xi...@intel.com> > > Reviewed-by: Sean V Kelley <sea...@posteo.de> > > --- > > src/Makefile.am | 3 + > > src/gen8_encoder_vp8.c | 140 + > > src/gen8_mfc.c | 8 +- > > src/gen8_vme.c | 5 + > > src/i965_defines.h | 10 + > > src/i965_encoder.c | 2 + > > src/i965_encoder_vp8.c | 6697 ++++++++++++++++++++++++++++++ > ++++++++++++++++++ > > src/i965_encoder_vp8.h | 2643 +++++++++++++++++++ > > 8 files changed, 9507 insertions(+), 1 deletion(-) > > I had a go with this on Kaby Lake. In general, big win - looks like it > can be under half the bitrate at comparable quality (though it was pretty > terrible before...). > > However, the rate control seems to do odd things at low bitrate relative > to the frame size? I can get GPU hangs and wildly varying output bitrate > with it, though it seems ok at high bitrate. > That's a concern. Please report the hang: if it really is a GPU hang, I need the error report from the DRM card0 log. cat /sys/class/drm/card0/error Please rerun and capture the DRM (i915) card0 error log. > > I had a look around the rate control and found two minor issues in the RC > configuration, though I don't think either of them are relevant to my > problem (see below). I can try to make a reproducer if this is not already > known? > > Please do attempt to reproduce. That's why I've put the patches out here to test. Thanks, Sean > Thanks, > > - Mark > > > > ... 
> > + > > +static void > > +i965_encoder_vp8_get_misc_parameters(VADriverContextP ctx, > > + struct encode_state *encode_state, > > + struct intel_encoder_context > *encoder_context) > > +{ > > + struct i965_encoder_vp8_context *vp8_context = > encoder_context->vme_context; > > + > > + if (vp8_context->internal_rate_mode == I965_BRC_CQP) { > > + vp8_context->init_vbv_buffer_fullness_in_bit = 0; > > + vp8_context->vbv_buffer_size_in_bit = 0; > > + vp8_context->target_bit_rate = 0; > > + vp8_context->max_bit_rate = 0; > > + vp8_context->min_bit_rate = 0; > > + vp8_context->brc_need_reset = 0; > > + } else { > > + vp8_context->gop_size = encoder_context->brc.gop_size; > > + > > + if (encoder_context->brc.need_reset) { > > + vp8_context->framerate = encoder_context->brc.framerate[0]; > > + vp8_context->vbv_buffer_size_in_bit = > encoder_context->brc.hrd_buffer_size; > > + vp8_context->init_vbv_buffer_fullness_in_bit = > encoder_context->brc.hrd_initial_buffer_fullness; > > + vp8_context->max_bit_rate = > > encoder_context->brc.bits_per_second[0]; > // currently only one layer is supported > > + vp8_context->brc_need_reset = (vp8_context->brc_initted && > encoder_context->brc.need_reset); > > + > > + if (vp8_context->internal_rate_mode == I965_BRC_CBR) { > > + vp8_context->min_bit_rate = vp8_context->max_bit_rate; > > + vp8_context->target_bit_rate = > vp8_context->max_bit_rate; > > + } else { > > + assert(vp8_context->internal_rate_mode == > I965_BRC_VBR); > > + vp8_context->min_bit_rate = vp8_context->max_bit_rate * > (2 * encoder_context->brc.target_percentage[0] - 100) / 100; > > If target percentage is < 50 then (2 * > encoder_context->brc.target_percentage[0] > - 100) is negative. Since it's unsigned, you end up with a garbage number > in min_bit_rate. > That's a concern; we may also need to reconcile this with our handling for VP9 encode. 
> > > + vp8_context->target_bit_rate = > vp8_context->max_bit_rate * encoder_context->brc.target_percentage[0] / > 100; > > + } > > + } > > + } > > + > > + if (encoder_context->quality_level == ENCODER_LOW_QUALITY) > > + vp8_context->hme_16x_supported = 0; > > +} > > + > > ... > > + > > +static void > > +i965_encoder_vp8_vme_brc_init_reset_set_curbe(VADriverContextP ctx, > > + struct encode_state > *encode_state, > > + struct > intel_encoder_context *encoder_context, > > + struct i965_gpe_context > *gpe_context) > > +{ > > + struct i965_encoder_vp8_context *vp8_context = > encoder_context->vme_context; > > + VAEncPictureParameterBufferVP8 *pic_param = > > (VAEncPictureParameterBufferVP8 > *)encode_state->pic_param_ext->buffer; > > + struct vp8_brc_init_reset_curbe_data *pcmd = > i965_gpe_context_map_curbe(gpe_context); > > + double input_bits_per_frame, bps_ratio; > > + > > + memset(pcmd, 0, sizeof(*pcmd)); > > + > > + pcmd->dw0.profile_level_max_frame = vp8_context->frame_width * > vp8_context->frame_height; > > + pcmd->dw1.init_buf_full_in_bits = vp8_context->init_vbv_buffer_ > fullness_in_bit; > > + pcmd->dw2.buf_size_in_bits = vp8_context->vbv_buffer_size_in_bit; > > + pcmd->dw3.average_bitrate = ALIGN(vp8_context->target_bit_rate, > VP8_BRC_KBPS) / VP8_BRC_KBPS * VP8_BRC_KBPS; > > + pcmd->dw4.max_bitrate = ALIGN(vp8_context->max_bit_rate, > VP8_BRC_KBPS) / VP8_BRC_KBPS * VP8_BRC_KBPS; > > VP8_BRC_KBPS is 1000 which is not a power of two, so the ALIGN macro isn't > doing anything sensible here. > Agree... 
> > > + pcmd->dw6.frame_rate_m = vp8_context->framerate.num; > > + pcmd->dw7.frame_rate_d = vp8_context->framerate.den; > > + pcmd->dw8.brc_flag = 0; > > + pcmd->dw8.gop_minus1 = vp8_context->gop_size - 1; > > + > > + if (vp8_context->internal_rate_mode == I965_BRC_CBR) { > > + pcmd->dw4.max_bitrate = pcmd->dw3.average_bitrate; > > + > > + pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_CBR; > > + } else if (vp8_context->internal_rate_mode == I965_BRC_VBR) { > > + if (pcmd->dw4.max_bitrate < pcmd->dw3.average_bitrate) { > > + pcmd->dw4.max_bitrate = 2 * pcmd->dw3.average_bitrate; > > + } > > + > > + pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_VBR; > > + } > > + > > + input_bits_per_frame = > > + ((double)(pcmd->dw4.max_bitrate) * > (double)(pcmd->dw7.frame_rate_d) / > > + (double)(pcmd->dw6.frame_rate_m)); > > + > > + if (pcmd->dw2.buf_size_in_bits < (unsigned int)input_bits_per_frame > * 4) { > > + pcmd->dw2.buf_size_in_bits = (unsigned int)input_bits_per_frame > * 4; > > + } > > + > > + if (pcmd->dw1.init_buf_full_in_bits == 0) { > > + pcmd->dw1.init_buf_full_in_bits = 7 * > pcmd->dw2.buf_size_in_bits / 8; > > + } > > + > > + if (pcmd->dw1.init_buf_full_in_bits < (unsigned > int)(input_bits_per_frame * 2)) { > > + pcmd->dw1.init_buf_full_in_bits = (unsigned > int)(input_bits_per_frame * 2); > > + } > > + > > + if (pcmd->dw1.init_buf_full_in_bits > pcmd->dw2.buf_size_in_bits) { > > + pcmd->dw1.init_buf_full_in_bits = pcmd->dw2.buf_size_in_bits; > > + } > > + > > + bps_ratio = input_bits_per_frame / > > ((double)(pcmd->dw2.buf_size_in_bits) > / 30); > > + bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 
3.5 : > bps_ratio; > > + > > + pcmd->dw9.frame_width_in_bytes = vp8_context->frame_width; > > + pcmd->dw10.frame_height_in_bytes = vp8_context->frame_height; > > + pcmd->dw10.avbr_accuracy = 30; > > + pcmd->dw11.avbr_convergence = 150; > > + pcmd->dw11.min_qp = pic_param->clamp_qindex_low; > > + pcmd->dw12.max_qp = pic_param->clamp_qindex_high; > > + pcmd->dw12.level_qp = 60; > > + > > + // DW13 default 100 > > + pcmd->dw13.max_section_pct = 100; > > + pcmd->dw13.under_shoot_cbr_pct = 115; > > + > > + // DW14 default 100 > > + pcmd->dw14.min_section_pct = 100; > > + pcmd->dw14.vbr_bias_pct = 100; > > + pcmd->dw15.instant_rate_threshold_0_for_p = 30; > > + pcmd->dw15.instant_rate_threshold_1_for_p = 50; > > + pcmd->dw15.instant_rate_threshold_2_for_p = 70; > > + pcmd->dw15.instant_rate_threshold_3_for_p = 120; > > + > > + pcmd->dw17.instant_rate_threshold_0_for_i = 30; > > + pcmd->dw17.instant_rate_threshold_1_for_i = 50; > > + pcmd->dw17.instant_rate_threshold_2_for_i = 90; > > + pcmd->dw17.instant_rate_threshold_3_for_i = 115; > > + pcmd->dw18.deviation_threshold_0_for_p = (unsigned int)(-50 * > pow(0.9, bps_ratio)); > > + pcmd->dw18.deviation_threshold_1_for_p = (unsigned int)(-50 * > pow(0.66, bps_ratio)); > > + pcmd->dw18.deviation_threshold_2_for_p = (unsigned int)(-50 * > pow(0.46, bps_ratio)); > > + pcmd->dw18.deviation_threshold_3_for_p = (unsigned int)(-50 * > pow(0.3, bps_ratio)); > > + pcmd->dw19.deviation_threshold_4_for_p = (unsigned int)(50 * > pow(0.3, bps_ratio)); > > + pcmd->dw19.deviation_threshold_5_for_p = (unsigned int)(50 * > pow(0.46, bps_ratio)); > > + pcmd->dw19.deviation_threshold_6_for_p = (unsigned int)(50 * > pow(0.7, bps_ratio)); > > + pcmd->dw19.deviation_threshold_7_for_p = (unsigned int)(50 * > pow(0.9, bps_ratio)); > > + pcmd->dw20.deviation_threshold_0_for_vbr = (unsigned int)(-50 * > pow(0.9, bps_ratio)); > > + pcmd->dw20.deviation_threshold_1_for_vbr = (unsigned int)(-50 * > pow(0.7, bps_ratio)); > > + 
pcmd->dw20.deviation_threshold_2_for_vbr = (unsigned int)(-50 * > pow(0.5, bps_ratio)); > > + pcmd->dw20.deviation_threshold_3_for_vbr = (unsigned int)(-50 * > pow(0.3, bps_ratio)); > > + pcmd->dw21.deviation_threshold_4_for_vbr = (unsigned int)(100 * > pow(0.4, bps_ratio)); > > + pcmd->dw21.deviation_threshold_5_for_vbr = (unsigned int)(100 * > pow(0.5, bps_ratio)); > > + pcmd->dw21.deviation_threshold_6_for_vbr = (unsigned int)(100 * > pow(0.75, bps_ratio)); > > + pcmd->dw21.deviation_threshold_7_for_vbr = (unsigned int)(100 * > pow(0.9, bps_ratio)); > > + pcmd->dw22.deviation_threshold_0_for_i = (unsigned int)(-50 * > pow(0.8, bps_ratio)); > > + pcmd->dw22.deviation_threshold_1_for_i = (unsigned int)(-50 * > pow(0.6, bps_ratio)); > > + pcmd->dw22.deviation_threshold_2_for_i = (unsigned int)(-50 * > pow(0.34, bps_ratio)); > > + pcmd->dw22.deviation_threshold_3_for_i = (unsigned int)(-50 * > pow(0.2, bps_ratio)); > > + pcmd->dw23.deviation_threshold_4_for_i = (unsigned int)(50 * > pow(0.2, bps_ratio)); > > + pcmd->dw23.deviation_threshold_5_for_i = (unsigned int)(50 * > pow(0.4, bps_ratio)); > > + pcmd->dw23.deviation_threshold_6_for_i = (unsigned int)(50 * > pow(0.66, bps_ratio)); > > + pcmd->dw23.deviation_threshold_7_for_i = (unsigned int)(50 * > pow(0.9, bps_ratio)); > > + > > + // Default: 1 > > + pcmd->dw24.num_t_levels = 1; > > + > > + if (!vp8_context->brc_initted) { > > + vp8_context->brc_init_current_target_buf_full_in_bits = > pcmd->dw1.init_buf_full_in_bits; > > + } > > + > > + vp8_context->brc_init_reset_buf_size_in_bits = > pcmd->dw2.buf_size_in_bits; > > + vp8_context->brc_init_reset_input_bits_per_frame = > input_bits_per_frame; > > + > > + pcmd->dw26.history_buffer_bti = VP8_BTI_BRC_INIT_RESET_HISTORY; > > + pcmd->dw27.distortion_buffer_bti = VP8_BTI_BRC_INIT_RESET_ > DISTORTION; > > + > > + i965_gpe_context_unmap_curbe(gpe_context); > > +} > > + > > ... 
> _______________________________________________ > Libva mailing list > Libva@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/libva > -- Sean V. Kelley <sean.v.kel...@intel.com> Open Source Technology Center / SSG Intel Corp.
_______________________________________________ Libva mailing list Libva@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libva